cmd/k8s-operator: fix Service reconcile triggers for default ProxyClass (#18983)

The e2e ingress test was very occasionally flaky. Operator logs from one
failure show that the default ProxyClass was not ready before the first
reconcile loop for the exposed Service. The ProxyClass became ready soon
after, but no additional reconciles were triggered for the exposed
Service because we only triggered reconciles for Services that
explicitly named their ProxyClass.

This change adds list API calls that run when the default ProxyClass is
updated, in order to catch Services that use it by default rather than
by name. It also adds indexes for the fields we need to search on to
ensure the list calls are efficient.

Fixes tailscale/corp#37533

Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
Tom Proctor
2026-03-13 14:31:16 +00:00
committed by GitHub
parent dd480f0fb9
commit 621f71981c
5 changed files with 331 additions and 90 deletions
+10 -12
View File
@@ -42,8 +42,6 @@ const (
reasonProxyInvalid = "ProxyInvalid"
reasonProxyFailed = "ProxyFailed"
reasonProxyPending = "ProxyPending"
indexServiceProxyClass = ".metadata.annotations.service-proxy-class"
)
type ServiceReconciler struct {
@@ -97,7 +95,7 @@ func childResourceLabels(name, ns, typ string) map[string]string {
func (a *ServiceReconciler) isTailscaleService(svc *corev1.Service) bool {
targetIP := tailnetTargetAnnotation(svc)
targetFQDN := svc.Annotations[AnnotationTailnetTargetFQDN]
return a.shouldExpose(svc) || targetIP != "" || targetFQDN != ""
return shouldExpose(svc, a.isDefaultLoadBalancer) || targetIP != "" || targetFQDN != ""
}
func (a *ServiceReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
@@ -164,7 +162,7 @@ func (a *ServiceReconciler) maybeCleanup(ctx context.Context, logger *zap.Sugare
}
proxyTyp := proxyTypeEgress
if a.shouldExpose(svc) {
if shouldExpose(svc, a.isDefaultLoadBalancer) {
proxyTyp = proxyTypeIngressService
}
@@ -275,16 +273,16 @@ func (a *ServiceReconciler) maybeProvision(ctx context.Context, logger *zap.Suga
LoginServer: a.ssr.loginServer,
}
sts.proxyType = proxyTypeEgress
if a.shouldExpose(svc) {
if shouldExpose(svc, a.isDefaultLoadBalancer) {
sts.proxyType = proxyTypeIngressService
}
a.mu.Lock()
if a.shouldExposeClusterIP(svc) {
if shouldExposeClusterIP(svc, a.isDefaultLoadBalancer) {
sts.ClusterTargetIP = svc.Spec.ClusterIP
a.managedIngressProxies.Add(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
} else if a.shouldExposeDNSName(svc) {
} else if shouldExposeDNSName(svc) {
sts.ClusterTargetDNSName = svc.Spec.ExternalName
a.managedIngressProxies.Add(svc.UID)
gaugeIngressProxies.Set(int64(a.managedIngressProxies.Len()))
@@ -410,19 +408,19 @@ func validateService(svc *corev1.Service) []string {
return violations
}
func (a *ServiceReconciler) shouldExpose(svc *corev1.Service) bool {
return a.shouldExposeClusterIP(svc) || a.shouldExposeDNSName(svc)
func shouldExpose(svc *corev1.Service, isDefaultLoadBalancer bool) bool {
return shouldExposeClusterIP(svc, isDefaultLoadBalancer) || shouldExposeDNSName(svc)
}
func (a *ServiceReconciler) shouldExposeDNSName(svc *corev1.Service) bool {
func shouldExposeDNSName(svc *corev1.Service) bool {
return hasExposeAnnotation(svc) && svc.Spec.Type == corev1.ServiceTypeExternalName && svc.Spec.ExternalName != ""
}
func (a *ServiceReconciler) shouldExposeClusterIP(svc *corev1.Service) bool {
func shouldExposeClusterIP(svc *corev1.Service, isDefaultLoadBalancer bool) bool {
if svc.Spec.ClusterIP == "" {
return false
}
return isTailscaleLoadBalancerService(svc, a.isDefaultLoadBalancer) || hasExposeAnnotation(svc)
return isTailscaleLoadBalancerService(svc, isDefaultLoadBalancer) || hasExposeAnnotation(svc)
}
func isTailscaleLoadBalancerService(svc *corev1.Service, isDefaultLoadBalancer bool) bool {