cmd/k8s-operator: check that cluster traffic is routed to egress ProxyGroup Pod before marking it as ready (#14792)

This change builds on top of #14436 to ensure minimum downtime during egress ProxyGroup update rollouts:

- adds a readiness gate for ProxyGroup replicas that prevents kubelet from marking
the replica Pod as ready before a corresponding readiness condition has been added
to the Pod

- adds a reconciler that reconciles egress ProxyGroup Pods and, for each that is not ready,
if cluster traffic for relevant egress endpoints is routed via this Pod- if so add the
readiness condition to allow kubelet to mark the Pod as ready.

During the sequenced StatefulSet update rollouts kubelet does not restart
a Pod before the previous replica has been updated and marked as ready, so
ensuring that a replica is not marked as ready allows to avoid a temporary
post-update situation where all replicas have been restarted, but none of the
new ones are yet set up as an endpoint for the egress service, so cluster traffic is dropped.

Updates tailscale/tailscale#14326

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina
2025-01-30 10:47:45 +02:00
committed by GitHub
parent 8bd04bdd3a
commit 3f39211f98
7 changed files with 916 additions and 8 deletions
+70
View File
@@ -9,6 +9,7 @@ package main
import (
"context"
"net/http"
"os"
"regexp"
"strconv"
@@ -453,6 +454,24 @@ func runReconcilers(opts reconcilerOpts) {
startlog.Fatalf("could not create egress EndpointSlices reconciler: %v", err)
}
podsForEps := handler.EnqueueRequestsFromMapFunc(podsFromEgressEps(mgr.GetClient(), opts.log, opts.tailscaleNamespace))
podsER := handler.EnqueueRequestsFromMapFunc(egressPodsHandler)
err = builder.
ControllerManagedBy(mgr).
Named("egress-pods-readiness-reconciler").
Watches(&discoveryv1.EndpointSlice{}, podsForEps).
Watches(&corev1.Pod{}, podsER).
Complete(&egressPodsReconciler{
Client: mgr.GetClient(),
tsNamespace: opts.tailscaleNamespace,
clock: tstime.DefaultClock{},
logger: opts.log.Named("egress-pods-readiness-reconciler"),
httpClient: http.DefaultClient,
})
if err != nil {
startlog.Fatalf("could not create egress Pods readiness reconciler: %v", err)
}
// ProxyClass reconciler gets triggered on ServiceMonitor CRD changes to ensure that any ProxyClasses, that
// define that a ServiceMonitor should be created, were set to invalid because the CRD did not exist get
// reconciled if the CRD is applied at a later point.
@@ -906,6 +925,20 @@ func egressEpsHandler(_ context.Context, o client.Object) []reconcile.Request {
}
}
func egressPodsHandler(_ context.Context, o client.Object) []reconcile.Request {
if typ := o.GetLabels()[LabelParentType]; typ != proxyTypeProxyGroup {
return nil
}
return []reconcile.Request{
{
NamespacedName: types.NamespacedName{
Namespace: o.GetNamespace(),
Name: o.GetName(),
},
},
}
}
// egressEpsFromEgressPods returns a Pod event handler that checks if Pod is a replica for a ProxyGroup and if it is,
// returns reconciler requests for all egress EndpointSlices for that ProxyGroup.
func egressEpsFromPGPods(cl client.Client, ns string) handler.MapFunc {
@@ -1056,6 +1089,43 @@ func epsFromExternalNameService(cl client.Client, logger *zap.SugaredLogger, ns
}
}
func podsFromEgressEps(cl client.Client, logger *zap.SugaredLogger, ns string) handler.MapFunc {
return func(ctx context.Context, o client.Object) []reconcile.Request {
eps, ok := o.(*discoveryv1.EndpointSlice)
if !ok {
logger.Infof("[unexpected] EndpointSlice handler triggered for an object that is not a EndpointSlice")
return nil
}
if eps.Labels[labelProxyGroup] == "" {
return nil
}
if eps.Labels[labelSvcType] != "egress" {
return nil
}
podLabels := map[string]string{
LabelManaged: "true",
LabelParentType: "proxygroup",
LabelParentName: eps.Labels[labelProxyGroup],
}
podList := &corev1.PodList{}
if err := cl.List(ctx, podList, client.InNamespace(ns),
client.MatchingLabels(podLabels)); err != nil {
logger.Infof("error listing EndpointSlices: %v, skipping a reconcile for event on EndpointSlice %s", err, eps.Name)
return nil
}
reqs := make([]reconcile.Request, 0)
for _, pod := range podList.Items {
reqs = append(reqs, reconcile.Request{
NamespacedName: types.NamespacedName{
Namespace: pod.Namespace,
Name: pod.Name,
},
})
}
return reqs
}
}
// proxyClassesWithServiceMonitor returns an event handler that, given that the event is for the Prometheus
// ServiceMonitor CRD, returns all ProxyClasses that define that a ServiceMonitor should be created.
func proxyClassesWithServiceMonitor(cl client.Client, logger *zap.SugaredLogger) handler.MapFunc {