cmd/containerboot,cmd/k8s-operator: reload tailscaled config (#14342)

cmd/{k8s-operator,containerboot}: reload tailscaled configfile when its contents have changed

Instead of restarting the Kubernetes Operator proxies each time
tailscaled config has changed, this dynamically reloads the configfile
using the new reload endpoint.
Older annotation based mechanism will be supported till 1.84
to ensure that proxy versions prior to 1.80 keep working with
operator 1.80 and newer.

Updates tailscale/tailscale#13032
Updates tailscale/corp#24795

Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
Irbe Krumina
2025-01-10 07:29:11 +00:00
committed by GitHub
parent fc8b6d9c6a
commit 48a95c422a
8 changed files with 207 additions and 39 deletions
+65 -8
View File
@@ -261,17 +261,44 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
return fmt.Errorf("error provisioning ConfigMap: %w", err)
}
}
ss, err := pgStatefulSet(pg, r.tsNamespace, r.proxyImage, r.tsFirewallMode, cfgHash)
ss, err := pgStatefulSet(pg, r.tsNamespace, r.proxyImage, r.tsFirewallMode)
if err != nil {
return fmt.Errorf("error generating StatefulSet spec: %w", err)
}
ss = applyProxyClassToStatefulSet(proxyClass, ss, nil, logger)
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
capver, err := r.capVerForPG(ctx, pg, logger)
if err != nil {
return fmt.Errorf("error getting device info: %w", err)
}
updateSS := func(s *appsv1.StatefulSet) {
// This is a temporary workaround to ensure that egress ProxyGroup proxies with capver older than 110
// are restarted when tailscaled configfile contents have changed.
// This workaround ensures that:
// 1. The hash mechanism is used to trigger pod restarts for proxies below capver 110.
// 2. Proxies above capver are not unnecessarily restarted when the configfile contents change.
// 3. If the hash has alreay been set, but the capver is above 110, the old hash is preserved to avoid
// unnecessary pod restarts that could result in an update loop where capver cannot be determined for a
// restarting Pod and the hash is re-added again.
// Note that this workaround is only applied to egress ProxyGroups, because ingress ProxyGroup was added after capver 110.
// Note also that the hash annotation is only set on updates, not creation, because if the StatefulSet is
// being created, there is no need for a restart.
// TODO(irbekrm): remove this in 1.84.
hash := cfgHash
if capver >= 110 {
hash = s.Spec.Template.GetAnnotations()[podAnnotationLastSetConfigFileHash]
}
s.Spec = ss.Spec
if hash != "" && pg.Spec.Type == tsapi.ProxyGroupTypeEgress {
mak.Set(&s.Spec.Template.Annotations, podAnnotationLastSetConfigFileHash, hash)
}
s.ObjectMeta.Labels = ss.ObjectMeta.Labels
s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations
s.ObjectMeta.OwnerReferences = ss.ObjectMeta.OwnerReferences
s.Spec = ss.Spec
}); err != nil {
}
if _, err := createOrUpdate(ctx, r.Client, r.tsNamespace, ss, updateSS); err != nil {
return fmt.Errorf("error provisioning StatefulSet: %w", err)
}
mo := &metricsOpts{
@@ -564,12 +591,19 @@ func (r *ProxyGroupReconciler) getNodeMetadata(ctx context.Context, pg *tsapi.Pr
continue
}
metadata = append(metadata, nodeMetadata{
nm := nodeMetadata{
ordinal: ordinal,
stateSecret: &secret,
tsID: id,
dnsName: dnsName,
})
}
pod := &corev1.Pod{}
if err := r.Get(ctx, client.ObjectKey{Namespace: r.tsNamespace, Name: secret.Name}, pod); err != nil && !apierrors.IsNotFound(err) {
return nil, err
} else if err == nil {
nm.podUID = string(pod.UID)
}
metadata = append(metadata, nm)
}
return metadata, nil
@@ -601,6 +635,29 @@ func (r *ProxyGroupReconciler) getDeviceInfo(ctx context.Context, pg *tsapi.Prox
type nodeMetadata struct {
ordinal int
stateSecret *corev1.Secret
tsID tailcfg.StableNodeID
dnsName string
// podUID is the UID of the current Pod or empty if the Pod does not exist.
podUID string
tsID tailcfg.StableNodeID
dnsName string
}
// capVerForPG returns best effort capability version for the given ProxyGroup. It attempts to find it by looking at the
// Secret + Pod for the replica with ordinal 0. Returns -1 if it is not possible to determine the capability version
// (i.e there is no Pod yet).
func (r *ProxyGroupReconciler) capVerForPG(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (tailcfg.CapabilityVersion, error) {
metas, err := r.getNodeMetadata(ctx, pg)
if err != nil {
return -1, fmt.Errorf("error getting node metadata: %w", err)
}
if len(metas) == 0 {
return -1, nil
}
dev, err := deviceInfo(metas[0].stateSecret, metas[0].podUID, logger)
if err != nil {
return -1, fmt.Errorf("error getting device info: %w", err)
}
if dev == nil {
return -1, nil
}
return dev.capver, nil
}