cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services for ProxyGroup (#13746)
cmd/k8s-operator,k8s-operator/apis: set a readiness condition on egress Services Set a readiness condition on ExternalName Services that define a tailnet target to route cluster traffic to via a ProxyGroup's proxies. The condition is set to true if at least one proxy is currently set up to route. Updates tailscale/tailscale#13406 Signed-off-by: Irbe Krumina <irbe@tailscale.com>
This commit is contained in:
@@ -0,0 +1,179 @@
|
||||
// Copyright (c) Tailscale Inc & AUTHORS
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
//go:build !plan9
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"go.uber.org/zap"
|
||||
appsv1 "k8s.io/api/apps/v1"
|
||||
corev1 "k8s.io/api/core/v1"
|
||||
discoveryv1 "k8s.io/api/discovery/v1"
|
||||
apiequality "k8s.io/apimachinery/pkg/api/equality"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
||||
tsoperator "tailscale.com/k8s-operator"
|
||||
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
||||
"tailscale.com/tstime"
|
||||
)
|
||||
|
||||
const (
|
||||
reasonReadinessCheckFailed = "ReadinessCheckFailed"
|
||||
reasonClusterResourcesNotReady = "ClusterResourcesNotReady"
|
||||
reasonNoProxies = "NoProxiesConfigured"
|
||||
reasonNotReady = "NotReadyToRouteTraffic"
|
||||
reasonReady = "ReadyToRouteTraffic"
|
||||
reasonPartiallyReady = "PartiallyReadyToRouteTraffic"
|
||||
msgReadyToRouteTemplate = "%d out of %d replicas are ready to route traffic"
|
||||
)
|
||||
|
||||
type egressSvcsReadinessReconciler struct {
|
||||
client.Client
|
||||
logger *zap.SugaredLogger
|
||||
clock tstime.Clock
|
||||
tsNamespace string
|
||||
}
|
||||
|
||||
// Reconcile reconciles an ExternalName Service that defines a tailnet target to be exposed on a ProxyGroup and sets the
|
||||
// EgressSvcReady condition on it. The condition gets set to true if at least one of the proxies is currently ready to
|
||||
// route traffic to the target. It compares proxy Pod IPs with the endpoints set on the EndpointSlice for the egress
|
||||
// service to determine how many replicas are currently able to route traffic.
|
||||
func (esrr *egressSvcsReadinessReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
|
||||
l := esrr.logger.With("Service", req.NamespacedName)
|
||||
defer l.Info("reconcile finished")
|
||||
|
||||
svc := new(corev1.Service)
|
||||
if err = esrr.Get(ctx, req.NamespacedName, svc); apierrors.IsNotFound(err) {
|
||||
l.Info("Service not found")
|
||||
return res, nil
|
||||
} else if err != nil {
|
||||
return res, fmt.Errorf("failed to get Service: %w", err)
|
||||
}
|
||||
var (
|
||||
reason, msg string
|
||||
st metav1.ConditionStatus = metav1.ConditionUnknown
|
||||
)
|
||||
oldStatus := svc.Status.DeepCopy()
|
||||
defer func() {
|
||||
tsoperator.SetServiceCondition(svc, tsapi.EgressSvcReady, st, reason, msg, esrr.clock, l)
|
||||
if !apiequality.Semantic.DeepEqual(oldStatus, svc.Status) {
|
||||
err = errors.Join(err, esrr.Status().Update(ctx, svc))
|
||||
}
|
||||
}()
|
||||
|
||||
crl := egressSvcChildResourceLabels(svc)
|
||||
eps, err := getSingleObject[discoveryv1.EndpointSlice](ctx, esrr.Client, esrr.tsNamespace, crl)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error getting EndpointSlice: %w", err)
|
||||
reason = reasonReadinessCheckFailed
|
||||
msg = err.Error()
|
||||
return res, err
|
||||
}
|
||||
if eps == nil {
|
||||
l.Infof("EndpointSlice for Service does not yet exist, waiting...")
|
||||
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
|
||||
st = metav1.ConditionFalse
|
||||
return res, nil
|
||||
}
|
||||
pg := &tsapi.ProxyGroup{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: svc.Annotations[AnnotationProxyGroup],
|
||||
},
|
||||
}
|
||||
err = esrr.Get(ctx, client.ObjectKeyFromObject(pg), pg)
|
||||
if apierrors.IsNotFound(err) {
|
||||
l.Infof("ProxyGroup for Service does not exist, waiting...")
|
||||
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
|
||||
st = metav1.ConditionFalse
|
||||
return res, nil
|
||||
}
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error retrieving ProxyGroup: %w", err)
|
||||
reason = reasonReadinessCheckFailed
|
||||
msg = err.Error()
|
||||
return res, err
|
||||
}
|
||||
if !tsoperator.ProxyGroupIsReady(pg) {
|
||||
l.Infof("ProxyGroup for Service is not ready, waiting...")
|
||||
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
|
||||
st = metav1.ConditionFalse
|
||||
return res, nil
|
||||
}
|
||||
|
||||
replicas := pgReplicas(pg)
|
||||
if replicas == 0 {
|
||||
l.Infof("ProxyGroup replicas set to 0")
|
||||
reason, msg = reasonNoProxies, reasonNoProxies
|
||||
st = metav1.ConditionFalse
|
||||
return res, nil
|
||||
}
|
||||
podLabels := pgLabels(pg.Name, nil)
|
||||
var readyReplicas int32
|
||||
for i := range replicas {
|
||||
podLabels[appsv1.PodIndexLabel] = fmt.Sprintf("%d", i)
|
||||
pod, err := getSingleObject[corev1.Pod](ctx, esrr.Client, esrr.tsNamespace, podLabels)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("error retrieving ProxyGroup Pod: %w", err)
|
||||
reason = reasonReadinessCheckFailed
|
||||
msg = err.Error()
|
||||
return res, err
|
||||
}
|
||||
if pod == nil {
|
||||
l.Infof("[unexpected] ProxyGroup is ready, but replica %d was not found", i)
|
||||
reason, msg = reasonClusterResourcesNotReady, reasonClusterResourcesNotReady
|
||||
return res, nil
|
||||
}
|
||||
l.Infof("looking at Pod with IPs %v", pod.Status.PodIPs)
|
||||
ready := false
|
||||
for _, ep := range eps.Endpoints {
|
||||
l.Infof("looking at endpoint with addresses %v", ep.Addresses)
|
||||
if endpointReadyForPod(&ep, pod, l) {
|
||||
l.Infof("endpoint is ready for Pod")
|
||||
ready = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if ready {
|
||||
readyReplicas++
|
||||
}
|
||||
}
|
||||
msg = fmt.Sprintf(msgReadyToRouteTemplate, readyReplicas, replicas)
|
||||
if readyReplicas == 0 {
|
||||
reason = reasonNotReady
|
||||
st = metav1.ConditionFalse
|
||||
return res, nil
|
||||
}
|
||||
st = metav1.ConditionTrue
|
||||
if readyReplicas < replicas {
|
||||
reason = reasonPartiallyReady
|
||||
} else {
|
||||
reason = reasonReady
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// endpointReadyForPod returns true if the endpoint is for the Pod's IPv4 address and is ready to serve traffic.
|
||||
// Endpoint must not be nil.
|
||||
func endpointReadyForPod(ep *discoveryv1.Endpoint, pod *corev1.Pod, l *zap.SugaredLogger) bool {
|
||||
podIP, err := podIPv4(pod)
|
||||
if err != nil {
|
||||
l.Infof("[unexpected] error retrieving Pod's IPv4 address: %v", err)
|
||||
return false
|
||||
}
|
||||
// Currently we only ever set a single address on and Endpoint and nothing else is meant to modify this.
|
||||
if len(ep.Addresses) != 1 {
|
||||
return false
|
||||
}
|
||||
return strings.EqualFold(ep.Addresses[0], podIP) &&
|
||||
*ep.Conditions.Ready &&
|
||||
*ep.Conditions.Serving &&
|
||||
!*ep.Conditions.Terminating
|
||||
}
|
||||
Reference in New Issue
Block a user