cmd/k8s-operator,k8s-operator: allow the operator to deploy exit nodes via Connector custom resource (#10724)
cmd/k8s-operator/deploy/crds,k8s-operator/apis/v1alpha1: allow to define an exit node via Connector CR. Make it possible to define an exit node to be deployed to a Kubernetes cluster via Connector Custom resource. Also changes to Connector API so that one Connector corresponds to one Tailnet node that can be either a subnet router or an exit node or both. The Kubernetes operator parses Connector custom resource and, if .spec.isExitNode is set, configures that Tailscale node deployed for that connector as an exit node. Signed-off-by: Irbe Krumina <irbe@tailscale.com> Co-authored-by: Anton Tolchanov <anton@tailscale.com>
This commit is contained in:
+123
-122
@@ -10,7 +10,6 @@ import (
|
||||
"fmt"
|
||||
"net/netip"
|
||||
"slices"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -33,17 +32,15 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
reasonSubnetRouterCreationFailed = "SubnetRouterCreationFailed"
|
||||
reasonSubnetRouterCreated = "SubnetRouterCreated"
|
||||
reasonSubnetRouterCleanupFailed = "SubnetRouterCleanupFailed"
|
||||
reasonSubnetRouterCleanupInProgress = "SubnetRouterCleanupInProgress"
|
||||
reasonSubnetRouterInvalid = "SubnetRouterInvalid"
|
||||
reasonConnectorCreationFailed = "ConnectorCreationFailed"
|
||||
|
||||
messageSubnetRouterCreationFailed = "Failed creating subnet router for routes %s: %v"
|
||||
messageSubnetRouterInvalid = "Subnet router is invalid: %v"
|
||||
messageSubnetRouterCreated = "Created subnet router for routes %s"
|
||||
messageSubnetRouterCleanupFailed = "Failed cleaning up subnet router resources: %v"
|
||||
msgSubnetRouterCleanupInProgress = "SubnetRouterCleanupInProgress"
|
||||
reasonConnectorCreated = "ConnectorCreated"
|
||||
reasonConnectorCleanupFailed = "ConnectorCleanupFailed"
|
||||
reasonConnectorCleanupInProgress = "ConnectorCleanupInProgress"
|
||||
reasonConnectorInvalid = "ConnectorInvalid"
|
||||
|
||||
messageConnectorCreationFailed = "Failed creating Connector: %v"
|
||||
messageConnectorInvalid = "Connector is invalid: %v"
|
||||
|
||||
shortRequeue = time.Second * 5
|
||||
)
|
||||
@@ -61,42 +58,44 @@ type ConnectorReconciler struct {
|
||||
|
||||
mu sync.Mutex // protects following
|
||||
|
||||
// subnetRouters tracks the subnet routers managed by this Tailscale
|
||||
// Operator instance.
|
||||
subnetRouters set.Slice[types.UID]
|
||||
subnetRouters set.Slice[types.UID] // for subnet routers gauge
|
||||
exitNodes set.Slice[types.UID] // for exit nodes gauge
|
||||
}
|
||||
|
||||
var (
|
||||
// gaugeIngressResources tracks the number of subnet routers that we're
|
||||
// currently managing.
|
||||
gaugeSubnetRouterResources = clientmetric.NewGauge("k8s_subnet_router_resources")
|
||||
// gaugeConnectorResources tracks the overall number of Connectors currently managed by this operator instance.
|
||||
gaugeConnectorResources = clientmetric.NewGauge("k8s_connector_resources")
|
||||
// gaugeConnectorSubnetRouterResources tracks the number of Connectors managed by this operator instance that are subnet routers.
|
||||
gaugeConnectorSubnetRouterResources = clientmetric.NewGauge("k8s_connector_subnetrouter_resources")
|
||||
// gaugeConnectorExitNodeResources tracks the number of Connectors currently managed by this operator instance that are exit nodes.
|
||||
gaugeConnectorExitNodeResources = clientmetric.NewGauge("k8s_connector_exitnode_resources")
|
||||
)
|
||||
|
||||
func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, err error) {
|
||||
logger := a.logger.With("connector", req.Name)
|
||||
func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Request) (res reconcile.Result, err error) {
|
||||
logger := a.logger.With("Connector", req.Name)
|
||||
logger.Debugf("starting reconcile")
|
||||
defer logger.Debugf("reconcile finished")
|
||||
|
||||
cn := new(tsapi.Connector)
|
||||
err = a.Get(ctx, req.NamespacedName, cn)
|
||||
if apierrors.IsNotFound(err) {
|
||||
logger.Debugf("connector not found, assuming it was deleted")
|
||||
logger.Debugf("Connector not found, assuming it was deleted")
|
||||
return reconcile.Result{}, nil
|
||||
} else if err != nil {
|
||||
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com Connector: %w", err)
|
||||
}
|
||||
if !cn.DeletionTimestamp.IsZero() {
|
||||
logger.Debugf("connector is being deleted or should not be exposed, cleaning up components")
|
||||
logger.Debugf("Connector is being deleted or should not be exposed, cleaning up resources")
|
||||
ix := xslices.Index(cn.Finalizers, FinalizerName)
|
||||
if ix < 0 {
|
||||
logger.Debugf("no finalizer, nothing to do")
|
||||
return reconcile.Result{}, nil
|
||||
}
|
||||
|
||||
if done, err := a.maybeCleanupSubnetRouter(ctx, logger, cn); err != nil {
|
||||
if done, err := a.maybeCleanupConnector(ctx, logger, cn); err != nil {
|
||||
return reconcile.Result{}, err
|
||||
} else if !done {
|
||||
logger.Debugf("cleanup not finished, will retry...")
|
||||
logger.Debugf("Connector resource cleanup not yet finished, will retry...")
|
||||
return reconcile.Result{RequeueAfter: shortRequeue}, nil
|
||||
}
|
||||
|
||||
@@ -104,93 +103,110 @@ func (a *ConnectorReconciler) Reconcile(ctx context.Context, req reconcile.Reque
|
||||
if err := a.Update(ctx, cn); err != nil {
|
||||
return reconcile.Result{}, err
|
||||
}
|
||||
logger.Infof("connector resources cleaned up")
|
||||
logger.Infof("Connector resources cleaned up")
|
||||
return reconcile.Result{}, nil
|
||||
}
|
||||
|
||||
oldCnStatus := cn.Status.DeepCopy()
|
||||
defer func() {
|
||||
if cn.Status.SubnetRouter == nil {
|
||||
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionUnknown, "", "", cn.Generation, a.clock, logger)
|
||||
} else if cn.Status.SubnetRouter.Ready == metav1.ConditionTrue {
|
||||
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonSubnetRouterCreated, reasonSubnetRouterCreated, cn.Generation, a.clock, logger)
|
||||
} else {
|
||||
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, metav1.ConditionFalse, cn.Status.SubnetRouter.Reason, cn.Status.SubnetRouter.Reason, cn.Generation, a.clock, logger)
|
||||
}
|
||||
setStatus := func(cn *tsapi.Connector, conditionType tsapi.ConnectorConditionType, status metav1.ConditionStatus, reason, message string) (reconcile.Result, error) {
|
||||
tsoperator.SetConnectorCondition(cn, tsapi.ConnectorReady, status, reason, message, cn.Generation, a.clock, logger)
|
||||
if !apiequality.Semantic.DeepEqual(oldCnStatus, cn.Status) {
|
||||
// an error encountered here should get returned by the Reconcile function
|
||||
// An error encountered here should get returned by the Reconcile function.
|
||||
if updateErr := a.Client.Status().Update(ctx, cn); updateErr != nil {
|
||||
err = updateErr
|
||||
err = errors.Wrap(err, updateErr.Error())
|
||||
}
|
||||
}
|
||||
}()
|
||||
return res, err
|
||||
}
|
||||
|
||||
if !slices.Contains(cn.Finalizers, FinalizerName) {
|
||||
// This log line is printed exactly once during initial provisioning,
|
||||
// because once the finalizer is in place this block gets skipped. So,
|
||||
// this is a nice place to tell the operator that the high level,
|
||||
// multi-reconcile operation is underway.
|
||||
logger.Infof("ensuring connector is set up")
|
||||
logger.Infof("ensuring Connector is set up")
|
||||
cn.Finalizers = append(cn.Finalizers, FinalizerName)
|
||||
if err := a.Update(ctx, cn); err != nil {
|
||||
err = fmt.Errorf("failed to add finalizer: %w", err)
|
||||
logger.Errorf("error adding finalizer: %v", err)
|
||||
return reconcile.Result{}, err
|
||||
logger.Errorf("error adding finalizer: %w", err)
|
||||
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, reasonConnectorCreationFailed)
|
||||
}
|
||||
}
|
||||
|
||||
// A Connector with unset .spec.subnetRouter and unset
|
||||
// cn.spec.subnetRouter.Routes will be rejected at apply time (because
|
||||
// these fields are set as required by our CRD validation). This check
|
||||
// is here for if our CRD validation breaks unnoticed we don't crash the
|
||||
// operator with nil pointer exception.
|
||||
if cn.Spec.SubnetRouter == nil || len(cn.Spec.SubnetRouter.Routes) < 1 {
|
||||
return reconcile.Result{}, nil
|
||||
if err := a.validate(cn); err != nil {
|
||||
logger.Errorf("error validating Connector spec: %w", err)
|
||||
message := fmt.Sprintf(messageConnectorInvalid, err)
|
||||
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorInvalid, message)
|
||||
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorInvalid, message)
|
||||
}
|
||||
|
||||
if err := validateSubnetRouter(*cn.Spec.SubnetRouter); err != nil {
|
||||
msg := fmt.Sprintf(messageSubnetRouterInvalid, err)
|
||||
cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
|
||||
Ready: metav1.ConditionFalse,
|
||||
Reason: reasonSubnetRouterInvalid,
|
||||
Message: msg,
|
||||
}
|
||||
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonSubnetRouterInvalid, msg)
|
||||
return reconcile.Result{}, nil
|
||||
if err = a.maybeProvisionConnector(ctx, logger, cn); err != nil {
|
||||
logger.Errorf("error creating Connector resources: %w", err)
|
||||
message := fmt.Sprintf(messageConnectorCreationFailed, err)
|
||||
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonConnectorCreationFailed, message)
|
||||
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionFalse, reasonConnectorCreationFailed, message)
|
||||
}
|
||||
|
||||
var sb strings.Builder
|
||||
sb.WriteString(string(cn.Spec.SubnetRouter.Routes[0]))
|
||||
for _, r := range cn.Spec.SubnetRouter.Routes[1:] {
|
||||
sb.WriteString(fmt.Sprintf(",%s", r))
|
||||
logger.Info("Connector resources synced")
|
||||
cn.Status.IsExitNode = cn.Spec.ExitNode
|
||||
if cn.Spec.SubnetRouter != nil {
|
||||
cn.Status.SubnetRoutes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
|
||||
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
||||
}
|
||||
cidrsS := sb.String()
|
||||
logger.Debugf("ensuring a subnet router is deployed")
|
||||
err = a.maybeProvisionSubnetRouter(ctx, logger, cn, cidrsS)
|
||||
if err != nil {
|
||||
msg := fmt.Sprintf(messageSubnetRouterCreationFailed, cidrsS, err)
|
||||
cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
|
||||
Ready: metav1.ConditionFalse,
|
||||
Reason: reasonSubnetRouterCreationFailed,
|
||||
Message: msg,
|
||||
}
|
||||
a.recorder.Eventf(cn, corev1.EventTypeWarning, reasonSubnetRouterCreationFailed, msg)
|
||||
return reconcile.Result{}, err
|
||||
}
|
||||
cn.Status.SubnetRouter = &tsapi.SubnetRouterStatus{
|
||||
Routes: cidrsS,
|
||||
Ready: metav1.ConditionTrue,
|
||||
Reason: reasonSubnetRouterCreated,
|
||||
Message: fmt.Sprintf(messageSubnetRouterCreated, cidrsS),
|
||||
}
|
||||
return reconcile.Result{}, nil
|
||||
cn.Status.SubnetRoutes = ""
|
||||
return setStatus(cn, tsapi.ConnectorReady, metav1.ConditionTrue, reasonConnectorCreated, reasonConnectorCreated)
|
||||
}
|
||||
|
||||
func (a *ConnectorReconciler) maybeCleanupSubnetRouter(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
|
||||
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "subnetrouter")); err != nil {
|
||||
return false, fmt.Errorf("failed to cleanup: %w", err)
|
||||
// maybeProvisionConnector ensures that any new resources required for this
|
||||
// Connector instance are deployed to the cluster.
|
||||
func (a *ConnectorReconciler) maybeProvisionConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) error {
|
||||
hostname := cn.Name + "-connector"
|
||||
if cn.Spec.Hostname != "" {
|
||||
hostname = string(cn.Spec.Hostname)
|
||||
}
|
||||
crl := childResourceLabels(cn.Name, a.tsnamespace, "connector")
|
||||
sts := &tailscaleSTSConfig{
|
||||
ParentResourceName: cn.Name,
|
||||
ParentResourceUID: string(cn.UID),
|
||||
Hostname: hostname,
|
||||
ChildResourceLabels: crl,
|
||||
Tags: cn.Spec.Tags.Stringify(),
|
||||
Connector: &connector{
|
||||
isExitNode: cn.Spec.ExitNode,
|
||||
},
|
||||
}
|
||||
|
||||
if cn.Spec.SubnetRouter != nil && len(cn.Spec.SubnetRouter.AdvertiseRoutes) > 0 {
|
||||
sts.Connector.routes = cn.Spec.SubnetRouter.AdvertiseRoutes.Stringify()
|
||||
}
|
||||
|
||||
a.mu.Lock()
|
||||
if sts.Connector.isExitNode {
|
||||
a.exitNodes.Add(cn.UID)
|
||||
} else {
|
||||
a.exitNodes.Remove(cn.UID)
|
||||
}
|
||||
if sts.Connector.routes != "" {
|
||||
a.subnetRouters.Add(cn.GetUID())
|
||||
} else {
|
||||
a.subnetRouters.Remove(cn.GetUID())
|
||||
}
|
||||
a.mu.Unlock()
|
||||
gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
||||
gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
|
||||
var connectors set.Slice[types.UID]
|
||||
connectors.AddSlice(a.exitNodes.Slice())
|
||||
connectors.AddSlice(a.subnetRouters.Slice())
|
||||
gaugeConnectorResources.Set(int64(connectors.Len()))
|
||||
|
||||
_, err := a.ssr.Provision(ctx, logger, sts)
|
||||
return err
|
||||
}
|
||||
|
||||
func (a *ConnectorReconciler) maybeCleanupConnector(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector) (bool, error) {
|
||||
if done, err := a.ssr.Cleanup(ctx, logger, childResourceLabels(cn.Name, a.tsnamespace, "connector")); err != nil {
|
||||
return false, fmt.Errorf("failed to cleanup Connector resources: %w", err)
|
||||
} else if !done {
|
||||
logger.Debugf("cleanup not done yet, waiting for next reconcile")
|
||||
logger.Debugf("Connector cleanup not done yet, waiting for next reconcile")
|
||||
return false, nil
|
||||
}
|
||||
|
||||
@@ -198,44 +214,39 @@ func (a *ConnectorReconciler) maybeCleanupSubnetRouter(ctx context.Context, logg
|
||||
// exactly once at the very end of cleanup, because the final step of
|
||||
// cleanup removes the tailscale finalizer, which will make all future
|
||||
// reconciles exit early.
|
||||
logger.Infof("cleaned up subnet router")
|
||||
logger.Infof("cleaned up Connector resources")
|
||||
a.mu.Lock()
|
||||
defer a.mu.Unlock()
|
||||
a.subnetRouters.Remove(cn.UID)
|
||||
gaugeSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
||||
a.exitNodes.Remove(cn.UID)
|
||||
a.mu.Unlock()
|
||||
gaugeConnectorExitNodeResources.Set(int64(a.exitNodes.Len()))
|
||||
gaugeConnectorSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
||||
var connectors set.Slice[types.UID]
|
||||
connectors.AddSlice(a.exitNodes.Slice())
|
||||
connectors.AddSlice(a.subnetRouters.Slice())
|
||||
gaugeConnectorResources.Set(int64(connectors.Len()))
|
||||
return true, nil
|
||||
}
|
||||
|
||||
// maybeProvisionSubnetRouter maybe deploys subnet router that exposes a subset of cluster cidrs to the tailnet
|
||||
func (a *ConnectorReconciler) maybeProvisionSubnetRouter(ctx context.Context, logger *zap.SugaredLogger, cn *tsapi.Connector, cidrs string) error {
|
||||
if cn.Spec.SubnetRouter == nil || len(cn.Spec.SubnetRouter.Routes) < 1 {
|
||||
func (a *ConnectorReconciler) validate(cn *tsapi.Connector) error {
|
||||
// Connector fields are already validated at apply time with CEL validation
|
||||
// on custom resource fields. The checks here are a backup in case the
|
||||
// CEL validation breaks without us noticing.
|
||||
if !(cn.Spec.SubnetRouter != nil || cn.Spec.ExitNode) {
|
||||
return errors.New("invalid spec: a Connector must expose subnet routes or act as an exit node (or both)")
|
||||
}
|
||||
if cn.Spec.SubnetRouter == nil {
|
||||
return nil
|
||||
}
|
||||
a.mu.Lock()
|
||||
a.subnetRouters.Add(cn.UID)
|
||||
gaugeSubnetRouterResources.Set(int64(a.subnetRouters.Len()))
|
||||
a.mu.Unlock()
|
||||
|
||||
crl := childResourceLabels(cn.Name, a.tsnamespace, "subnetrouter")
|
||||
hostname := hostnameForSubnetRouter(cn)
|
||||
sts := &tailscaleSTSConfig{
|
||||
ParentResourceName: cn.Name,
|
||||
ParentResourceUID: string(cn.UID),
|
||||
Hostname: hostname,
|
||||
ChildResourceLabels: crl,
|
||||
Routes: cidrs,
|
||||
}
|
||||
for _, tag := range cn.Spec.SubnetRouter.Tags {
|
||||
sts.Tags = append(sts.Tags, string(tag))
|
||||
}
|
||||
|
||||
_, err := a.ssr.Provision(ctx, logger, sts)
|
||||
|
||||
return err
|
||||
return validateSubnetRouter(cn.Spec.SubnetRouter)
|
||||
}
|
||||
func validateSubnetRouter(sb tsapi.SubnetRouter) error {
|
||||
|
||||
func validateSubnetRouter(sb *tsapi.SubnetRouter) error {
|
||||
if len(sb.AdvertiseRoutes) < 1 {
|
||||
return errors.New("invalid subnet router spec: no routes defined")
|
||||
}
|
||||
var err error
|
||||
for _, route := range sb.Routes {
|
||||
for _, route := range sb.AdvertiseRoutes {
|
||||
pfx, e := netip.ParsePrefix(string(route))
|
||||
if e != nil {
|
||||
err = errors.Wrap(err, fmt.Sprintf("route %s is invalid: %v", route, err))
|
||||
@@ -247,13 +258,3 @@ func validateSubnetRouter(sb tsapi.SubnetRouter) error {
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func hostnameForSubnetRouter(cn *tsapi.Connector) string {
|
||||
if cn.Spec.SubnetRouter == nil {
|
||||
return ""
|
||||
}
|
||||
if cn.Spec.SubnetRouter.Hostname != "" {
|
||||
return string(cn.Spec.SubnetRouter.Hostname)
|
||||
}
|
||||
return cn.Name + "-" + "subnetrouter"
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user