cmd/k8s-operator,k8s-operator: Allow the use of multiple tailnets (#18344)
This commit contains the implementation of multi-tailnet support within the Kubernetes Operator
Each of our custom resources now expose the `spec.tailnet` field. This field is a string that must match the name of an existing `Tailnet` resource. A `Tailnet` resource looks like this:
```yaml
apiVersion: tailscale.com/v1alpha1
kind: Tailnet
metadata:
name: example # This is the name that must be referenced by other resources
spec:
credentials:
secretName: example-oauth
```
Each `Tailnet` references a `Secret` resource that contains a set of oauth credentials. This secret must be created in the same namespace as the operator:
```yaml
apiVersion: v1
kind: Secret
metadata:
name: example-oauth # This is the name that's referenced by the Tailnet resource.
namespace: tailscale
stringData:
client_id: "client-id"
client_secret: "client-secret"
```
When created, the operator performs a basic check that the oauth client has access to all required scopes. This is done using read actions on devices, keys & services. While this doesn't capture a missing "write" permission, it catches completely missing permissions. Once this check passes, the `Tailnet` moves into a ready state and can be referenced. Attempting to use a `Tailnet` in a non-ready state will stall the deployment of `Connector`s, `ProxyGroup`s and `Recorder`s until the `Tailnet` becomes ready.
The `spec.tailnet` field informs the operator that a `Connector`, `ProxyGroup`, or `Recorder` must be given an auth key generated using the specified oauth client. For backwards compatibility, the set of credentials the operator is configured with are considered the default. That is, where `spec.tailnet` is not set, the resource will be deployed in the same tailnet as the operator.
Updates https://github.com/tailscale/corp/issues/34561
This commit is contained in:
@@ -49,11 +49,12 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed"
|
||||
reasonProxyGroupReady = "ProxyGroupReady"
|
||||
reasonProxyGroupAvailable = "ProxyGroupAvailable"
|
||||
reasonProxyGroupCreating = "ProxyGroupCreating"
|
||||
reasonProxyGroupInvalid = "ProxyGroupInvalid"
|
||||
reasonProxyGroupCreationFailed = "ProxyGroupCreationFailed"
|
||||
reasonProxyGroupReady = "ProxyGroupReady"
|
||||
reasonProxyGroupAvailable = "ProxyGroupAvailable"
|
||||
reasonProxyGroupCreating = "ProxyGroupCreating"
|
||||
reasonProxyGroupInvalid = "ProxyGroupInvalid"
|
||||
reasonProxyGroupTailnetUnavailable = "ProxyGroupTailnetUnavailable"
|
||||
|
||||
// Copied from k8s.io/apiserver/pkg/registry/generic/registry/store.go@cccad306d649184bf2a0e319ba830c53f65c445c
|
||||
optimisticLockErrorMsg = "the object has been modified; please apply your changes to the latest version and try again"
|
||||
@@ -117,6 +118,23 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
} else if err != nil {
|
||||
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err)
|
||||
}
|
||||
|
||||
tailscaleClient := r.tsClient
|
||||
if pg.Spec.Tailnet != "" {
|
||||
tc, err := clientForTailnet(ctx, r.Client, r.tsNamespace, pg.Spec.Tailnet)
|
||||
if err != nil {
|
||||
oldPGStatus := pg.Status.DeepCopy()
|
||||
nrr := ¬ReadyReason{
|
||||
reason: reasonProxyGroupTailnetUnavailable,
|
||||
message: err.Error(),
|
||||
}
|
||||
|
||||
return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, make(map[string][]netip.AddrPort)))
|
||||
}
|
||||
|
||||
tailscaleClient = tc
|
||||
}
|
||||
|
||||
if markedForDeletion(pg) {
|
||||
logger.Debugf("ProxyGroup is being deleted, cleaning up resources")
|
||||
ix := xslices.Index(pg.Finalizers, FinalizerName)
|
||||
@@ -125,7 +143,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
return reconcile.Result{}, nil
|
||||
}
|
||||
|
||||
if done, err := r.maybeCleanup(ctx, pg); err != nil {
|
||||
if done, err := r.maybeCleanup(ctx, tailscaleClient, pg); err != nil {
|
||||
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||
logger.Infof("optimistic lock error, retrying: %s", err)
|
||||
return reconcile.Result{}, nil
|
||||
@@ -144,7 +162,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
}
|
||||
|
||||
oldPGStatus := pg.Status.DeepCopy()
|
||||
staticEndpoints, nrr, err := r.reconcilePG(ctx, pg, logger)
|
||||
staticEndpoints, nrr, err := r.reconcilePG(ctx, tailscaleClient, pg, logger)
|
||||
return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, staticEndpoints))
|
||||
}
|
||||
|
||||
@@ -152,7 +170,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
// for deletion. It is separated out from Reconcile to make a clear separation
|
||||
// between reconciling the ProxyGroup, and posting the status of its created
|
||||
// resources onto the ProxyGroup status field.
|
||||
func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
if !slices.Contains(pg.Finalizers, FinalizerName) {
|
||||
// This log line is printed exactly once during initial provisioning,
|
||||
// because once the finalizer is in place this block gets skipped. So,
|
||||
@@ -193,7 +211,7 @@ func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, pg *tsapi.ProxyG
|
||||
return notReady(reasonProxyGroupInvalid, fmt.Sprintf("invalid ProxyGroup spec: %v", err))
|
||||
}
|
||||
|
||||
staticEndpoints, nrr, err := r.maybeProvision(ctx, pg, proxyClass)
|
||||
staticEndpoints, nrr, err := r.maybeProvision(ctx, tailscaleClient, pg, proxyClass)
|
||||
if err != nil {
|
||||
return nil, nrr, err
|
||||
}
|
||||
@@ -279,7 +297,7 @@ func (r *ProxyGroupReconciler) validate(ctx context.Context, pg *tsapi.ProxyGrou
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
logger := r.logger(pg.Name)
|
||||
r.mu.Lock()
|
||||
r.ensureAddedToGaugeForProxyGroup(pg)
|
||||
@@ -302,7 +320,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
|
||||
}
|
||||
}
|
||||
|
||||
staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, pg, proxyClass, svcToNodePorts)
|
||||
staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, tailscaleClient, pg, proxyClass, svcToNodePorts)
|
||||
if err != nil {
|
||||
var selectorErr *FindStaticEndpointErr
|
||||
if errors.As(err, &selectorErr) {
|
||||
@@ -414,7 +432,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, pg *tsapi.Pro
|
||||
return r.notReadyErrf(pg, logger, "error reconciling metrics resources: %w", err)
|
||||
}
|
||||
|
||||
if err := r.cleanupDanglingResources(ctx, pg, proxyClass); err != nil {
|
||||
if err := r.cleanupDanglingResources(ctx, tailscaleClient, pg, proxyClass); err != nil {
|
||||
return r.notReadyErrf(pg, logger, "error cleaning up dangling resources: %w", err)
|
||||
}
|
||||
|
||||
@@ -611,7 +629,7 @@ func (r *ProxyGroupReconciler) ensureNodePortServiceCreated(ctx context.Context,
|
||||
|
||||
// cleanupDanglingResources ensures we don't leak config secrets, state secrets, and
|
||||
// tailnet devices when the number of replicas specified is reduced.
|
||||
func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
|
||||
func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
|
||||
logger := r.logger(pg.Name)
|
||||
metadata, err := r.getNodeMetadata(ctx, pg)
|
||||
if err != nil {
|
||||
@@ -625,7 +643,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg
|
||||
|
||||
// Dangling resource, delete the config + state Secrets, as well as
|
||||
// deleting the device from the tailnet.
|
||||
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
|
||||
if err := r.deleteTailnetDevice(ctx, tailscaleClient, m.tsID, logger); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.Delete(ctx, m.stateSecret); err != nil && !apierrors.IsNotFound(err) {
|
||||
@@ -668,7 +686,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, pg
|
||||
// maybeCleanup just deletes the device from the tailnet. All the kubernetes
|
||||
// resources linked to a ProxyGroup will get cleaned up via owner references
|
||||
// (which we can use because they are all in the same namespace).
|
||||
func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.ProxyGroup) (bool, error) {
|
||||
func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup) (bool, error) {
|
||||
logger := r.logger(pg.Name)
|
||||
|
||||
metadata, err := r.getNodeMetadata(ctx, pg)
|
||||
@@ -677,7 +695,7 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
|
||||
}
|
||||
|
||||
for _, m := range metadata {
|
||||
if err := r.deleteTailnetDevice(ctx, m.tsID, logger); err != nil {
|
||||
if err := r.deleteTailnetDevice(ctx, tailscaleClient, m.tsID, logger); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
@@ -698,9 +716,9 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, pg *tsapi.Proxy
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
|
||||
func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, tailscaleClient tsClient, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
|
||||
logger.Debugf("deleting device %s from control", string(id))
|
||||
if err := r.tsClient.DeleteDevice(ctx, string(id)); err != nil {
|
||||
if err := tailscaleClient.DeleteDevice(ctx, string(id)); err != nil {
|
||||
errResp := &tailscale.ErrResponse{}
|
||||
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
|
||||
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
|
||||
@@ -714,7 +732,13 @@ func (r *ProxyGroupReconciler) deleteTailnetDevice(ctx context.Context, id tailc
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass, svcToNodePorts map[string]uint16) (endpoints map[string][]netip.AddrPort, err error) {
|
||||
func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
|
||||
ctx context.Context,
|
||||
tailscaleClient tsClient,
|
||||
pg *tsapi.ProxyGroup,
|
||||
proxyClass *tsapi.ProxyClass,
|
||||
svcToNodePorts map[string]uint16,
|
||||
) (endpoints map[string][]netip.AddrPort, err error) {
|
||||
logger := r.logger(pg.Name)
|
||||
endpoints = make(map[string][]netip.AddrPort, pgReplicas(pg)) // keyed by Service name.
|
||||
for i := range pgReplicas(pg) {
|
||||
@@ -728,7 +752,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
|
||||
}
|
||||
|
||||
var existingCfgSecret *corev1.Secret // unmodified copy of secret
|
||||
if err := r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
|
||||
if err = r.Get(ctx, client.ObjectKeyFromObject(cfgSecret), cfgSecret); err == nil {
|
||||
logger.Debugf("Secret %s/%s already exists", cfgSecret.GetNamespace(), cfgSecret.GetName())
|
||||
existingCfgSecret = cfgSecret.DeepCopy()
|
||||
} else if !apierrors.IsNotFound(err) {
|
||||
@@ -742,7 +766,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
|
||||
if len(tags) == 0 {
|
||||
tags = r.defaultTags
|
||||
}
|
||||
key, err := newAuthKey(ctx, r.tsClient, tags)
|
||||
key, err := newAuthKey(ctx, tailscaleClient, tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -757,7 +781,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(ctx context.Context, p
|
||||
Namespace: r.tsNamespace,
|
||||
},
|
||||
}
|
||||
if err := r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
|
||||
if err = r.Get(ctx, client.ObjectKeyFromObject(stateSecret), stateSecret); err != nil && !apierrors.IsNotFound(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user