cmd/k8s-operator: migrate to tailscale-client-go-v2 (#19010)
This commit modifies the kubernetes operator to use the `tailscale-client-go-v2` package instead of the internal tailscale client it was previously using. This now gives us the ability to expand out custom resources and features as they become available via the API module. The tailnet reconciler has also been modified to manage clients as tailnets are created and removed, providing each subsequent reconciler with a single `ClientProvider` that obtains a tailscale client for the respective tailnet by name, or the operator's default when presented with a blank string. Fixes: https://github.com/tailscale/corp/issues/38418 Signed-off-by: David Bond <davidsbond93@gmail.com>
This commit is contained in:
@@ -10,7 +10,6 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"slices"
|
||||
"sort"
|
||||
@@ -33,11 +32,12 @@ import (
|
||||
"k8s.io/client-go/tools/record"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
||||
"tailscale.com/client/tailscale/v2"
|
||||
|
||||
"tailscale.com/client/tailscale"
|
||||
"tailscale.com/ipn"
|
||||
tsoperator "tailscale.com/k8s-operator"
|
||||
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
||||
"tailscale.com/k8s-operator/tsclient"
|
||||
"tailscale.com/kube/egressservices"
|
||||
"tailscale.com/kube/k8s-proxy/conf"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
@@ -85,7 +85,7 @@ type ProxyGroupReconciler struct {
|
||||
log *zap.SugaredLogger
|
||||
recorder record.EventRecorder
|
||||
clock tstime.Clock
|
||||
tsClient tsClient
|
||||
clients ClientProvider
|
||||
|
||||
// User-specified defaults from the helm installation.
|
||||
tsNamespace string
|
||||
@@ -122,7 +122,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
return reconcile.Result{}, fmt.Errorf("failed to get tailscale.com ProxyGroup: %w", err)
|
||||
}
|
||||
|
||||
tailscaleClient, loginUrl, err := r.getClientAndLoginURL(ctx, pg.Spec.Tailnet)
|
||||
tsClient, err := r.clients.For(pg.Spec.Tailnet)
|
||||
if err != nil {
|
||||
oldPGStatus := pg.Status.DeepCopy()
|
||||
nrr := ¬ReadyReason{
|
||||
@@ -141,7 +141,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
return reconcile.Result{}, nil
|
||||
}
|
||||
|
||||
if done, err := r.maybeCleanup(ctx, tailscaleClient, pg); err != nil {
|
||||
if done, err := r.maybeCleanup(ctx, tsClient, pg); err != nil {
|
||||
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||
logger.Infof("optimistic lock error, retrying: %s", err)
|
||||
return reconcile.Result{}, nil
|
||||
@@ -160,7 +160,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
}
|
||||
|
||||
oldPGStatus := pg.Status.DeepCopy()
|
||||
staticEndpoints, nrr, err := r.reconcilePG(ctx, tailscaleClient, loginUrl, pg, logger)
|
||||
staticEndpoints, nrr, err := r.reconcilePG(ctx, tsClient, pg, logger)
|
||||
return reconcile.Result{}, errors.Join(err, r.maybeUpdateStatus(ctx, logger, pg, oldPGStatus, nrr, staticEndpoints))
|
||||
}
|
||||
|
||||
@@ -168,7 +168,7 @@ func (r *ProxyGroupReconciler) Reconcile(ctx context.Context, req reconcile.Requ
|
||||
// for deletion. It is separated out from Reconcile to make a clear separation
|
||||
// between reconciling the ProxyGroup, and posting the status of its created
|
||||
// resources onto the ProxyGroup status field.
|
||||
func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient tsClient, loginUrl string, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tsClient tsclient.Client, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
if !slices.Contains(pg.Finalizers, FinalizerName) {
|
||||
// This log line is printed exactly once during initial provisioning,
|
||||
// because once the finalizer is in place this block gets skipped. So,
|
||||
@@ -209,7 +209,7 @@ func (r *ProxyGroupReconciler) reconcilePG(ctx context.Context, tailscaleClient
|
||||
return notReady(reasonProxyGroupInvalid, fmt.Sprintf("invalid ProxyGroup spec: %v", err))
|
||||
}
|
||||
|
||||
staticEndpoints, nrr, err := r.maybeProvision(ctx, tailscaleClient, loginUrl, pg, proxyClass)
|
||||
staticEndpoints, nrr, err := r.maybeProvision(ctx, tsClient, pg, proxyClass)
|
||||
if err != nil {
|
||||
return nil, nrr, err
|
||||
}
|
||||
@@ -295,7 +295,7 @@ func (r *ProxyGroupReconciler) validate(ctx context.Context, pg *tsapi.ProxyGrou
|
||||
return errors.Join(errs...)
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, loginUrl string, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tsClient tsclient.Client, pg *tsapi.ProxyGroup, proxyClass *tsapi.ProxyClass) (map[string][]netip.AddrPort, *notReadyReason, error) {
|
||||
logger := r.logger(pg.Name)
|
||||
r.mu.Lock()
|
||||
r.ensureStateAddedForProxyGroup(pg)
|
||||
@@ -317,7 +317,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClie
|
||||
}
|
||||
}
|
||||
|
||||
staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, tailscaleClient, loginUrl, pg, proxyClass, svcToNodePorts)
|
||||
staticEndpoints, err := r.ensureConfigSecretsCreated(ctx, tsClient, pg, proxyClass, svcToNodePorts)
|
||||
if err != nil {
|
||||
if _, ok := errors.AsType[*FindStaticEndpointErr](err); ok {
|
||||
reason := reasonProxyGroupCreationFailed
|
||||
@@ -428,7 +428,7 @@ func (r *ProxyGroupReconciler) maybeProvision(ctx context.Context, tailscaleClie
|
||||
return r.notReadyErrf(pg, logger, "error reconciling metrics resources: %w", err)
|
||||
}
|
||||
|
||||
if err := r.cleanupDanglingResources(ctx, tailscaleClient, pg, proxyClass); err != nil {
|
||||
if err := r.cleanupDanglingResources(ctx, tsClient, pg, proxyClass); err != nil {
|
||||
return r.notReadyErrf(pg, logger, "error cleaning up dangling resources: %w", err)
|
||||
}
|
||||
|
||||
@@ -625,7 +625,7 @@ func (r *ProxyGroupReconciler) ensureNodePortServiceCreated(ctx context.Context,
|
||||
|
||||
// cleanupDanglingResources ensures we don't leak config secrets, state secrets, and
|
||||
// tailnet devices when the number of replicas specified is reduced.
|
||||
func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
|
||||
func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tsClient tsclient.Client, pg *tsapi.ProxyGroup, pc *tsapi.ProxyClass) error {
|
||||
logger := r.logger(pg.Name)
|
||||
metadata, err := getNodeMetadata(ctx, pg, r.Client, r.tsNamespace)
|
||||
if err != nil {
|
||||
@@ -639,7 +639,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tai
|
||||
|
||||
// Dangling resource, delete the config + state Secrets, as well as
|
||||
// deleting the device from the tailnet.
|
||||
if err := r.ensureDeviceDeleted(ctx, tailscaleClient, m.tsID, logger); err != nil {
|
||||
if err := r.ensureDeviceDeleted(ctx, tsClient, m.tsID, logger); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := r.Delete(ctx, m.stateSecret); err != nil && !apierrors.IsNotFound(err) {
|
||||
@@ -682,7 +682,7 @@ func (r *ProxyGroupReconciler) cleanupDanglingResources(ctx context.Context, tai
|
||||
// maybeCleanup just deletes the device from the tailnet. All the kubernetes
|
||||
// resources linked to a ProxyGroup will get cleaned up via owner references
|
||||
// (which we can use because they are all in the same namespace).
|
||||
func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup) (bool, error) {
|
||||
func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tsClient tsclient.Client, pg *tsapi.ProxyGroup) (bool, error) {
|
||||
logger := r.logger(pg.Name)
|
||||
|
||||
metadata, err := getNodeMetadata(ctx, pg, r.Client, r.tsNamespace)
|
||||
@@ -691,7 +691,7 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tailscaleClient
|
||||
}
|
||||
|
||||
for _, m := range metadata {
|
||||
if err := r.ensureDeviceDeleted(ctx, tailscaleClient, m.tsID, logger); err != nil {
|
||||
if err := r.ensureDeviceDeleted(ctx, tsClient, m.tsID, logger); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
@@ -712,25 +712,23 @@ func (r *ProxyGroupReconciler) maybeCleanup(ctx context.Context, tailscaleClient
|
||||
return true, nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) ensureDeviceDeleted(ctx context.Context, tailscaleClient tsClient, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
|
||||
func (r *ProxyGroupReconciler) ensureDeviceDeleted(ctx context.Context, tsClient tsclient.Client, id tailcfg.StableNodeID, logger *zap.SugaredLogger) error {
|
||||
logger.Debugf("deleting device %s from control", string(id))
|
||||
if err := tailscaleClient.DeleteDevice(ctx, string(id)); err != nil {
|
||||
if errResp, ok := errors.AsType[tailscale.ErrResponse](err); ok && errResp.Status == http.StatusNotFound {
|
||||
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
|
||||
} else {
|
||||
return fmt.Errorf("error deleting device: %w", err)
|
||||
}
|
||||
} else {
|
||||
logger.Debugf("device %s deleted from control", string(id))
|
||||
err := tsClient.Devices().Delete(ctx, string(id))
|
||||
switch {
|
||||
case tailscale.IsNotFound(err):
|
||||
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(id))
|
||||
case err != nil:
|
||||
return fmt.Errorf("error deleting device: %w", err)
|
||||
}
|
||||
|
||||
logger.Debugf("device %s deleted from control", string(id))
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
|
||||
ctx context.Context,
|
||||
tailscaleClient tsClient,
|
||||
loginUrl string,
|
||||
tsClient tsclient.Client,
|
||||
pg *tsapi.ProxyGroup,
|
||||
proxyClass *tsapi.ProxyClass,
|
||||
svcToNodePorts map[string]uint16,
|
||||
@@ -756,7 +754,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
authKey, err := r.getAuthKey(ctx, tailscaleClient, pg, existingCfgSecret, i, logger)
|
||||
authKey, err := r.getAuthKey(ctx, tsClient, pg, existingCfgSecret, i, logger)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -838,8 +836,8 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
|
||||
}
|
||||
}
|
||||
|
||||
if loginUrl != "" {
|
||||
cfg.ServerURL = new(loginUrl)
|
||||
if tsClient.LoginURL() != "" {
|
||||
cfg.ServerURL = new(tsClient.LoginURL())
|
||||
}
|
||||
|
||||
if proxyClass != nil && proxyClass.Spec.TailscaleConfig != nil {
|
||||
@@ -867,7 +865,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
|
||||
return nil, err
|
||||
}
|
||||
|
||||
configs, err := pgTailscaledConfig(pg, loginUrl, proxyClass, i, authKey, endpoints[nodePortSvcName], existingAdvertiseServices)
|
||||
configs, err := pgTailscaledConfig(pg, tsClient.LoginURL(), proxyClass, i, authKey, endpoints[nodePortSvcName], existingAdvertiseServices)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error creating tailscaled config: %w", err)
|
||||
}
|
||||
@@ -904,7 +902,7 @@ func (r *ProxyGroupReconciler) ensureConfigSecretsCreated(
|
||||
// A new key is created if the config Secret doesn't exist yet, or if the
|
||||
// proxy has requested a reissue via its state Secret. An existing key is
|
||||
// retained while the device hasn't authed or a reissue is in progress.
|
||||
func (r *ProxyGroupReconciler) getAuthKey(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, existingCfgSecret *corev1.Secret, ordinal int32, logger *zap.SugaredLogger) (*string, error) {
|
||||
func (r *ProxyGroupReconciler) getAuthKey(ctx context.Context, tsClient tsclient.Client, pg *tsapi.ProxyGroup, existingCfgSecret *corev1.Secret, ordinal int32, logger *zap.SugaredLogger) (*string, error) {
|
||||
// Get state Secret to check if it's already authed or has requested
|
||||
// a fresh auth key.
|
||||
stateSecret := &corev1.Secret{
|
||||
@@ -931,7 +929,7 @@ func (r *ProxyGroupReconciler) getAuthKey(ctx context.Context, tailscaleClient t
|
||||
|
||||
if !createAuthKey {
|
||||
var err error
|
||||
createAuthKey, err = r.shouldReissueAuthKey(ctx, tailscaleClient, pg, stateSecret, cfgAuthKey)
|
||||
createAuthKey, err = r.shouldReissueAuthKey(ctx, tsClient, pg, stateSecret, cfgAuthKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -945,7 +943,7 @@ func (r *ProxyGroupReconciler) getAuthKey(ctx context.Context, tailscaleClient t
|
||||
if len(tags) == 0 {
|
||||
tags = r.defaultTags
|
||||
}
|
||||
key, err := newAuthKey(ctx, tailscaleClient, tags)
|
||||
key, err := newAuthKey(ctx, tsClient, tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -965,7 +963,7 @@ func (r *ProxyGroupReconciler) getAuthKey(ctx context.Context, tailscaleClient t
|
||||
// shouldReissueAuthKey returns true if the proxy needs a new auth key. It
|
||||
// tracks in-flight reissues via authKeyReissuing to avoid duplicate API calls
|
||||
// across reconciles.
|
||||
func (r *ProxyGroupReconciler) shouldReissueAuthKey(ctx context.Context, tailscaleClient tsClient, pg *tsapi.ProxyGroup, stateSecret *corev1.Secret, cfgAuthKey *string) (shouldReissue bool, err error) {
|
||||
func (r *ProxyGroupReconciler) shouldReissueAuthKey(ctx context.Context, tsClient tsclient.Client, pg *tsapi.ProxyGroup, stateSecret *corev1.Secret, cfgAuthKey *string) (shouldReissue bool, err error) {
|
||||
r.mu.Lock()
|
||||
reissuing := r.authKeyReissuing[stateSecret.Name]
|
||||
r.mu.Unlock()
|
||||
@@ -1017,7 +1015,7 @@ func (r *ProxyGroupReconciler) shouldReissueAuthKey(ctx context.Context, tailsca
|
||||
r.log.Infof("Proxy failing to auth; attempting cleanup and new key")
|
||||
if tsID := stateSecret.Data[kubetypes.KeyDeviceID]; len(tsID) > 0 {
|
||||
id := tailcfg.StableNodeID(tsID)
|
||||
if err := r.ensureDeviceDeleted(ctx, tailscaleClient, id, r.log); err != nil {
|
||||
if err = r.ensureDeviceDeleted(ctx, tsClient, id, r.log); err != nil {
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
@@ -1305,29 +1303,6 @@ func (r *ProxyGroupReconciler) getRunningProxies(ctx context.Context, pg *tsapi.
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
// getClientAndLoginURL returns the appropriate Tailscale client and resolved login URL
|
||||
// for the given tailnet name. If no tailnet is specified, returns the default client
|
||||
// and login server. Applies fallback to the operator's login server if the tailnet
|
||||
// doesn't specify a custom login URL.
|
||||
func (r *ProxyGroupReconciler) getClientAndLoginURL(ctx context.Context, tailnetName string) (tsClient,
|
||||
string, error) {
|
||||
if tailnetName == "" {
|
||||
return r.tsClient, r.loginServer, nil
|
||||
}
|
||||
|
||||
tc, loginUrl, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tailnetName)
|
||||
if err != nil {
|
||||
return nil, "", err
|
||||
}
|
||||
|
||||
// Apply fallback if tailnet doesn't specify custom login URL
|
||||
if loginUrl == "" {
|
||||
loginUrl = r.loginServer
|
||||
}
|
||||
|
||||
return tc, loginUrl, nil
|
||||
}
|
||||
|
||||
type nodeMetadata struct {
|
||||
ordinal int32
|
||||
stateSecret *corev1.Secret
|
||||
|
||||
Reference in New Issue
Block a user