cmd/k8s-operator: migrate to tailscale-client-go-v2 (#19010)

This commit modifies the Kubernetes operator to use the `tailscale-client-go-v2`
package instead of the internal Tailscale client it was previously using. This
lets us expand the operator's custom resources and features as they become
available via the API module.

The tailnet reconciler has also been modified to manage clients as tailnets
are created and removed. Every other reconciler is now given a single
`ClientProvider` that returns the Tailscale client for a tailnet by name, or
the operator's default client when the name is an empty string.

Fixes: https://github.com/tailscale/corp/issues/38418

Signed-off-by: David Bond <davidsbond93@gmail.com>
This commit is contained in:
David Bond
2026-04-09 14:39:46 +01:00
committed by GitHub
parent b25920dfc0
commit 85d6ba9473
33 changed files with 916 additions and 940 deletions
+29 -52
View File
@@ -10,7 +10,6 @@ import (
"encoding/json"
"errors"
"fmt"
"net/http"
"slices"
"strconv"
"strings"
@@ -30,10 +29,11 @@ import (
"k8s.io/client-go/tools/record"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
"tailscale.com/client/tailscale/v2"
"tailscale.com/client/tailscale"
tsoperator "tailscale.com/k8s-operator"
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
"tailscale.com/k8s-operator/tsclient"
"tailscale.com/kube/kubetypes"
"tailscale.com/tailcfg"
"tailscale.com/tstime"
@@ -60,9 +60,8 @@ type RecorderReconciler struct {
log *zap.SugaredLogger
recorder record.EventRecorder
clock tstime.Clock
clients ClientProvider
tsNamespace string
tsClient tsClient
loginServer string
mu sync.Mutex // protects following
recorders set.Slice[types.UID] // for recorders gauge
@@ -99,7 +98,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
return reconcile.Result{}, nil
}
tailscaleClient, loginUrl, err := r.getClientAndLoginURL(ctx, tsr.Spec.Tailnet)
tsClient, err := r.clients.For(tsr.Spec.Tailnet)
if err != nil {
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderTailnetUnavailable, err.Error())
}
@@ -112,7 +111,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
return reconcile.Result{}, nil
}
if done, err := r.maybeCleanup(ctx, tsr, tailscaleClient); err != nil {
if done, err := r.maybeCleanup(ctx, tsr, tsClient); err != nil {
return reconcile.Result{}, err
} else if !done {
logger.Debugf("Recorder resource cleanup not yet finished, will retry...")
@@ -144,7 +143,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
return setStatusReady(tsr, metav1.ConditionFalse, reasonRecorderInvalid, message)
}
if err = r.maybeProvision(ctx, tailscaleClient, loginUrl, tsr); err != nil {
if err = r.maybeProvision(ctx, tsClient, tsr); err != nil {
reason := reasonRecorderCreationFailed
message := fmt.Sprintf("failed creating Recorder: %s", err)
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
@@ -162,30 +161,7 @@ func (r *RecorderReconciler) Reconcile(ctx context.Context, req reconcile.Reques
return setStatusReady(tsr, metav1.ConditionTrue, reasonRecorderCreated, reasonRecorderCreated)
}
// getClientAndLoginURL returns the appropriate Tailscale client and resolved login URL
// for the given tailnet name. If no tailnet is specified, returns the default client
// and login server. Applies fallback to the operator's login server if the tailnet
// doesn't specify a custom login URL.
func (r *RecorderReconciler) getClientAndLoginURL(ctx context.Context, tailnetName string) (tsClient,
string, error) {
if tailnetName == "" {
return r.tsClient, r.loginServer, nil
}
tc, loginUrl, err := clientForTailnet(ctx, r.Client, r.tsNamespace, tailnetName)
if err != nil {
return nil, "", err
}
// Apply fallback if tailnet doesn't specify custom login URL
if loginUrl == "" {
loginUrl = r.loginServer
}
return tc, loginUrl, nil
}
func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient tsClient, loginUrl string, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) maybeProvision(ctx context.Context, tsClient tsclient.Client, tsr *tsapi.Recorder) error {
logger := r.logger(tsr.Name)
r.mu.Lock()
@@ -193,7 +169,7 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient
gaugeRecorderResources.Set(int64(r.recorders.Len()))
r.mu.Unlock()
if err := r.ensureAuthSecretsCreated(ctx, tailscaleClient, tsr); err != nil {
if err := r.ensureAuthSecretsCreated(ctx, tsClient, tsr); err != nil {
return fmt.Errorf("error creating secrets: %w", err)
}
@@ -252,7 +228,7 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient
return fmt.Errorf("error creating RoleBinding: %w", err)
}
ss := tsrStatefulSet(tsr, r.tsNamespace, loginUrl)
ss := tsrStatefulSet(tsr, r.tsNamespace, tsClient.LoginURL())
_, err = createOrUpdate(ctx, r.Client, r.tsNamespace, ss, func(s *appsv1.StatefulSet) {
s.ObjectMeta.Labels = ss.ObjectMeta.Labels
s.ObjectMeta.Annotations = ss.ObjectMeta.Annotations
@@ -271,13 +247,13 @@ func (r *RecorderReconciler) maybeProvision(ctx context.Context, tailscaleClient
// If we have scaled the recorder down, we will have dangling state secrets
// that we need to clean up.
if err = r.maybeCleanupSecrets(ctx, tailscaleClient, tsr); err != nil {
if err = r.maybeCleanupSecrets(ctx, tsClient, tsr); err != nil {
return fmt.Errorf("error cleaning up Secrets: %w", err)
}
var devices []tsapi.RecorderTailnetDevice
for replica := range replicas {
dev, ok, err := r.getDeviceInfo(ctx, tailscaleClient, tsr.Name, replica)
dev, ok, err := r.getDeviceInfo(ctx, tsClient, tsr.Name, replica)
switch {
case err != nil:
return fmt.Errorf("failed to get device info: %w", err)
@@ -342,7 +318,7 @@ func (r *RecorderReconciler) maybeCleanupServiceAccounts(ctx context.Context, ts
return nil
}
func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tsClient tsclient.Client, tsr *tsapi.Recorder) error {
options := []client.ListOption{
client.InNamespace(r.tsNamespace),
client.MatchingLabels(tsrLabels("recorder", tsr.Name, nil)),
@@ -382,11 +358,12 @@ func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tailscaleC
if ok {
r.log.Debugf("deleting device %s", devicePrefs.Config.NodeID)
err = tailscaleClient.DeleteDevice(ctx, string(devicePrefs.Config.NodeID))
if errResp, ok := errors.AsType[*tailscale.ErrResponse](err); ok && errResp.Status == http.StatusNotFound {
// This device has possibly already been deleted in the admin console. So we can ignore this
// and move on to removing the secret.
} else if err != nil {
err = tsClient.Devices().Delete(ctx, string(devicePrefs.Config.NodeID))
switch {
case tailscale.IsNotFound(err):
// This device has possibly already been deleted in the admin console. So we can ignore this
// and move on to removing the secret.
case err != nil:
return err
}
}
@@ -402,7 +379,7 @@ func (r *RecorderReconciler) maybeCleanupSecrets(ctx context.Context, tailscaleC
// maybeCleanup just deletes the device from the tailnet. All the kubernetes
// resources linked to a Recorder will get cleaned up via owner references
// (which we can use because they are all in the same namespace).
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder, tailscaleClient tsClient) (bool, error) {
func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Recorder, tsClient tsclient.Client) (bool, error) {
logger := r.logger(tsr.Name)
var replicas int32 = 1
@@ -426,12 +403,12 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
nodeID := string(devicePrefs.Config.NodeID)
logger.Debugf("deleting device %s from control", nodeID)
if err = tailscaleClient.DeleteDevice(ctx, nodeID); err != nil {
if errResp, ok := errors.AsType[tailscale.ErrResponse](err); ok && errResp.Status == http.StatusNotFound {
logger.Debugf("device %s not found, likely because it has already been deleted from control", nodeID)
continue
}
err = tsClient.Devices().Delete(ctx, nodeID)
switch {
case tailscale.IsNotFound(err):
logger.Debugf("device %s not found, likely because it has already been deleted from control", nodeID)
continue
case err != nil:
return false, fmt.Errorf("error deleting device: %w", err)
}
@@ -451,7 +428,7 @@ func (r *RecorderReconciler) maybeCleanup(ctx context.Context, tsr *tsapi.Record
return true, nil
}
func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tailscaleClient tsClient, tsr *tsapi.Recorder) error {
func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tsClient tsclient.Client, tsr *tsapi.Recorder) error {
var replicas int32 = 1
if tsr.Spec.Replicas != nil {
replicas = *tsr.Spec.Replicas
@@ -479,7 +456,7 @@ func (r *RecorderReconciler) ensureAuthSecretsCreated(ctx context.Context, tails
return fmt.Errorf("failed to get Secret %q: %w", key.Name, err)
}
authKey, err := newAuthKey(ctx, tailscaleClient, tags.Stringify())
authKey, err := newAuthKey(ctx, tsClient, tags.Stringify())
if err != nil {
return err
}
@@ -581,7 +558,7 @@ func getDevicePrefs(secret *corev1.Secret) (prefs prefs, ok bool, err error) {
return prefs, ok, nil
}
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tailscaleClient tsClient, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tsClient tsclient.Client, tsrName string, replica int32) (d tsapi.RecorderTailnetDevice, ok bool, err error) {
secret, err := r.getStateSecret(ctx, tsrName, replica)
if err != nil || secret == nil {
return tsapi.RecorderTailnetDevice{}, false, err
@@ -595,7 +572,7 @@ func (r *RecorderReconciler) getDeviceInfo(ctx context.Context, tailscaleClient
// TODO(tomhjp): The profile info doesn't include addresses, which is why we
// need the API. Should maybe update tsrecorder to write IPs to the state
// Secret like containerboot does.
device, err := tailscaleClient.Device(ctx, string(prefs.Config.NodeID), nil)
device, err := tsClient.Devices().Get(ctx, string(prefs.Config.NodeID))
if err != nil {
return tsapi.RecorderTailnetDevice{}, false, fmt.Errorf("failed to get device info from API: %w", err)
}