cmd/k8s-operator: migrate to tailscale-client-go-v2 (#19010)
This commit modifies the kubernetes operator to use the `tailscale-client-go-v2` package instead of the internal tailscale client it was previously using. This now gives us the ability to expand out custom resources and features as they become available via the API module. The tailnet reconciler has also been modified to manage clients as tailnets are created and removed, providing each subsequent reconciler with a single `ClientProvider` that obtains a tailscale client for the respective tailnet by name, or the operator's default when presented with a blank string. Fixes: https://github.com/tailscale/corp/issues/38418 Signed-off-by: David Bond <davidsbond93@gmail.com>
This commit is contained in:
@@ -10,7 +10,6 @@ import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"reflect"
|
||||
"slices"
|
||||
@@ -27,11 +26,12 @@ import (
|
||||
"k8s.io/client-go/tools/record"
|
||||
"sigs.k8s.io/controller-runtime/pkg/client"
|
||||
"sigs.k8s.io/controller-runtime/pkg/reconcile"
|
||||
"tailscale.com/client/tailscale/v2"
|
||||
|
||||
"tailscale.com/internal/client/tailscale"
|
||||
"tailscale.com/ipn"
|
||||
tsoperator "tailscale.com/k8s-operator"
|
||||
tsapi "tailscale.com/k8s-operator/apis/v1alpha1"
|
||||
"tailscale.com/k8s-operator/tsclient"
|
||||
"tailscale.com/kube/ingressservices"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
"tailscale.com/tailcfg"
|
||||
@@ -57,7 +57,7 @@ type HAServiceReconciler struct {
|
||||
isDefaultLoadBalancer bool
|
||||
recorder record.EventRecorder
|
||||
logger *zap.SugaredLogger
|
||||
tsClient tsClient
|
||||
clients ClientProvider
|
||||
tsNamespace string
|
||||
defaultTags []string
|
||||
operatorID string // stableID of the operator's Tailscale device
|
||||
@@ -121,7 +121,7 @@ func (r *HAServiceReconciler) Reconcile(ctx context.Context, req reconcile.Reque
|
||||
return res, nil
|
||||
}
|
||||
|
||||
tailscaleClient, err := clientFromProxyGroup(ctx, r.Client, pg, r.tsNamespace, r.tsClient)
|
||||
tsClient, err := r.clients.For(pg.Spec.Tailnet)
|
||||
if err != nil {
|
||||
return res, fmt.Errorf("failed to get tailscale client: %w", err)
|
||||
}
|
||||
@@ -131,7 +131,7 @@ func (r *HAServiceReconciler) Reconcile(ctx context.Context, req reconcile.Reque
|
||||
|
||||
if !svc.DeletionTimestamp.IsZero() || !r.isTailscaleService(svc) {
|
||||
logger.Debugf("Service is being deleted or is (no longer) referring to Tailscale ingress/egress, ensuring any created resources are cleaned up")
|
||||
_, err = r.maybeCleanup(ctx, hostname, svc, logger, tailscaleClient)
|
||||
_, err = r.maybeCleanup(ctx, hostname, svc, logger, tsClient)
|
||||
return res, err
|
||||
}
|
||||
|
||||
@@ -139,7 +139,7 @@ func (r *HAServiceReconciler) Reconcile(ctx context.Context, req reconcile.Reque
|
||||
// is the case, we reconcile the Ingress one more time to ensure that concurrent updates to the Tailscale Service in a
|
||||
// multi-cluster Ingress setup have not resulted in another actor overwriting our Tailscale Service update.
|
||||
needsRequeue := false
|
||||
needsRequeue, err = r.maybeProvision(ctx, hostname, svc, pg, logger, tailscaleClient)
|
||||
needsRequeue, err = r.maybeProvision(ctx, hostname, svc, pg, logger, tsClient)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), optimisticLockErrorMsg) {
|
||||
logger.Infof("optimistic lock error, retrying: %s", err)
|
||||
@@ -162,7 +162,7 @@ func (r *HAServiceReconciler) Reconcile(ctx context.Context, req reconcile.Reque
|
||||
// If a Tailscale Service exists, but does not have an owner reference from any operator, we error
|
||||
// out assuming that this is an owner reference created by an unknown actor.
|
||||
// Returns true if the operation resulted in a Tailscale Service update.
|
||||
func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname string, svc *corev1.Service, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger, tsClient tsClient) (svcsChanged bool, err error) {
|
||||
func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname string, svc *corev1.Service, pg *tsapi.ProxyGroup, logger *zap.SugaredLogger, tsClient tsclient.Client) (svcsChanged bool, err error) {
|
||||
oldSvcStatus := svc.Status.DeepCopy()
|
||||
defer func() {
|
||||
if !apiequality.Semantic.DeepEqual(oldSvcStatus, &svc.Status) {
|
||||
@@ -209,8 +209,8 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
// 2. Ensure that there isn't a Tailscale Service with the same hostname
|
||||
// already created and not owned by this Service.
|
||||
serviceName := tailcfg.ServiceName("svc:" + hostname)
|
||||
existingTSSvc, err := tsClient.GetVIPService(ctx, serviceName)
|
||||
if err != nil && !isErrorTailscaleServiceNotFound(err) {
|
||||
existingTSSvc, err := tsClient.VIPServices().Get(ctx, serviceName.String())
|
||||
if err != nil && !tailscale.IsNotFound(err) {
|
||||
return false, fmt.Errorf("error getting Tailscale Service %q: %w", hostname, err)
|
||||
}
|
||||
|
||||
@@ -233,8 +233,8 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
tags = strings.Split(tstr, ",")
|
||||
}
|
||||
|
||||
tsSvc := &tailscale.VIPService{
|
||||
Name: serviceName,
|
||||
tsSvc := tailscale.VIPService{
|
||||
Name: serviceName.String(),
|
||||
Tags: tags,
|
||||
Ports: []string{"do-not-validate"}, // we don't want to validate ports
|
||||
Comment: managedTSServiceComment,
|
||||
@@ -249,12 +249,13 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
// with the same generation number has been reconciled ~more than N times and stop attempting to apply updates.
|
||||
if existingTSSvc == nil ||
|
||||
!reflect.DeepEqual(tsSvc.Tags, existingTSSvc.Tags) ||
|
||||
!ownersAreSetAndEqual(tsSvc, existingTSSvc) {
|
||||
!ownersAreSetAndEqual(tsSvc, *existingTSSvc) {
|
||||
logger.Infof("Ensuring Tailscale Service exists and is up to date")
|
||||
if err := tsClient.CreateOrUpdateVIPService(ctx, tsSvc); err != nil {
|
||||
if err = tsClient.VIPServices().CreateOrUpdate(ctx, tsSvc); err != nil {
|
||||
return false, fmt.Errorf("error creating Tailscale Service: %w", err)
|
||||
}
|
||||
existingTSSvc = tsSvc
|
||||
|
||||
existingTSSvc = &tsSvc
|
||||
}
|
||||
|
||||
cm, cfgs, err := ingressSvcsConfigs(ctx, r.Client, pg.Name, r.tsNamespace)
|
||||
@@ -266,12 +267,12 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if existingTSSvc.Addrs == nil {
|
||||
existingTSSvc, err = tsClient.GetVIPService(ctx, tsSvc.Name)
|
||||
if err != nil {
|
||||
if len(existingTSSvc.Addrs) == 0 {
|
||||
existingTSSvc, err = tsClient.VIPServices().Get(ctx, tsSvc.Name)
|
||||
switch {
|
||||
case err != nil:
|
||||
return false, fmt.Errorf("error getting Tailscale Service: %w", err)
|
||||
}
|
||||
if existingTSSvc.Addrs == nil {
|
||||
case len(existingTSSvc.Addrs) == 0:
|
||||
// TODO(irbekrm): this should be a retry
|
||||
return false, fmt.Errorf("unexpected: Tailscale Service addresses not populated")
|
||||
}
|
||||
@@ -374,7 +375,7 @@ func (r *HAServiceReconciler) maybeProvision(ctx context.Context, hostname strin
|
||||
// Service is being deleted or is unexposed. The cleanup is safe for a multi-cluster setup- the Tailscale Service is only
|
||||
// deleted if it does not contain any other owner references. If it does the cleanup only removes the owner reference
|
||||
// corresponding to this Service.
|
||||
func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string, svc *corev1.Service, logger *zap.SugaredLogger, tsClient tsClient) (svcChanged bool, err error) {
|
||||
func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string, svc *corev1.Service, logger *zap.SugaredLogger, tsClient tsclient.Client) (svcChanged bool, err error) {
|
||||
logger.Debugf("Ensuring any resources for Service are cleaned up")
|
||||
ix := slices.Index(svc.Finalizers, svcPGFinalizerName)
|
||||
if ix < 0 {
|
||||
@@ -392,7 +393,7 @@ func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string,
|
||||
|
||||
serviceName := tailcfg.ServiceName("svc:" + hostname)
|
||||
// 1. Clean up the Tailscale Service.
|
||||
svcChanged, err = cleanupTailscaleService(ctx, tsClient, serviceName, r.operatorID, logger)
|
||||
svcChanged, err = cleanupTailscaleService(ctx, tsClient, serviceName.String(), r.operatorID, logger)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error deleting Tailscale Service: %w", err)
|
||||
}
|
||||
@@ -425,7 +426,7 @@ func (r *HAServiceReconciler) maybeCleanup(ctx context.Context, hostname string,
|
||||
|
||||
// Tailscale Services that are associated with the provided ProxyGroup and no longer managed this operator's instance are deleted, if not owned by other operator instances, else the owner reference is cleaned up.
|
||||
// Returns true if the operation resulted in existing Tailscale Service updates (owner reference removal).
|
||||
func (r *HAServiceReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger, tsClient tsClient) (svcsChanged bool, err error) {
|
||||
func (r *HAServiceReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyGroupName string, logger *zap.SugaredLogger, tsClient tsclient.Client) (svcsChanged bool, err error) {
|
||||
cm, config, err := ingressSvcsConfigs(ctx, r.Client, proxyGroupName, r.tsNamespace)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("failed to get ingress service config: %s", err)
|
||||
@@ -453,7 +454,7 @@ func (r *HAServiceReconciler) maybeCleanupProxyGroup(ctx context.Context, proxyG
|
||||
return false, fmt.Errorf("failed to update tailscaled config services: %w", err)
|
||||
}
|
||||
|
||||
svcsChanged, err = cleanupTailscaleService(ctx, tsClient, tailcfg.ServiceName(tsSvcName), r.operatorID, logger)
|
||||
svcsChanged, err = cleanupTailscaleService(ctx, tsClient, tsSvcName, r.operatorID, logger)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("deleting Tailscale Service %q: %w", tsSvcName, err)
|
||||
}
|
||||
@@ -517,29 +518,28 @@ func (r *HAServiceReconciler) shouldExposeClusterIP(svc *corev1.Service) bool {
|
||||
// If a Tailscale Service is found, but contains other owner references, only removes this operator's owner reference.
|
||||
// If a Tailscale Service by the given name is not found or does not contain this operator's owner reference, do nothing.
|
||||
// It returns true if an existing Tailscale Service was updated to remove owner reference, as well as any error that occurred.
|
||||
func cleanupTailscaleService(ctx context.Context, tsClient tsClient, name tailcfg.ServiceName, operatorID string, logger *zap.SugaredLogger) (updated bool, err error) {
|
||||
svc, err := tsClient.GetVIPService(ctx, name)
|
||||
if err != nil {
|
||||
errResp, ok := errors.AsType[tailscale.ErrResponse](err)
|
||||
if ok && errResp.Status == http.StatusNotFound {
|
||||
return false, nil
|
||||
}
|
||||
if !ok {
|
||||
return false, fmt.Errorf("unexpected error getting Tailscale Service %q: %w", name.String(), err)
|
||||
}
|
||||
|
||||
return false, fmt.Errorf("error getting Tailscale Service: %w", err)
|
||||
func cleanupTailscaleService(ctx context.Context, tsClient tsclient.Client, name string, operatorID string, logger *zap.SugaredLogger) (updated bool, err error) {
|
||||
svc, err := tsClient.VIPServices().Get(ctx, name)
|
||||
switch {
|
||||
case tailscale.IsNotFound(err):
|
||||
return false, nil
|
||||
case err != nil:
|
||||
return false, fmt.Errorf("unexpected error getting Tailscale Service %q: %w", name, err)
|
||||
}
|
||||
|
||||
if svc == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
o, err := parseOwnerAnnotation(svc)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error parsing Tailscale Service owner annotation: %w", err)
|
||||
}
|
||||
|
||||
if o == nil || len(o.OwnerRefs) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
// Comparing with the operatorID only means that we will not be able to
|
||||
// clean up Tailscale Services in cases where the operator was deleted from the
|
||||
// cluster before deleting the Ingress. Perhaps the comparison could be
|
||||
@@ -550,18 +550,22 @@ func cleanupTailscaleService(ctx context.Context, tsClient tsClient, name tailcf
|
||||
if ix == -1 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
if len(o.OwnerRefs) == 1 {
|
||||
logger.Infof("Deleting Tailscale Service %q", name)
|
||||
return false, tsClient.DeleteVIPService(ctx, name)
|
||||
return false, tsClient.VIPServices().Delete(ctx, name)
|
||||
}
|
||||
|
||||
o.OwnerRefs = slices.Delete(o.OwnerRefs, ix, ix+1)
|
||||
logger.Infof("Updating Tailscale Service %q", name)
|
||||
json, err := json.Marshal(o)
|
||||
|
||||
data, err := json.Marshal(o)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error marshalling updated Tailscale Service owner reference: %w", err)
|
||||
}
|
||||
svc.Annotations[ownerAnnotation] = string(json)
|
||||
return true, tsClient.CreateOrUpdateVIPService(ctx, svc)
|
||||
|
||||
svc.Annotations[ownerAnnotation] = string(data)
|
||||
return true, tsClient.VIPServices().CreateOrUpdate(ctx, *svc)
|
||||
}
|
||||
|
||||
func (r *HAServiceReconciler) backendRoutesSetup(ctx context.Context, serviceName, replicaName string, wantsCfg *ingressservices.Config, logger *zap.SugaredLogger) (bool, error) {
|
||||
|
||||
Reference in New Issue
Block a user