cmd/k8s-operator,k8s-operator: Allow the use of multiple tailnets (#18344)
This commit contains the implementation of multi-tailnet support within the Kubernetes Operator
Each of our custom resources now expose the `spec.tailnet` field. This field is a string that must match the name of an existing `Tailnet` resource. A `Tailnet` resource looks like this:
```yaml
apiVersion: tailscale.com/v1alpha1
kind: Tailnet
metadata:
name: example # This is the name that must be referenced by other resources
spec:
credentials:
secretName: example-oauth
```
Each `Tailnet` references a `Secret` resource that contains a set of oauth credentials. This secret must be created in the same namespace as the operator:
```yaml
apiVersion: v1
kind: Secret
metadata:
name: example-oauth # This is the name that's referenced by the Tailnet resource.
namespace: tailscale
stringData:
client_id: "client-id"
client_secret: "client-secret"
```
When created, the operator performs a basic check that the oauth client has access to all required scopes. This is done using read actions on devices, keys & services. While this doesn't capture a missing "write" permission, it catches completely missing permissions. Once this check passes, the `Tailnet` moves into a ready state and can be referenced. Attempting to use a `Tailnet` in a non-ready state will stall the deployment of `Connector`s, `ProxyGroup`s and `Recorder`s until the `Tailnet` becomes ready.
The `spec.tailnet` field informs the operator that a `Connector`, `ProxyGroup`, or `Recorder` must be given an auth key generated using the specified oauth client. For backwards compatibility, the set of credentials the operator is configured with are considered the default. That is, where `spec.tailnet` is not set, the resource will be deployed in the same tailnet as the operator.
Updates https://github.com/tailscale/corp/issues/34561
This commit is contained in:
+31
-6
@@ -107,6 +107,7 @@ const (
|
||||
letsEncryptStagingEndpoint = "https://acme-staging-v02.api.letsencrypt.org/directory"
|
||||
|
||||
mainContainerName = "tailscale"
|
||||
operatorTailnet = ""
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -152,6 +153,9 @@ type tailscaleSTSConfig struct {
|
||||
// HostnamePrefix specifies the desired prefix for the device's hostname. The hostname will be suffixed with the
|
||||
// ordinal number generated by the StatefulSet.
|
||||
HostnamePrefix string
|
||||
|
||||
// Tailnet specifies the Tailnet resource to use for producing auth keys.
|
||||
Tailnet string
|
||||
}
|
||||
|
||||
type connector struct {
|
||||
@@ -194,6 +198,16 @@ func IsHTTPSEnabledOnTailnet(tsnetServer tsnetServer) bool {
|
||||
// Provision ensures that the StatefulSet for the given service is running and
|
||||
// up to date.
|
||||
func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.SugaredLogger, sts *tailscaleSTSConfig) (*corev1.Service, error) {
|
||||
tailscaleClient := a.tsClient
|
||||
if sts.Tailnet != "" {
|
||||
tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, sts.Tailnet)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tailscaleClient = tc
|
||||
}
|
||||
|
||||
// Do full reconcile.
|
||||
// TODO (don't create Service for the Connector)
|
||||
hsvc, err := a.reconcileHeadlessService(ctx, logger, sts)
|
||||
@@ -213,7 +227,7 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
|
||||
}
|
||||
sts.ProxyClass = proxyClass
|
||||
|
||||
secretNames, err := a.provisionSecrets(ctx, logger, sts, hsvc)
|
||||
secretNames, err := a.provisionSecrets(ctx, tailscaleClient, logger, sts, hsvc)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create or get API key secret: %w", err)
|
||||
}
|
||||
@@ -237,7 +251,18 @@ func (a *tailscaleSTSReconciler) Provision(ctx context.Context, logger *zap.Suga
|
||||
// Cleanup removes all resources associated that were created by Provision with
|
||||
// the given labels. It returns true when all resources have been removed,
|
||||
// otherwise it returns false and the caller should retry later.
|
||||
func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) {
|
||||
func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, tailnet string, logger *zap.SugaredLogger, labels map[string]string, typ string) (done bool, _ error) {
|
||||
tailscaleClient := a.tsClient
|
||||
if tailnet != "" {
|
||||
tc, err := clientForTailnet(ctx, a.Client, a.operatorNamespace, tailnet)
|
||||
if err != nil {
|
||||
logger.Errorf("failed to get tailscale client: %v", err)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
tailscaleClient = tc
|
||||
}
|
||||
|
||||
// Need to delete the StatefulSet first, and delete it with foreground
|
||||
// cascading deletion. That way, the pod that's writing to the Secret will
|
||||
// stop running before we start looking at the Secret's contents, and
|
||||
@@ -279,7 +304,7 @@ func (a *tailscaleSTSReconciler) Cleanup(ctx context.Context, logger *zap.Sugare
|
||||
for _, dev := range devices {
|
||||
if dev.id != "" {
|
||||
logger.Debugf("deleting device %s from control", string(dev.id))
|
||||
if err = a.tsClient.DeleteDevice(ctx, string(dev.id)); err != nil {
|
||||
if err = tailscaleClient.DeleteDevice(ctx, string(dev.id)); err != nil {
|
||||
errResp := &tailscale.ErrResponse{}
|
||||
if ok := errors.As(err, errResp); ok && errResp.Status == http.StatusNotFound {
|
||||
logger.Debugf("device %s not found, likely because it has already been deleted from control", string(dev.id))
|
||||
@@ -360,7 +385,7 @@ func (a *tailscaleSTSReconciler) reconcileHeadlessService(ctx context.Context, l
|
||||
return createOrUpdate(ctx, a.Client, a.operatorNamespace, hsvc, func(svc *corev1.Service) { svc.Spec = hsvc.Spec })
|
||||
}
|
||||
|
||||
func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) ([]string, error) {
|
||||
func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, tailscaleClient tsClient, logger *zap.SugaredLogger, stsC *tailscaleSTSConfig, hsvc *corev1.Service) ([]string, error) {
|
||||
secretNames := make([]string, stsC.Replicas)
|
||||
|
||||
// Start by ensuring we have Secrets for the desired number of replicas. This will handle both creating and scaling
|
||||
@@ -403,7 +428,7 @@ func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *z
|
||||
if len(tags) == 0 {
|
||||
tags = a.defaultTags
|
||||
}
|
||||
authKey, err = newAuthKey(ctx, a.tsClient, tags)
|
||||
authKey, err = newAuthKey(ctx, tailscaleClient, tags)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -477,7 +502,7 @@ func (a *tailscaleSTSReconciler) provisionSecrets(ctx context.Context, logger *z
|
||||
if dev != nil && dev.id != "" {
|
||||
var errResp *tailscale.ErrResponse
|
||||
|
||||
err = a.tsClient.DeleteDevice(ctx, string(dev.id))
|
||||
err = tailscaleClient.DeleteDevice(ctx, string(dev.id))
|
||||
switch {
|
||||
case errors.As(err, &errResp) && errResp.Status == http.StatusNotFound:
|
||||
// This device has possibly already been deleted in the admin console. So we can ignore this
|
||||
|
||||
Reference in New Issue
Block a user