cmd/{containerboot,k8s-operator}: reissue auth keys for broken proxies (#16450)
Adds logic for containerboot to signal that it can't auth, so the
operator can reissue a new auth key. This only applies when running with
a config file and with a kube state store.
If the operator sees reissue_authkey in a state Secret, it will create a
new auth key iff the config has no auth key or its auth key matches the
value of reissue_authkey from the state Secret. This is to ensure we
don't reissue auth keys in a tight loop if the proxy is slow to start or
failing for some other reason. The reissue logic also uses a burstable
rate limiter to ensure there's no way a terminally misconfigured
or buggy operator can automatically generate new auth keys in a tight loop.
Additional implementation details (ChaosInTheCRD):
- Added `ipn.NotifyInitialHealthState` to the ipn watcher, to ensure that
  `n.Health` is populated when notifications are returned.
- on auth failure, containerboot:
  - Disconnects from control server
  - Sets reissue_authkey marker in state Secret with the failing key
  - Polls config file for new auth key (10 minute timeout)
  - Restarts after receiving new key to apply it
- modified operator's reissue logic slightly:
  - Deletes old device from tailnet before creating new key
  - Rate limiting: 1 key per 30s with initial burst equal to replica count
  - In-flight tracking (authKeyReissuing map) prevents duplicate API calls
    across reconcile loops
Updates #14080
Change-Id: I6982f8e741932a6891f2f48a2936f7f6a455317f
(cherry picked from commit 969927c47c3d4de05e90f5b26a6d8d931c5ceed4)
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
Co-authored-by: chaosinthecrd <tom@tmlabs.co.uk>
This commit is contained in:
+129
-21
@@ -14,9 +14,12 @@ import (
|
||||
"net/http"
|
||||
"net/netip"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/fsnotify/fsnotify"
|
||||
"tailscale.com/client/local"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/kube/egressservices"
|
||||
"tailscale.com/kube/ingressservices"
|
||||
@@ -26,9 +29,11 @@ import (
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/backoff"
|
||||
"tailscale.com/util/set"
|
||||
)
|
||||
|
||||
const fieldManager = "tailscale-container"
|
||||
const kubeletMountedConfigLn = "..data"
|
||||
|
||||
// kubeClient is a wrapper around Tailscale's internal kube client that knows how to talk to the kube API server. We use
|
||||
// this rather than any of the upstream Kubernetes client libraries to avoid extra imports.
|
||||
type kubeClient struct {
|
||||
@@ -46,7 +51,7 @@ func newKubeClient(root string, stateSecret string) (*kubeClient, error) {
|
||||
var err error
|
||||
kc, err := kubeclient.New("tailscale-container")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Error creating kube client: %w", err)
|
||||
return nil, fmt.Errorf("error creating kube client: %w", err)
|
||||
}
|
||||
if (root != "/") || os.Getenv("TS_KUBERNETES_READ_API_SERVER_ADDRESS_FROM_ENV") == "true" {
|
||||
// Derive the API server address from the environment variables
|
||||
@@ -63,7 +68,7 @@ func (kc *kubeClient) storeDeviceID(ctx context.Context, deviceID tailcfg.Stable
|
||||
kubetypes.KeyDeviceID: []byte(deviceID),
|
||||
},
|
||||
}
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
|
||||
}
|
||||
|
||||
// storeDeviceEndpoints writes device's tailnet IPs and MagicDNS name to fields 'device_ips', 'device_fqdn' of client's
|
||||
@@ -84,7 +89,7 @@ func (kc *kubeClient) storeDeviceEndpoints(ctx context.Context, fqdn string, add
|
||||
kubetypes.KeyDeviceIPs: deviceIPs,
|
||||
},
|
||||
}
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
|
||||
}
|
||||
|
||||
// storeHTTPSEndpoint writes an HTTPS endpoint exposed by this device via 'tailscale serve' to the client's state
|
||||
@@ -96,7 +101,7 @@ func (kc *kubeClient) storeHTTPSEndpoint(ctx context.Context, ep string) error {
|
||||
kubetypes.KeyHTTPSEndpoint: []byte(ep),
|
||||
},
|
||||
}
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
|
||||
}
|
||||
|
||||
// deleteAuthKey deletes the 'authkey' field of the given kube
|
||||
@@ -122,7 +127,7 @@ func (kc *kubeClient) deleteAuthKey(ctx context.Context) error {
|
||||
|
||||
// resetContainerbootState resets state from previous runs of containerboot to
|
||||
// ensure the operator doesn't use stale state when a Pod is first recreated.
|
||||
func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string) error {
|
||||
func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string, tailscaledConfigAuthkey string) error {
|
||||
existingSecret, err := kc.GetSecret(ctx, kc.stateSecret)
|
||||
switch {
|
||||
case kubeclient.IsNotFoundErr(err):
|
||||
@@ -131,32 +136,135 @@ func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string
|
||||
case err != nil:
|
||||
return fmt.Errorf("failed to read state Secret %q to reset state: %w", kc.stateSecret, err)
|
||||
}
|
||||
|
||||
s := &kubeapi.Secret{
|
||||
Data: map[string][]byte{
|
||||
kubetypes.KeyCapVer: fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion),
|
||||
|
||||
// TODO(tomhjp): Perhaps shouldn't clear device ID and use a different signal, as this could leak tailnet devices.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
}
|
||||
if podUID != "" {
|
||||
s.Data[kubetypes.KeyPodUID] = []byte(podUID)
|
||||
}
|
||||
|
||||
toClear := set.SetOf([]string{
|
||||
kubetypes.KeyDeviceID,
|
||||
kubetypes.KeyDeviceFQDN,
|
||||
kubetypes.KeyDeviceIPs,
|
||||
kubetypes.KeyHTTPSEndpoint,
|
||||
egressservices.KeyEgressServices,
|
||||
ingressservices.IngressConfigKey,
|
||||
})
|
||||
for key := range existingSecret.Data {
|
||||
if toClear.Contains(key) {
|
||||
// It's fine to leave the key in place as a debugging breadcrumb,
|
||||
// it should get a new value soon.
|
||||
s.Data[key] = nil
|
||||
}
|
||||
// Only clear reissue_authkey if the operator has actioned it.
|
||||
brokenAuthkey, ok := existingSecret.Data[kubetypes.KeyReissueAuthkey]
|
||||
if ok && tailscaledConfigAuthkey != "" && string(brokenAuthkey) != tailscaledConfigAuthkey {
|
||||
s.Data[kubetypes.KeyReissueAuthkey] = nil
|
||||
}
|
||||
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
|
||||
}
|
||||
|
||||
func (kc *kubeClient) setAndWaitForAuthKeyReissue(ctx context.Context, client *local.Client, cfg *settings, tailscaledConfigAuthKey string) error {
|
||||
err := client.DisconnectControl(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error disconnecting from control: %w", err)
|
||||
}
|
||||
|
||||
err = kc.setReissueAuthKey(ctx, tailscaledConfigAuthKey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to set reissue_authkey in Kubernetes Secret: %w", err)
|
||||
}
|
||||
|
||||
err = kc.waitForAuthKeyReissue(ctx, cfg.TailscaledConfigFilePath, tailscaledConfigAuthKey, 10*time.Minute)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to receive new auth key: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (kc *kubeClient) setReissueAuthKey(ctx context.Context, authKey string) error {
|
||||
s := &kubeapi.Secret{
|
||||
Data: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte(authKey),
|
||||
},
|
||||
}
|
||||
|
||||
log.Printf("Requesting a new auth key from operator")
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
|
||||
}
|
||||
|
||||
func (kc *kubeClient) waitForAuthKeyReissue(ctx context.Context, configPath string, oldAuthKey string, maxWait time.Duration) error {
|
||||
log.Printf("Waiting for operator to provide new auth key (max wait: %v)", maxWait)
|
||||
|
||||
ctx, cancel := context.WithTimeout(ctx, maxWait)
|
||||
defer cancel()
|
||||
|
||||
tailscaledCfgDir := filepath.Dir(configPath)
|
||||
toWatch := filepath.Join(tailscaledCfgDir, kubeletMountedConfigLn)
|
||||
|
||||
var (
|
||||
pollTicker <-chan time.Time
|
||||
eventChan <-chan fsnotify.Event
|
||||
)
|
||||
|
||||
pollInterval := 5 * time.Second
|
||||
|
||||
// Try to use fsnotify for faster notification
|
||||
if w, err := fsnotify.NewWatcher(); err != nil {
|
||||
log.Printf("auth key reissue: fsnotify unavailable, using polling: %v", err)
|
||||
} else if err := w.Add(tailscaledCfgDir); err != nil {
|
||||
w.Close()
|
||||
log.Printf("auth key reissue: fsnotify watch failed, using polling: %v", err)
|
||||
} else {
|
||||
defer w.Close()
|
||||
log.Printf("auth key reissue: watching for config changes via fsnotify")
|
||||
eventChan = w.Events
|
||||
}
|
||||
|
||||
// still keep polling if using fsnotify, for logging and in case fsnotify fails
|
||||
pt := time.NewTicker(pollInterval)
|
||||
defer pt.Stop()
|
||||
pollTicker = pt.C
|
||||
|
||||
start := time.Now()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return fmt.Errorf("timeout waiting for auth key reissue after %v", maxWait)
|
||||
case <-pollTicker: // Waits for polling tick, continues when received
|
||||
case event := <-eventChan:
|
||||
if event.Name != toWatch {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
newAuthKey := authkeyFromTailscaledConfig(configPath)
|
||||
if newAuthKey != "" && newAuthKey != oldAuthKey {
|
||||
log.Printf("New auth key received from operator after %v", time.Since(start).Round(time.Second))
|
||||
|
||||
if err := kc.clearReissueAuthKeyRequest(ctx); err != nil {
|
||||
log.Printf("Warning: failed to clear reissue request: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if eventChan == nil && pollTicker != nil {
|
||||
log.Printf("Waiting for new auth key from operator (%v elapsed)", time.Since(start).Round(time.Second))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// clearReissueAuthKeyRequest removes the reissue_authkey marker from the Secret
|
||||
// to signal to the operator that we've successfully received the new key.
|
||||
func (kc *kubeClient) clearReissueAuthKeyRequest(ctx context.Context) error {
|
||||
s := &kubeapi.Secret{
|
||||
Data: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: nil,
|
||||
},
|
||||
}
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, fieldManager)
|
||||
}
|
||||
|
||||
// waitForConsistentState waits for tailscaled to finish writing state if it
|
||||
|
||||
@@ -248,25 +248,42 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
capver := fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
|
||||
for name, tc := range map[string]struct {
|
||||
podUID string
|
||||
authkey string
|
||||
initial map[string][]byte
|
||||
expected map[string][]byte
|
||||
}{
|
||||
"empty_initial": {
|
||||
podUID: "1234",
|
||||
authkey: "new-authkey",
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyPodUID: []byte("1234"),
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"empty_initial_no_pod_uid": {
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"only_relevant_keys_updated": {
|
||||
podUID: "1234",
|
||||
podUID: "1234",
|
||||
authkey: "new-authkey",
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyCapVer: []byte("1"),
|
||||
kubetypes.KeyPodUID: []byte("5678"),
|
||||
@@ -295,6 +312,57 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
// Tailscaled keys not included in patch.
|
||||
},
|
||||
},
|
||||
"new_authkey_issued": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "new-authkey",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyReissueAuthkey: nil,
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"authkey_not_yet_updated": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "old-authkey",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// reissue_authkey not cleared.
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"authkey_deleted_from_config": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// reissue_authkey not cleared.
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
var actual map[string][]byte
|
||||
@@ -309,7 +377,7 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
return nil
|
||||
},
|
||||
}}
|
||||
if err := kc.resetContainerbootState(context.Background(), tc.podUID); err != nil {
|
||||
if err := kc.resetContainerbootState(context.Background(), tc.podUID, tc.authkey); err != nil {
|
||||
t.Fatalf("resetContainerbootState() error = %v", err)
|
||||
}
|
||||
if diff := cmp.Diff(tc.expected, actual); diff != "" {
|
||||
|
||||
@@ -137,7 +137,9 @@ import (
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
"tailscale.com/client/tailscale"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/ipn/conffile"
|
||||
kubeutils "tailscale.com/k8s-operator"
|
||||
healthz "tailscale.com/kube/health"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
@@ -206,6 +208,11 @@ func run() error {
|
||||
bootCtx, cancel := context.WithTimeout(ctx, 60*time.Second)
|
||||
defer cancel()
|
||||
|
||||
var tailscaledConfigAuthkey string
|
||||
if isOneStepConfig(cfg) {
|
||||
tailscaledConfigAuthkey = authkeyFromTailscaledConfig(cfg.TailscaledConfigFilePath)
|
||||
}
|
||||
|
||||
var kc *kubeClient
|
||||
if cfg.KubeSecret != "" {
|
||||
kc, err = newKubeClient(cfg.Root, cfg.KubeSecret)
|
||||
@@ -219,7 +226,7 @@ func run() error {
|
||||
// hasKubeStateStore because although we know we're in kube, that
|
||||
// doesn't guarantee the state store is properly configured.
|
||||
if hasKubeStateStore(cfg) {
|
||||
if err := kc.resetContainerbootState(bootCtx, cfg.PodUID); err != nil {
|
||||
if err := kc.resetContainerbootState(bootCtx, cfg.PodUID, tailscaledConfigAuthkey); err != nil {
|
||||
return fmt.Errorf("error clearing previous state from Secret: %w", err)
|
||||
}
|
||||
}
|
||||
@@ -299,7 +306,7 @@ func run() error {
|
||||
}
|
||||
}
|
||||
|
||||
w, err := client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialPrefs|ipn.NotifyInitialState)
|
||||
w, err := client.WatchIPNBus(bootCtx, ipn.NotifyInitialNetMap|ipn.NotifyInitialPrefs|ipn.NotifyInitialState|ipn.NotifyInitialHealthState)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to watch tailscaled for updates: %w", err)
|
||||
}
|
||||
@@ -365,8 +372,23 @@ authLoop:
|
||||
if isOneStepConfig(cfg) {
|
||||
// This could happen if this is the first time tailscaled was run for this
|
||||
// device and the auth key was not passed via the configfile.
|
||||
return fmt.Errorf("invalid state: tailscaled daemon started with a config file, but tailscale is not logged in: ensure you pass a valid auth key in the config file.")
|
||||
if hasKubeStateStore(cfg) {
|
||||
log.Printf("Auth key missing or invalid (NeedsLogin state), disconnecting from control and requesting new key from operator")
|
||||
|
||||
err := kc.setAndWaitForAuthKeyReissue(bootCtx, client, cfg, tailscaledConfigAuthkey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get a reissued authkey: %w", err)
|
||||
}
|
||||
|
||||
log.Printf("Successfully received new auth key, restarting to apply configuration")
|
||||
|
||||
// we don't return an error here since we have handled the reissue gracefully.
|
||||
return nil
|
||||
}
|
||||
|
||||
return errors.New("invalid state: tailscaled daemon started with a config file, but tailscale is not logged in: ensure you pass a valid auth key in the config file")
|
||||
}
|
||||
|
||||
if err := authTailscale(); err != nil {
|
||||
return fmt.Errorf("failed to auth tailscale: %w", err)
|
||||
}
|
||||
@@ -384,6 +406,27 @@ authLoop:
|
||||
log.Printf("tailscaled in state %q, waiting", *n.State)
|
||||
}
|
||||
}
|
||||
|
||||
if n.Health != nil {
|
||||
// This can happen if the config has an auth key but it's invalid,
|
||||
// for example if it was single-use and already got used, but the
|
||||
// device state was lost.
|
||||
if _, ok := n.Health.Warnings[health.LoginStateWarnable.Code]; ok {
|
||||
if isOneStepConfig(cfg) && hasKubeStateStore(cfg) {
|
||||
log.Printf("Auth key failed to authenticate (may be expired or single-use), disconnecting from control and requesting new key from operator")
|
||||
|
||||
err := kc.setAndWaitForAuthKeyReissue(bootCtx, client, cfg, tailscaledConfigAuthkey)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get a reissued authkey: %w", err)
|
||||
}
|
||||
|
||||
// we don't return an error here since we have handled the reissue gracefully.
|
||||
log.Printf("Successfully received new auth key, restarting to apply configuration")
|
||||
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
w.Close()
|
||||
@@ -409,9 +452,9 @@ authLoop:
|
||||
// We were told to only auth once, so any secret-bound
|
||||
// authkey is no longer needed. We don't strictly need to
|
||||
// wipe it, but it's good hygiene.
|
||||
log.Printf("Deleting authkey from kube secret")
|
||||
log.Printf("Deleting authkey from Kubernetes Secret")
|
||||
if err := kc.deleteAuthKey(ctx); err != nil {
|
||||
return fmt.Errorf("deleting authkey from kube secret: %w", err)
|
||||
return fmt.Errorf("deleting authkey from Kubernetes Secret: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -422,8 +465,10 @@ authLoop:
|
||||
|
||||
// If tailscaled config was read from a mounted file, watch the file for updates and reload.
|
||||
cfgWatchErrChan := make(chan error)
|
||||
cfgWatchCtx, cfgWatchCancel := context.WithCancel(ctx)
|
||||
defer cfgWatchCancel()
|
||||
if cfg.TailscaledConfigFilePath != "" {
|
||||
go watchTailscaledConfigChanges(ctx, cfg.TailscaledConfigFilePath, client, cfgWatchErrChan)
|
||||
go watchTailscaledConfigChanges(cfgWatchCtx, cfg.TailscaledConfigFilePath, client, cfgWatchErrChan)
|
||||
}
|
||||
|
||||
var (
|
||||
@@ -523,6 +568,7 @@ runLoop:
|
||||
case err := <-cfgWatchErrChan:
|
||||
return fmt.Errorf("failed to watch tailscaled config: %w", err)
|
||||
case n := <-notifyChan:
|
||||
// TODO: (ChaosInTheCRD) Add node removed check when supported by ipn
|
||||
if n.State != nil && *n.State != ipn.Running {
|
||||
// Something's gone wrong and we've left the authenticated state.
|
||||
// Our container image never recovered gracefully from this, and the
|
||||
@@ -979,3 +1025,11 @@ func serviceIPsFromNetMap(nm *netmap.NetworkMap, fqdn dnsname.FQDN) []netip.Pref
|
||||
|
||||
return prefixes
|
||||
}
|
||||
|
||||
func authkeyFromTailscaledConfig(path string) string {
|
||||
if cfg, err := conffile.Load(path); err == nil && cfg.Parsed.AuthKey != nil {
|
||||
return *cfg.Parsed.AuthKey
|
||||
}
|
||||
|
||||
return ""
|
||||
}
|
||||
|
||||
+174
-17
@@ -32,6 +32,7 @@ import (
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"golang.org/x/sys/unix"
|
||||
"tailscale.com/health"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/kube/egressservices"
|
||||
"tailscale.com/kube/kubeclient"
|
||||
@@ -41,6 +42,8 @@ import (
|
||||
"tailscale.com/types/netmap"
|
||||
)
|
||||
|
||||
const configFileAuthKey = "some-auth-key"
|
||||
|
||||
func TestContainerBoot(t *testing.T) {
|
||||
boot := filepath.Join(t.TempDir(), "containerboot")
|
||||
if err := exec.Command("go", "build", "-ldflags", "-X main.testSleepDuration=1ms", "-o", boot, "tailscale.com/cmd/containerboot").Run(); err != nil {
|
||||
@@ -77,6 +80,10 @@ func TestContainerBoot(t *testing.T) {
|
||||
// phase (simulates our fake tailscaled doing it).
|
||||
UpdateKubeSecret map[string]string
|
||||
|
||||
// Update files with these paths/contents at the beginning of the phase
|
||||
// (simulates the operator updating mounted config files).
|
||||
UpdateFiles map[string]string
|
||||
|
||||
// WantFiles files that should exist in the container and their
|
||||
// contents.
|
||||
WantFiles map[string]string
|
||||
@@ -781,6 +788,127 @@ func TestContainerBoot(t *testing.T) {
|
||||
},
|
||||
}
|
||||
},
|
||||
"sets_reissue_authkey_if_needs_login": func(env *testEnv) testCase {
|
||||
newAuthKey := "new-reissued-auth-key"
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
"TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR": filepath.Join(env.d, "etc/tailscaled/"),
|
||||
"KUBERNETES_SERVICE_HOST": env.kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": env.kube.Port,
|
||||
},
|
||||
Phases: []phase{
|
||||
{
|
||||
UpdateFiles: map[string]string{
|
||||
"etc/tailscaled/..data": "",
|
||||
},
|
||||
WantCmds: []string{
|
||||
"/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=kube:tailscale --statedir=/tmp --tun=userspace-networking --config=/etc/tailscaled/cap-95.hujson",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
}, {
|
||||
Notify: &ipn.Notify{
|
||||
State: new(ipn.NeedsLogin),
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyReissueAuthkey: configFileAuthKey,
|
||||
},
|
||||
WantLog: "watching for config changes via fsnotify",
|
||||
}, {
|
||||
UpdateFiles: map[string]string{
|
||||
"etc/tailscaled/cap-95.hujson": fmt.Sprintf(`{"Version":"alpha0","AuthKey":"%s"}`, newAuthKey),
|
||||
"etc/tailscaled/..data": "updated",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
WantExitCode: new(0),
|
||||
WantLog: "Successfully received new auth key, restarting to apply configuration",
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
"sets_reissue_authkey_if_auth_fails": func(env *testEnv) testCase {
|
||||
newAuthKey := "new-reissued-auth-key"
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
"TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR": filepath.Join(env.d, "etc/tailscaled/"),
|
||||
"KUBERNETES_SERVICE_HOST": env.kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": env.kube.Port,
|
||||
},
|
||||
Phases: []phase{
|
||||
{
|
||||
UpdateFiles: map[string]string{
|
||||
"etc/tailscaled/..data": "",
|
||||
},
|
||||
WantCmds: []string{
|
||||
"/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=kube:tailscale --statedir=/tmp --tun=userspace-networking --config=/etc/tailscaled/cap-95.hujson",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
}, {
|
||||
Notify: &ipn.Notify{
|
||||
Health: &health.State{
|
||||
Warnings: map[health.WarnableCode]health.UnhealthyState{
|
||||
health.LoginStateWarnable.Code: {},
|
||||
},
|
||||
},
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyReissueAuthkey: configFileAuthKey,
|
||||
},
|
||||
WantLog: "watching for config changes via fsnotify",
|
||||
}, {
|
||||
UpdateFiles: map[string]string{
|
||||
"etc/tailscaled/cap-95.hujson": fmt.Sprintf(`{"Version":"alpha0","AuthKey":"%s"}`, newAuthKey),
|
||||
"etc/tailscaled/..data": "updated",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
WantExitCode: new(0),
|
||||
WantLog: "Successfully received new auth key, restarting to apply configuration",
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
"clears_reissue_authkey_on_change": func(env *testEnv) testCase {
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
"TS_EXPERIMENTAL_VERSIONED_CONFIG_DIR": filepath.Join(env.d, "etc/tailscaled/"),
|
||||
"KUBERNETES_SERVICE_HOST": env.kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": env.kube.Port,
|
||||
},
|
||||
KubeSecret: map[string]string{
|
||||
kubetypes.KeyReissueAuthkey: "some-older-authkey",
|
||||
"foo": "bar", // Check not everything is cleared.
|
||||
},
|
||||
Phases: []phase{
|
||||
{
|
||||
WantCmds: []string{
|
||||
"/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=kube:tailscale --statedir=/tmp --tun=userspace-networking --config=/etc/tailscaled/cap-95.hujson",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
"foo": "bar",
|
||||
},
|
||||
}, {
|
||||
Notify: runningNotify,
|
||||
WantKubeSecret: map[string]string{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
"foo": "bar",
|
||||
kubetypes.KeyDeviceFQDN: "test-node.test.ts.net.",
|
||||
kubetypes.KeyDeviceID: "myID",
|
||||
kubetypes.KeyDeviceIPs: `["100.64.0.1"]`,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
"metrics_enabled": func(env *testEnv) testCase {
|
||||
return testCase{
|
||||
Env: map[string]string{
|
||||
@@ -1134,19 +1262,22 @@ func TestContainerBoot(t *testing.T) {
|
||||
for k, v := range p.UpdateKubeSecret {
|
||||
env.kube.SetSecret(k, v)
|
||||
}
|
||||
for path, content := range p.UpdateFiles {
|
||||
fullPath := filepath.Join(env.d, path)
|
||||
if err := os.WriteFile(fullPath, []byte(content), 0700); err != nil {
|
||||
t.Fatalf("phase %d: updating file %q: %v", i, path, err)
|
||||
}
|
||||
// Explicitly update mtime to ensure fsnotify detects the change.
|
||||
// Without this, file operations can be buffered and fsnotify events may not trigger.
|
||||
now := time.Now()
|
||||
if err := os.Chtimes(fullPath, now, now); err != nil {
|
||||
t.Fatalf("phase %d: updating mtime for %q: %v", i, path, err)
|
||||
}
|
||||
}
|
||||
env.lapi.Notify(p.Notify)
|
||||
if p.Signal != nil {
|
||||
cmd.Process.Signal(*p.Signal)
|
||||
}
|
||||
if p.WantLog != "" {
|
||||
err := tstest.WaitFor(2*time.Second, func() error {
|
||||
waitLogLine(t, time.Second, cbOut, p.WantLog)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if p.WantExitCode != nil {
|
||||
state, err := cmd.Process.Wait()
|
||||
@@ -1156,14 +1287,19 @@ func TestContainerBoot(t *testing.T) {
|
||||
if state.ExitCode() != *p.WantExitCode {
|
||||
t.Fatalf("phase %d: want exit code %d, got %d", i, *p.WantExitCode, state.ExitCode())
|
||||
}
|
||||
|
||||
// Early test return, we don't expect the successful startup log message.
|
||||
return
|
||||
}
|
||||
|
||||
wantCmds = append(wantCmds, p.WantCmds...)
|
||||
waitArgs(t, 2*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))
|
||||
err := tstest.WaitFor(2*time.Second, func() error {
|
||||
if p.WantLog != "" {
|
||||
err := tstest.WaitFor(5*time.Second, func() error {
|
||||
waitLogLine(t, 5*time.Second, cbOut, p.WantLog)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
err := tstest.WaitFor(5*time.Second, func() error {
|
||||
if p.WantKubeSecret != nil {
|
||||
got := env.kube.Secret()
|
||||
if diff := cmp.Diff(got, p.WantKubeSecret); diff != "" {
|
||||
@@ -1180,6 +1316,16 @@ func TestContainerBoot(t *testing.T) {
|
||||
if err != nil {
|
||||
t.Fatalf("test: %q phase %d: %v", name, i, err)
|
||||
}
|
||||
|
||||
// if we provide a wanted exit code, we expect that the process is finished,
|
||||
// so should return from the test.
|
||||
if p.WantExitCode != nil {
|
||||
return
|
||||
}
|
||||
|
||||
wantCmds = append(wantCmds, p.WantCmds...)
|
||||
waitArgs(t, 2*time.Second, env.d, env.argFile, strings.Join(wantCmds, "\n"))
|
||||
|
||||
err = tstest.WaitFor(2*time.Second, func() error {
|
||||
for path, want := range p.WantFiles {
|
||||
gotBs, err := os.ReadFile(filepath.Join(env.d, path))
|
||||
@@ -1393,6 +1539,13 @@ func (lc *localAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
default:
|
||||
panic(fmt.Sprintf("unsupported method %q", r.Method))
|
||||
}
|
||||
// In the localAPI ServeHTTP method
|
||||
case "/localapi/v0/disconnect-control":
|
||||
if r.Method != "POST" {
|
||||
panic(fmt.Sprintf("unsupported method %q", r.Method))
|
||||
}
|
||||
w.WriteHeader(http.StatusOK)
|
||||
return
|
||||
default:
|
||||
panic(fmt.Sprintf("unsupported path %q", r.URL.Path))
|
||||
}
|
||||
@@ -1591,7 +1744,11 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) {
|
||||
panic(fmt.Sprintf("json decode failed: %v. Body:\n\n%s", err, string(bs)))
|
||||
}
|
||||
for key, val := range req.Data {
|
||||
k.secret[key] = string(val)
|
||||
if val == nil {
|
||||
delete(k.secret, key)
|
||||
} else {
|
||||
k.secret[key] = string(val)
|
||||
}
|
||||
}
|
||||
default:
|
||||
panic(fmt.Sprintf("unknown content type %q", r.Header.Get("Content-Type")))
|
||||
@@ -1659,7 +1816,7 @@ func newTestEnv(t *testing.T) testEnv {
|
||||
kube.Start(t)
|
||||
t.Cleanup(kube.Close)
|
||||
|
||||
tailscaledConf := &ipn.ConfigVAlpha{AuthKey: new("foo"), Version: "alpha0"}
|
||||
tailscaledConf := &ipn.ConfigVAlpha{AuthKey: new(configFileAuthKey), Version: "alpha0"}
|
||||
serveConf := ipn.ServeConfig{TCP: map[uint16]*ipn.TCPPortHandler{80: {HTTP: true}}}
|
||||
serveConfWithServices := ipn.ServeConfig{
|
||||
TCP: map[uint16]*ipn.TCPPortHandler{80: {HTTP: true}},
|
||||
|
||||
Reference in New Issue
Block a user