cmd/{containerboot,k8s-operator}: reissue auth keys for broken proxies (#16450)
Adds logic for containerboot to signal that it can't auth, so the
operator can reissue a new auth key. This only applies when running with
a config file and with a kube state store.
If the operator sees reissue_authkey in a state Secret, it will create a
new auth key iff the config has no auth key or its auth key matches the
value of reissue_authkey from the state Secret. This is to ensure we
don't reissue auth keys in a tight loop if the proxy is slow to start or
failing for some other reason. The reissue logic also uses a burstable
rate limiter to ensure there's no way a terminally misconfigured
or buggy operator can automatically generate new auth keys in a tight loop.
Additional implementation details (ChaosInTheCRD):
- Added `ipn.NotifyInitialHealthState` to the ipn watcher, to ensure that
  `n.Health` is populated when notifications are returned.
- On auth failure, containerboot:
  - Disconnects from the control server
  - Sets the reissue_authkey marker in the state Secret with the failing key
  - Polls the config file for a new auth key (10 minute timeout)
  - Restarts after receiving the new key to apply it
- Modified the operator's reissue logic slightly:
  - Deletes the old device from the tailnet before creating a new key
  - Rate limiting: 1 key per 30s with an initial burst equal to the replica count
  - In-flight tracking (authKeyReissuing map) prevents duplicate API calls
    across reconcile loops
Updates #14080
Change-Id: I6982f8e741932a6891f2f48a2936f7f6a455317f
(cherry picked from commit 969927c47c3d4de05e90f5b26a6d8d931c5ceed4)
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
Co-authored-by: chaosinthecrd <tom@tmlabs.co.uk>
This commit is contained in:
@@ -1111,7 +1111,7 @@ func tailscaledConfig(stsC *tailscaleSTSConfig, loginUrl string, newAuthkey stri
|
||||
|
||||
if newAuthkey != "" {
|
||||
conf.AuthKey = &newAuthkey
|
||||
} else if shouldRetainAuthKey(oldSecret) {
|
||||
} else if !deviceAuthed(oldSecret) {
|
||||
key, err := authKeyFromSecret(oldSecret)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error retrieving auth key from Secret: %w", err)
|
||||
@@ -1164,6 +1164,8 @@ func latestConfigFromSecret(s *corev1.Secret) (*ipn.ConfigVAlpha, error) {
|
||||
return conf, nil
|
||||
}
|
||||
|
||||
// authKeyFromSecret returns the auth key from the latest config version if
|
||||
// found, or else nil.
|
||||
func authKeyFromSecret(s *corev1.Secret) (key *string, err error) {
|
||||
conf, err := latestConfigFromSecret(s)
|
||||
if err != nil {
|
||||
@@ -1180,13 +1182,13 @@ func authKeyFromSecret(s *corev1.Secret) (key *string, err error) {
|
||||
return key, nil
|
||||
}
|
||||
|
||||
// shouldRetainAuthKey returns true if the state stored in a proxy's state Secret suggests that auth key should be
|
||||
// retained (because the proxy has not yet successfully authenticated).
|
||||
func shouldRetainAuthKey(s *corev1.Secret) bool {
|
||||
// deviceAuthed returns true if the state stored in a proxy's state Secret
|
||||
// suggests that the proxy has successfully authenticated.
|
||||
func deviceAuthed(s *corev1.Secret) bool {
|
||||
if s == nil {
|
||||
return false // nothing to retain here
|
||||
return false // No state Secret means no device state.
|
||||
}
|
||||
return len(s.Data["device_id"]) == 0 // proxy has not authed yet
|
||||
return len(s.Data["device_id"]) > 0
|
||||
}
|
||||
|
||||
func shouldAcceptRoutes(pc *tsapi.ProxyClass) bool {
|
||||
|
||||
Reference in New Issue
Block a user