cmd/{containerboot,k8s-operator}: reissue auth keys for broken proxies (#16450)
Adds logic for containerboot to signal that it can't auth, so the
operator can reissue a new auth key. This only applies when running with
a config file and with a kube state store.
If the operator sees reissue_authkey in a state Secret, it will create a
new auth key iff the config has no auth key or its auth key matches the
value of reissue_authkey from the state Secret. This is to ensure we
don't reissue auth keys in a tight loop if the proxy is slow to start or
failing for some other reason. The reissue logic also uses a burstable
rate limiter to ensure there's no way a terminally misconfigured
or buggy operator can automatically generate new auth keys in a tight loop.
Additional implementation details (ChaosInTheCRD):
- Added `ipn.NotifyInitialHealthState` to the ipn watcher, to ensure that
  `n.Health` is populated when notifications are returned.
- On auth failure, containerboot:
  - Disconnects from control server
  - Sets reissue_authkey marker in state Secret with the failing key
  - Polls config file for new auth key (10 minute timeout)
  - Restarts after receiving new key to apply it
- Modified operator's reissue logic slightly:
  - Deletes old device from tailnet before creating new key
  - Rate limiting: 1 key per 30s with initial burst equal to replica count
  - In-flight tracking (authKeyReissuing map) prevents duplicate API calls
    across reconcile loops
Updates #14080
Change-Id: I6982f8e741932a6891f2f48a2936f7f6a455317f
(cherry picked from commit 969927c47c3d4de05e90f5b26a6d8d931c5ceed4)
Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
Co-authored-by: chaosinthecrd <tom@tmlabs.co.uk>
This commit is contained in:
@@ -248,25 +248,42 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
capver := fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
|
||||
for name, tc := range map[string]struct {
|
||||
podUID string
|
||||
authkey string
|
||||
initial map[string][]byte
|
||||
expected map[string][]byte
|
||||
}{
|
||||
"empty_initial": {
|
||||
podUID: "1234",
|
||||
authkey: "new-authkey",
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyPodUID: []byte("1234"),
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"empty_initial_no_pod_uid": {
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"only_relevant_keys_updated": {
|
||||
podUID: "1234",
|
||||
podUID: "1234",
|
||||
authkey: "new-authkey",
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyCapVer: []byte("1"),
|
||||
kubetypes.KeyPodUID: []byte("5678"),
|
||||
@@ -295,6 +312,57 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
// Tailscaled keys not included in patch.
|
||||
},
|
||||
},
|
||||
"new_authkey_issued": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "new-authkey",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyReissueAuthkey: nil,
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"authkey_not_yet_updated": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "old-authkey",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// reissue_authkey not cleared.
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
"authkey_deleted_from_config": {
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyReissueAuthkey: []byte("old-authkey"),
|
||||
},
|
||||
authkey: "",
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
// reissue_authkey not cleared.
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
var actual map[string][]byte
|
||||
@@ -309,7 +377,7 @@ func TestResetContainerbootState(t *testing.T) {
|
||||
return nil
|
||||
},
|
||||
}}
|
||||
if err := kc.resetContainerbootState(context.Background(), tc.podUID); err != nil {
|
||||
if err := kc.resetContainerbootState(context.Background(), tc.podUID, tc.authkey); err != nil {
|
||||
t.Fatalf("resetContainerbootState() error = %v", err)
|
||||
}
|
||||
if diff := cmp.Diff(tc.expected, actual); diff != "" {
|
||||
|
||||
Reference in New Issue
Block a user