cmd/{containerboot,k8s-operator}: use state Secret for checking device auth (#16328)
Previously, the operator checked the ProxyGroup status fields for information on how many of the proxies had successfully authed. Use their state Secrets instead as a more reliable source of truth. containerboot has written device_fqdn and device_ips keys to the state Secret since inception, and pod_uid since 1.78.0, so there's no need to use the API for that data. Read it from the state Secret for consistency. However, to ensure we don't read data from a previous run of containerboot, make sure we reset containerboot's state keys on startup. One other knock-on effect of that is ProxyGroups can briefly be marked not Ready while a Pod is restarting. Introduce a new ProxyGroupAvailable condition to more accurately reflect when downstream controllers can implement flows that rely on a ProxyGroup having at least 1 proxy Pod running. Fixes #16327 Change-Id: I026c18e9d23e87109a471a87b8e4fb6271716a66 Signed-off-by: Tom Proctor <tomhjp@users.noreply.github.com>
This commit is contained in:
+32
-10
@@ -18,12 +18,15 @@ import (
|
||||
"time"
|
||||
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/kube/egressservices"
|
||||
"tailscale.com/kube/ingressservices"
|
||||
"tailscale.com/kube/kubeapi"
|
||||
"tailscale.com/kube/kubeclient"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
"tailscale.com/logtail/backoff"
|
||||
"tailscale.com/tailcfg"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/set"
|
||||
)
|
||||
|
||||
// kubeClient is a wrapper around Tailscale's internal kube client that knows how to talk to the kube API server. We use
|
||||
@@ -117,20 +120,39 @@ func (kc *kubeClient) deleteAuthKey(ctx context.Context) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// storeCapVerUID stores the current capability version of tailscale and, if provided, UID of the Pod in the tailscale
|
||||
// state Secret.
|
||||
// These two fields are used by the Kubernetes Operator to observe the current capability version of tailscaled running in this container.
|
||||
func (kc *kubeClient) storeCapVerUID(ctx context.Context, podUID string) error {
|
||||
capVerS := fmt.Sprintf("%d", tailcfg.CurrentCapabilityVersion)
|
||||
d := map[string][]byte{
|
||||
kubetypes.KeyCapVer: []byte(capVerS),
|
||||
// resetContainerbootState resets state from previous runs of containerboot to
|
||||
// ensure the operator doesn't use stale state when a Pod is first recreated.
|
||||
func (kc *kubeClient) resetContainerbootState(ctx context.Context, podUID string) error {
|
||||
existingSecret, err := kc.GetSecret(ctx, kc.stateSecret)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read state Secret %q to reset state: %w", kc.stateSecret, err)
|
||||
}
|
||||
|
||||
s := &kubeapi.Secret{
|
||||
Data: map[string][]byte{
|
||||
kubetypes.KeyCapVer: fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion),
|
||||
},
|
||||
}
|
||||
if podUID != "" {
|
||||
d[kubetypes.KeyPodUID] = []byte(podUID)
|
||||
s.Data[kubetypes.KeyPodUID] = []byte(podUID)
|
||||
}
|
||||
s := &kubeapi.Secret{
|
||||
Data: d,
|
||||
|
||||
toClear := set.SetOf([]string{
|
||||
kubetypes.KeyDeviceID,
|
||||
kubetypes.KeyDeviceFQDN,
|
||||
kubetypes.KeyDeviceIPs,
|
||||
kubetypes.KeyHTTPSEndpoint,
|
||||
egressservices.KeyEgressServices,
|
||||
ingressservices.IngressConfigKey,
|
||||
})
|
||||
for key := range existingSecret.Data {
|
||||
if toClear.Contains(key) {
|
||||
// It's fine to leave the key in place as a debugging breadcrumb,
|
||||
// it should get a new value soon.
|
||||
s.Data[key] = nil
|
||||
}
|
||||
}
|
||||
|
||||
return kc.StrategicMergePatchSecret(ctx, kc.stateSecret, s, "tailscale-container")
|
||||
}
|
||||
|
||||
|
||||
@@ -8,13 +8,18 @@ package main
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
"tailscale.com/ipn"
|
||||
"tailscale.com/kube/egressservices"
|
||||
"tailscale.com/kube/ingressservices"
|
||||
"tailscale.com/kube/kubeapi"
|
||||
"tailscale.com/kube/kubeclient"
|
||||
"tailscale.com/kube/kubetypes"
|
||||
"tailscale.com/tailcfg"
|
||||
)
|
||||
|
||||
func TestSetupKube(t *testing.T) {
|
||||
@@ -238,3 +243,78 @@ func TestWaitForConsistentState(t *testing.T) {
|
||||
t.Fatalf("expected nil, got %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestResetContainerbootState(t *testing.T) {
|
||||
capver := fmt.Appendf(nil, "%d", tailcfg.CurrentCapabilityVersion)
|
||||
for name, tc := range map[string]struct {
|
||||
podUID string
|
||||
initial map[string][]byte
|
||||
expected map[string][]byte
|
||||
}{
|
||||
"empty_initial": {
|
||||
podUID: "1234",
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyPodUID: []byte("1234"),
|
||||
},
|
||||
},
|
||||
"empty_initial_no_pod_uid": {
|
||||
initial: map[string][]byte{},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
"only_relevant_keys_updated": {
|
||||
podUID: "1234",
|
||||
initial: map[string][]byte{
|
||||
kubetypes.KeyCapVer: []byte("1"),
|
||||
kubetypes.KeyPodUID: []byte("5678"),
|
||||
kubetypes.KeyDeviceID: []byte("device-id"),
|
||||
kubetypes.KeyDeviceFQDN: []byte("device-fqdn"),
|
||||
kubetypes.KeyDeviceIPs: []byte(`["192.0.2.1"]`),
|
||||
kubetypes.KeyHTTPSEndpoint: []byte("https://example.com"),
|
||||
egressservices.KeyEgressServices: []byte("egress-services"),
|
||||
ingressservices.IngressConfigKey: []byte("ingress-config"),
|
||||
"_current-profile": []byte("current-profile"),
|
||||
"_machinekey": []byte("machine-key"),
|
||||
"_profiles": []byte("profiles"),
|
||||
"_serve_e0ce": []byte("serve-e0ce"),
|
||||
"profile-e0ce": []byte("profile-e0ce"),
|
||||
},
|
||||
expected: map[string][]byte{
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyPodUID: []byte("1234"),
|
||||
// Cleared keys.
|
||||
kubetypes.KeyDeviceID: nil,
|
||||
kubetypes.KeyDeviceFQDN: nil,
|
||||
kubetypes.KeyDeviceIPs: nil,
|
||||
kubetypes.KeyHTTPSEndpoint: nil,
|
||||
egressservices.KeyEgressServices: nil,
|
||||
ingressservices.IngressConfigKey: nil,
|
||||
// Tailscaled keys not included in patch.
|
||||
},
|
||||
},
|
||||
} {
|
||||
t.Run(name, func(t *testing.T) {
|
||||
var actual map[string][]byte
|
||||
kc := &kubeClient{stateSecret: "foo", Client: &kubeclient.FakeClient{
|
||||
GetSecretImpl: func(context.Context, string) (*kubeapi.Secret, error) {
|
||||
return &kubeapi.Secret{
|
||||
Data: tc.initial,
|
||||
}, nil
|
||||
},
|
||||
StrategicMergePatchSecretImpl: func(ctx context.Context, name string, secret *kubeapi.Secret, _ string) error {
|
||||
actual = secret.Data
|
||||
return nil
|
||||
},
|
||||
}}
|
||||
if err := kc.resetContainerbootState(context.Background(), tc.podUID); err != nil {
|
||||
t.Fatalf("resetContainerbootState() error = %v", err)
|
||||
}
|
||||
if diff := cmp.Diff(tc.expected, actual); diff != "" {
|
||||
t.Errorf("resetContainerbootState() mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,6 +188,14 @@ func run() error {
|
||||
if err := cfg.setupKube(bootCtx, kc); err != nil {
|
||||
return fmt.Errorf("error setting up for running on Kubernetes: %w", err)
|
||||
}
|
||||
// Clear out any state from previous runs of containerboot. Check
|
||||
// hasKubeStateStore because although we know we're in kube, that
|
||||
// doesn't guarantee the state store is properly configured.
|
||||
if hasKubeStateStore(cfg) {
|
||||
if err := kc.resetContainerbootState(bootCtx, cfg.PodUID); err != nil {
|
||||
return fmt.Errorf("error clearing previous state from Secret: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
client, daemonProcess, err := startTailscaled(bootCtx, cfg)
|
||||
@@ -367,11 +375,6 @@ authLoop:
|
||||
if err := client.SetServeConfig(ctx, new(ipn.ServeConfig)); err != nil {
|
||||
return fmt.Errorf("failed to unset serve config: %w", err)
|
||||
}
|
||||
if hasKubeStateStore(cfg) {
|
||||
if err := kc.storeHTTPSEndpoint(ctx, ""); err != nil {
|
||||
return fmt.Errorf("failed to update HTTPS endpoint in tailscale state: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if hasKubeStateStore(cfg) && isTwoStepConfigAuthOnce(cfg) {
|
||||
@@ -384,12 +387,6 @@ authLoop:
|
||||
}
|
||||
}
|
||||
|
||||
if hasKubeStateStore(cfg) {
|
||||
if err := kc.storeCapVerUID(ctx, cfg.PodUID); err != nil {
|
||||
return fmt.Errorf("storing capability version and UID: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
w, err = client.WatchIPNBus(ctx, ipn.NotifyInitialNetMap|ipn.NotifyInitialState)
|
||||
if err != nil {
|
||||
return fmt.Errorf("rewatching tailscaled for updates after auth: %w", err)
|
||||
|
||||
@@ -460,6 +460,7 @@ func TestContainerBoot(t *testing.T) {
|
||||
Env: map[string]string{
|
||||
"KUBERNETES_SERVICE_HOST": env.kube.Host,
|
||||
"KUBERNETES_SERVICE_PORT_HTTPS": env.kube.Port,
|
||||
"POD_UID": "some-pod-uid",
|
||||
},
|
||||
KubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
@@ -471,17 +472,20 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock up --accept-dns=false --authkey=tskey-key",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"authkey": "tskey-key",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyPodUID: "some-pod-uid",
|
||||
},
|
||||
},
|
||||
{
|
||||
Notify: runningNotify,
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"tailscale_capver": capver,
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
kubetypes.KeyCapVer: capver,
|
||||
kubetypes.KeyPodUID: "some-pod-uid",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -554,7 +558,8 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscaled --socket=/tmp/tailscaled.sock --state=kube:tailscale --statedir=/tmp --tun=userspace-networking",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"authkey": "tskey-key",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -565,7 +570,8 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock up --accept-dns=false --authkey=tskey-key",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"authkey": "tskey-key",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -574,10 +580,10 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock set --accept-dns=false",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"tailscale_capver": capver,
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -599,17 +605,18 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock up --accept-dns=false --authkey=tskey-key",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"authkey": "tskey-key",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
{
|
||||
Notify: runningNotify,
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"tailscale_capver": capver,
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -624,11 +631,11 @@ func TestContainerBoot(t *testing.T) {
|
||||
},
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "new-name.test.ts.net",
|
||||
"device_id": "newID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"tailscale_capver": capver,
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "new-name.test.ts.net",
|
||||
"device_id": "newID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -912,18 +919,19 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock up --accept-dns=false --authkey=tskey-key",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"authkey": "tskey-key",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
{
|
||||
Notify: runningNotify,
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"https_endpoint": "no-https",
|
||||
"tailscale_capver": capver,
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"https_endpoint": "no-https",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -947,7 +955,8 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock up --accept-dns=false --authkey=tskey-key",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"authkey": "tskey-key",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
EndpointStatuses: map[string]int{
|
||||
egressSvcTerminateURL(env.localAddrPort): 200,
|
||||
@@ -956,12 +965,12 @@ func TestContainerBoot(t *testing.T) {
|
||||
{
|
||||
Notify: runningNotify,
|
||||
WantKubeSecret: map[string]string{
|
||||
"egress-services": mustBase64(t, egressStatus),
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
"tailscale_capver": capver,
|
||||
"egress-services": string(mustJSON(t, egressStatus)),
|
||||
"authkey": "tskey-key",
|
||||
"device_fqdn": "test-node.test.ts.net",
|
||||
"device_id": "myID",
|
||||
"device_ips": `["100.64.0.1"]`,
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
EndpointStatuses: map[string]int{
|
||||
egressSvcTerminateURL(env.localAddrPort): 200,
|
||||
@@ -1002,7 +1011,8 @@ func TestContainerBoot(t *testing.T) {
|
||||
"/usr/bin/tailscale --socket=/tmp/tailscaled.sock up --accept-dns=false --authkey=tskey-key",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"authkey": "tskey-key",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
},
|
||||
{
|
||||
@@ -1016,10 +1026,11 @@ func TestContainerBoot(t *testing.T) {
|
||||
// Missing "_current-profile" key.
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"_machinekey": "foo",
|
||||
"_profiles": "foo",
|
||||
"profile-baff": "foo",
|
||||
"authkey": "tskey-key",
|
||||
"_machinekey": "foo",
|
||||
"_profiles": "foo",
|
||||
"profile-baff": "foo",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
WantLog: "Waiting for tailscaled to finish writing state to Secret \"tailscale\"",
|
||||
},
|
||||
@@ -1029,11 +1040,12 @@ func TestContainerBoot(t *testing.T) {
|
||||
"_current-profile": "foo",
|
||||
},
|
||||
WantKubeSecret: map[string]string{
|
||||
"authkey": "tskey-key",
|
||||
"_machinekey": "foo",
|
||||
"_profiles": "foo",
|
||||
"profile-baff": "foo",
|
||||
"_current-profile": "foo",
|
||||
"authkey": "tskey-key",
|
||||
"_machinekey": "foo",
|
||||
"_profiles": "foo",
|
||||
"profile-baff": "foo",
|
||||
"_current-profile": "foo",
|
||||
kubetypes.KeyCapVer: capver,
|
||||
},
|
||||
WantLog: "HTTP server at [::]:9002 closed",
|
||||
WantExitCode: ptr.To(0),
|
||||
@@ -1061,7 +1073,7 @@ func TestContainerBoot(t *testing.T) {
|
||||
fmt.Sprintf("TS_TEST_SOCKET=%s", env.lapi.Path),
|
||||
fmt.Sprintf("TS_SOCKET=%s", env.runningSockPath),
|
||||
fmt.Sprintf("TS_TEST_ONLY_ROOT=%s", env.d),
|
||||
fmt.Sprint("TS_TEST_FAKE_NETFILTER=true"),
|
||||
"TS_TEST_FAKE_NETFILTER=true",
|
||||
}
|
||||
for k, v := range tc.Env {
|
||||
cmd.Env = append(cmd.Env, fmt.Sprintf("%s=%s", k, v))
|
||||
@@ -1489,10 +1501,7 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
switch r.Header.Get("Content-Type") {
|
||||
case "application/json-patch+json":
|
||||
req := []struct {
|
||||
Op string `json:"op"`
|
||||
Path string `json:"path"`
|
||||
}{}
|
||||
req := []kubeclient.JSONPatch{}
|
||||
if err := json.Unmarshal(bs, &req); err != nil {
|
||||
panic(fmt.Sprintf("json decode failed: %v. Body:\n\n%s", err, string(bs)))
|
||||
}
|
||||
@@ -1503,23 +1512,20 @@ func (k *kubeServer) serveSecret(w http.ResponseWriter, r *http.Request) {
|
||||
panic(fmt.Sprintf("unsupported json-patch path %q", op.Path))
|
||||
}
|
||||
delete(k.secret, strings.TrimPrefix(op.Path, "/data/"))
|
||||
case "replace":
|
||||
case "add", "replace":
|
||||
path, ok := strings.CutPrefix(op.Path, "/data/")
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("unsupported json-patch path %q", op.Path))
|
||||
}
|
||||
req := make([]kubeclient.JSONPatch, 0)
|
||||
if err := json.Unmarshal(bs, &req); err != nil {
|
||||
panic(fmt.Sprintf("json decode failed: %v. Body:\n\n%s", err, string(bs)))
|
||||
val, ok := op.Value.(string)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("unsupported json patch value %v: cannot be converted to string", op.Value))
|
||||
}
|
||||
|
||||
for _, patch := range req {
|
||||
val, ok := patch.Value.(string)
|
||||
if !ok {
|
||||
panic(fmt.Sprintf("unsupported json patch value %v: cannot be converted to string", patch.Value))
|
||||
}
|
||||
k.secret[path] = val
|
||||
v, err := base64.StdEncoding.DecodeString(val)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("json patch value %q is not base64 encoded: %v", val, err))
|
||||
}
|
||||
k.secret[path] = string(v)
|
||||
default:
|
||||
panic(fmt.Sprintf("unsupported json-patch op %q", op.Op))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user