Files
tailscale/ipn/store/kubestore/store_kube.go
T
Will Norris 3ec5be3f51 all: remove AUTHORS file and references to it
This file was never truly necessary and has never actually been used in
the history of Tailscale's open source releases.

A Brief History of AUTHORS files
---

The AUTHORS file was a pattern developed at Google, originally for
Chromium, then adopted by Go and a bunch of other projects. The problem
was that Chromium originally had a copyright line only recognizing
Google as the copyright holder. Because Google (and most open source
projects) do not require copyright assignemnt for contributions, each
contributor maintains their copyright. Some large corporate contributors
then tried to add their own name to the copyright line in the LICENSE
file or in file headers. This quickly becomes unwieldy, and puts a
tremendous burden on anyone building on top of Chromium, since the
license requires that they keep all copyright lines intact.

The compromise was to create an AUTHORS file that would list all of the
copyright holders. The LICENSE file and source file headers would then
include that list by reference, listing the copyright holder as "The
Chromium Authors".

This also become cumbersome to simply keep the file up to date with a
high rate of new contributors. Plus it's not always obvious who the
copyright holder is. Sometimes it is the individual making the
contribution, but many times it may be their employer. There is no way
for the proejct maintainer to know.

Eventually, Google changed their policy to no longer recommend trying to
keep the AUTHORS file up to date proactively, and instead to only add to
it when requested: https://opensource.google/docs/releasing/authors.
They are also clear that:

> Adding contributors to the AUTHORS file is entirely within the
> project's discretion and has no implications for copyright ownership.

It was primarily added to appease a small number of large contributors
that insisted that they be recognized as copyright holders (which was
entirely their right to do). But it's not truly necessary, and not even
the most accurate way of identifying contributors and/or copyright
holders.

In practice, we've never added anyone to our AUTHORS file. It only lists
Tailscale, so it's not really serving any purpose. It also causes
confusion because Tailscalars put the "Tailscale Inc & AUTHORS" header
in other open source repos which don't actually have an AUTHORS file, so
it's ambiguous what that means.

Instead, we just acknowledge that the contributors to Tailscale (whoever
they are) are copyright holders for their individual contributions. We
also have the benefit of using the DCO (developercertificate.org) which
provides some additional certification of their right to make the
contribution.

The source file changes were purely mechanical with:

    git ls-files | xargs sed -i -e 's/\(Tailscale Inc &\) AUTHORS/\1 contributors/g'

Updates #cleanup

Change-Id: Ia101a4a3005adb9118051b3416f5a64a4a45987d
Signed-off-by: Will Norris <will@tailscale.com>
2026-01-23 15:49:45 -08:00

519 lines
18 KiB
Go

// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
// Package kubestore contains an ipn.StateStore implementation using Kubernetes Secrets.
package kubestore
import (
"context"
"encoding/json"
"fmt"
"net"
"net/http"
"os"
"strings"
"time"
"tailscale.com/envknob"
"tailscale.com/ipn"
"tailscale.com/ipn/store"
"tailscale.com/ipn/store/mem"
"tailscale.com/kube/kubeapi"
"tailscale.com/kube/kubeclient"
"tailscale.com/kube/kubetypes"
"tailscale.com/types/logger"
"tailscale.com/util/dnsname"
"tailscale.com/util/mak"
)
func init() {
store.Register("kube:", func(logf logger.Logf, path string) (ipn.StateStore, error) {
secretName := strings.TrimPrefix(path, "kube:")
return New(logf, secretName)
})
}
const (
// timeout is the timeout for a single state update that includes calls to the API server to write or read a
// state Secret and emit an Event.
timeout = 30 * time.Second
reasonTailscaleStateUpdated = "TailscaledStateUpdated"
reasonTailscaleStateLoaded = "TailscaleStateLoaded"
reasonTailscaleStateUpdateFailed = "TailscaleStateUpdateFailed"
reasonTailscaleStateLoadFailed = "TailscaleStateLoadFailed"
eventTypeWarning = "Warning"
eventTypeNormal = "Normal"
keyTLSCert = "tls.crt"
keyTLSKey = "tls.key"
)
// Store is an ipn.StateStore that uses a Kubernetes Secret for persistence.
type Store struct {
client kubeclient.Client
canPatch bool
secretName string // state Secret
certShareMode string // 'ro', 'rw', or empty
podName string
logf logger.Logf
// memory holds the latest tailscale state. Writes write state to a kube
// Secret and memory, Reads read from memory.
memory mem.Store
}
// New returns a new Store that persists state to Kubernets Secret(s).
// Tailscale state is stored in a Secret named by the secretName parameter.
// TLS certs are stored and retrieved from state Secret or separate Secrets
// named after TLS endpoints if running in cert share mode.
func New(logf logger.Logf, secretName string) (*Store, error) {
c, err := newClient()
if err != nil {
return nil, err
}
return newWithClient(logf, c, secretName)
}
func newClient() (kubeclient.Client, error) {
c, err := kubeclient.New("tailscale-state-store")
if err != nil {
return nil, err
}
if os.Getenv("TS_KUBERNETES_READ_API_SERVER_ADDRESS_FROM_ENV") == "true" {
// Derive the API server address from the environment variables
c.SetURL(fmt.Sprintf("https://%s:%s", os.Getenv("KUBERNETES_SERVICE_HOST"), os.Getenv("KUBERNETES_SERVICE_PORT_HTTPS")))
}
return c, nil
}
func newWithClient(logf logger.Logf, c kubeclient.Client, secretName string) (*Store, error) {
canPatch, _, err := c.CheckSecretPermissions(context.Background(), secretName)
if err != nil {
return nil, err
}
s := &Store{
client: c,
canPatch: canPatch,
secretName: secretName,
podName: os.Getenv("POD_NAME"),
logf: logf,
}
if envknob.IsCertShareReadWriteMode() {
s.certShareMode = "rw"
} else if envknob.IsCertShareReadOnlyMode() {
s.certShareMode = "ro"
}
// Load latest state from kube Secret if it already exists.
if err := s.loadState(); err != nil && err != ipn.ErrStateNotExist {
return nil, fmt.Errorf("error loading state from kube Secret: %w", err)
}
// If we are in read-only cert share mode, pre-load existing shared certs.
// Write replicas never load certs in-memory to avoid a situation where,
// after Ingress recreation (and the associated cert Secret recreation), new
// TLS certs don't get issued because the write replica still has certs
// in-memory. Instead, write replicas fetch certs from Secret on each request.
if s.certShareMode == "ro" {
sel := s.certSecretSelector()
if err := s.loadCerts(context.Background(), sel); err != nil {
// We will attempt to again retrieve the certs from Secrets when a request for an HTTPS endpoint
// is received.
s.logf("[unexpected] error loading TLS certs: %v", err)
}
}
if s.certShareMode == "ro" {
go s.runCertReload(context.Background())
}
return s, nil
}
func (s *Store) SetDialer(d func(ctx context.Context, network, address string) (net.Conn, error)) {
s.client.SetDialer(d)
}
func (s *Store) String() string { return "kube.Store" }
// ReadState implements the StateStore interface.
func (s *Store) ReadState(id ipn.StateKey) ([]byte, error) {
return s.memory.ReadState(ipn.StateKey(sanitizeKey(id)))
}
// WriteState implements the StateStore interface.
func (s *Store) WriteState(id ipn.StateKey, bs []byte) (err error) {
defer func() {
if err == nil {
s.memory.WriteState(ipn.StateKey(sanitizeKey(id)), bs)
}
}()
return s.updateSecret(map[string][]byte{string(id): bs}, s.secretName)
}
// WriteTLSCertAndKey writes a TLS cert and key to domain.crt, domain.key fields
// of a Tailscale Kubernetes node's state Secret.
func (s *Store) WriteTLSCertAndKey(domain string, cert, key []byte) (err error) {
if s.certShareMode == "ro" {
s.logf("[unexpected] TLS cert and key write in read-only mode")
}
if err := dnsname.ValidHostname(domain); err != nil {
return fmt.Errorf("invalid domain name %q: %w", domain, err)
}
secretName := s.secretName
data := map[string][]byte{
domain + ".crt": cert,
domain + ".key": key,
}
// If we run in cert share mode, cert and key for a DNS name are written
// to a separate Secret.
if s.certShareMode == "rw" {
secretName = domain
data = map[string][]byte{
keyTLSCert: cert,
keyTLSKey: key,
}
}
if err := s.updateSecret(data, secretName); err != nil {
return fmt.Errorf("error writing TLS cert and key to Secret: %w", err)
}
// TODO(irbekrm): certs for write replicas are currently not
// written to memory to avoid out of sync memory state after
// Ingress resources have been recreated. This means that TLS
// certs for write replicas are retrieved from the Secret on
// each HTTPS request. This is a temporary solution till we
// implement a Secret watch.
if s.certShareMode != "rw" {
s.memory.WriteState(ipn.StateKey(domain+".crt"), cert)
s.memory.WriteState(ipn.StateKey(domain+".key"), key)
}
return nil
}
// ReadTLSCertAndKey reads a TLS cert and key from memory or from a
// domain-specific Secret. It first checks the in-memory store, if not found in
// memory and running cert store in read-only mode, looks up a Secret.
// Note that write replicas of HA Ingress always retrieve TLS certs from Secrets.
func (s *Store) ReadTLSCertAndKey(domain string) (cert, key []byte, err error) {
if err := dnsname.ValidHostname(domain); err != nil {
return nil, nil, fmt.Errorf("invalid domain name %q: %w", domain, err)
}
certKey := domain + ".crt"
keyKey := domain + ".key"
cert, err = s.memory.ReadState(ipn.StateKey(certKey))
if err == nil {
key, err = s.memory.ReadState(ipn.StateKey(keyKey))
if err == nil {
return cert, key, nil
}
}
if s.certShareMode == "" {
return nil, nil, ipn.ErrStateNotExist
}
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
secret, err := s.client.GetSecret(ctx, domain)
if err != nil {
if kubeclient.IsNotFoundErr(err) {
// TODO(irbekrm): we should return a more specific error
// that wraps ipn.ErrStateNotExist here.
return nil, nil, ipn.ErrStateNotExist
}
st, ok := err.(*kubeapi.Status)
if ok && st.Code == http.StatusForbidden && (s.certShareMode == "ro" || s.certShareMode == "rw") {
// In cert share mode, we read from a dedicated Secret per domain.
// To get here, we already had a cache miss from our in-memory
// store. For write replicas, that means it wasn't available on
// start and it wasn't written since. For read replicas, that means
// it wasn't available on start and it hasn't been reloaded in the
// background. So getting a "forbidden" error is an expected
// "not found" case where we've been asked for a cert we don't
// expect to issue, and so the forbidden error reflects that the
// operator didn't assign permission for a Secret for that domain.
//
// This code path gets triggered by the admin UI's machine page,
// which queries for the node's own TLS cert existing via the
// "tls-cert-status" c2n API.
return nil, nil, ipn.ErrStateNotExist
}
return nil, nil, fmt.Errorf("getting TLS Secret %q: %w", domain, err)
}
cert = secret.Data[keyTLSCert]
key = secret.Data[keyTLSKey]
if len(cert) == 0 || len(key) == 0 {
return nil, nil, ipn.ErrStateNotExist
}
// TODO(irbekrm): a read between these two separate writes would
// get a mismatched cert and key. Allow writing both cert and
// key to the memory store in a single, lock-protected operation.
//
// TODO(irbekrm): currently certs for write replicas of HA Ingress get
// retrieved from the cluster Secret on each HTTPS request to avoid a
// situation when after Ingress recreation stale certs are read from
// memory.
// Fix this by watching Secrets to ensure that memory store gets updated
// when Secrets are deleted.
if s.certShareMode == "ro" {
s.memory.WriteState(ipn.StateKey(certKey), cert)
s.memory.WriteState(ipn.StateKey(keyKey), key)
}
return cert, key, nil
}
func (s *Store) updateSecret(data map[string][]byte, secretName string) (err error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer func() {
if err != nil {
if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateUpdateFailed, err.Error()); err != nil {
s.logf("kubestore: error creating tailscaled state update Event: %v", err)
}
} else {
if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateUpdated, "Successfully updated tailscaled state Secret"); err != nil {
s.logf("kubestore: error creating tailscaled state Event: %v", err)
}
}
cancel()
}()
secret, err := s.client.GetSecret(ctx, secretName)
if err != nil {
// If the Secret does not exist, create it with the required data.
if kubeclient.IsNotFoundErr(err) && s.canCreateSecret(secretName) {
return s.client.CreateSecret(ctx, &kubeapi.Secret{
TypeMeta: kubeapi.TypeMeta{
APIVersion: "v1",
Kind: "Secret",
},
ObjectMeta: kubeapi.ObjectMeta{
Name: secretName,
},
Data: func(m map[string][]byte) map[string][]byte {
d := make(map[string][]byte, len(m))
for key, val := range m {
d[sanitizeKey(key)] = val
}
return d
}(data),
})
}
return fmt.Errorf("error getting Secret %s: %w", secretName, err)
}
if s.canPatchSecret(secretName) {
var m []kubeclient.JSONPatch
// If the user has pre-created a Secret with no data, we need to ensure the top level /data field.
if len(secret.Data) == 0 {
m = []kubeclient.JSONPatch{
{
Op: "add",
Path: "/data",
Value: func(m map[string][]byte) map[string][]byte {
d := make(map[string][]byte, len(m))
for key, val := range m {
d[sanitizeKey(key)] = val
}
return d
}(data),
},
}
// If the Secret has data, patch it with the new data.
} else {
for key, val := range data {
m = append(m, kubeclient.JSONPatch{
Op: "add",
Path: "/data/" + sanitizeKey(key),
Value: val,
})
}
}
if err := s.client.JSONPatchResource(ctx, secretName, kubeclient.TypeSecrets, m); err != nil {
return fmt.Errorf("error patching Secret %s: %w", secretName, err)
}
return nil
}
// No patch permissions, use UPDATE instead.
for key, val := range data {
mak.Set(&secret.Data, sanitizeKey(key), val)
}
if err := s.client.UpdateSecret(ctx, secret); err != nil {
return fmt.Errorf("error updating Secret %s: %w", s.secretName, err)
}
return nil
}
func (s *Store) loadState() (err error) {
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
secret, err := s.client.GetSecret(ctx, s.secretName)
if err != nil {
if st, ok := err.(*kubeapi.Status); ok && st.Code == 404 {
return ipn.ErrStateNotExist
}
if err := s.client.Event(ctx, eventTypeWarning, reasonTailscaleStateLoadFailed, err.Error()); err != nil {
s.logf("kubestore: error creating Event: %v", err)
}
return err
}
if err := s.client.Event(ctx, eventTypeNormal, reasonTailscaleStateLoaded, "Successfully loaded tailscaled state from Secret"); err != nil {
s.logf("kubestore: error creating Event: %v", err)
}
data, err := s.maybeStripAttestationKeyFromProfile(secret.Data)
if err != nil {
return fmt.Errorf("error attempting to strip attestation data from state Secret: %w", err)
}
s.memory.LoadFromMap(data)
return nil
}
// maybeStripAttestationKeyFromProfile removes the hardware attestation key
// field from serialized Tailscale profile. This is done to recover from a bug
// introduced in 1.92, where node-bound hardware attestation keys were added to
// Tailscale states stored in Kubernetes Secrets.
// See https://github.com/tailscale/tailscale/issues/18302
// TODO(irbekrm): it would be good if we could somehow determine when we no
// longer need to run this check.
func (s *Store) maybeStripAttestationKeyFromProfile(data map[string][]byte) (map[string][]byte, error) {
prefsKey := extractPrefsKey(data)
prefsBytes, ok := data[prefsKey]
if !ok {
return data, nil
}
var prefs map[string]any
if err := json.Unmarshal(prefsBytes, &prefs); err != nil {
s.logf("[unexpected]: kube store: failed to unmarshal prefs data")
// don't error as in most cases the state won't have the attestation key
return data, nil
}
config, ok := prefs["Config"].(map[string]any)
if !ok {
return data, nil
}
if _, hasKey := config["AttestationKey"]; !hasKey {
return data, nil
}
s.logf("kube store: found redundant attestation key, deleting")
delete(config, "AttestationKey")
prefsBytes, err := json.Marshal(prefs)
if err != nil {
return nil, fmt.Errorf("[unexpected] kube store: failed to marshal profile after removing attestation key: %v", err)
}
data[prefsKey] = prefsBytes
if err := s.updateSecret(map[string][]byte{prefsKey: prefsBytes}, s.secretName); err != nil {
// don't error out - this might have been a temporary kube API server
// connection issue. The key will be removed from the in-memory cache
// and we'll retry updating the Secret on the next restart.
s.logf("kube store: error updating Secret after stripping AttestationKey: %v", err)
}
return data, nil
}
const currentProfileKey = "_current-profile"
// extractPrefs returns the key at which Tailscale prefs are stored in the
// provided Secret data.
func extractPrefsKey(data map[string][]byte) string {
return string(data[currentProfileKey])
}
// runCertReload relists and reloads all TLS certs for endpoints shared by this
// node from Secrets other than the state Secret to ensure that renewed certs get eventually loaded.
// It is not critical to reload a cert immediately after
// renewal, so a daily check is acceptable.
// Currently (3/2025) this is only used for the shared HA Ingress certs on 'read' replicas.
// Note that if shared certs are not found in memory on an HTTPS request, we
// do a Secret lookup, so this mechanism does not need to ensure that newly
// added Ingresses' certs get loaded.
func (s *Store) runCertReload(ctx context.Context) {
ticker := time.NewTicker(time.Hour * 24)
defer ticker.Stop()
for {
select {
case <-ctx.Done():
return
case <-ticker.C:
sel := s.certSecretSelector()
if err := s.loadCerts(ctx, sel); err != nil {
s.logf("[unexpected] error reloading TLS certs: %v", err)
}
}
}
}
// loadCerts lists all Secrets matching the provided selector and loads TLS
// certs and keys from those.
func (s *Store) loadCerts(ctx context.Context, sel map[string]string) error {
ss, err := s.client.ListSecrets(ctx, sel)
if err != nil {
return fmt.Errorf("error listing TLS Secrets: %w", err)
}
for _, secret := range ss.Items {
if !hasTLSData(&secret) {
continue
}
// Only load secrets that have valid domain names (ending in .ts.net)
if !strings.HasSuffix(secret.Name, ".ts.net") {
continue
}
s.memory.WriteState(ipn.StateKey(secret.Name)+".crt", secret.Data[keyTLSCert])
s.memory.WriteState(ipn.StateKey(secret.Name)+".key", secret.Data[keyTLSKey])
}
return nil
}
// canCreateSecret returns true if this node should be allowed to create the given
// Secret in its namespace.
func (s *Store) canCreateSecret(secret string) bool {
// Only allow creating the state Secret (and not TLS Secrets).
return secret == s.secretName
}
// canPatchSecret returns true if this node should be allowed to patch the given
// Secret.
func (s *Store) canPatchSecret(secret string) bool {
// For backwards compatibility reasons, setups where the proxies are not
// given PATCH permissions for state Secrets are allowed. For TLS
// Secrets, we should always have PATCH permissions.
if secret == s.secretName {
return s.canPatch
}
return true
}
// certSecretSelector returns a label selector that can be used to list all
// Secrets that aren't Tailscale state Secrets and contain TLS certificates for
// HTTPS endpoints that this node serves.
// Currently (7/2025) this only applies to the Kubernetes Operator's ProxyGroup
// when spec.Type is "ingress" or "kube-apiserver".
func (s *Store) certSecretSelector() map[string]string {
if s.podName == "" {
return map[string]string{}
}
p := strings.LastIndex(s.podName, "-")
if p == -1 {
return map[string]string{}
}
pgName := s.podName[:p]
return map[string]string{
kubetypes.LabelSecretType: kubetypes.LabelSecretTypeCerts,
kubetypes.LabelManaged: "true",
"tailscale.com/proxy-group": pgName,
}
}
// hasTLSData returns true if the provided Secret contains non-empty TLS cert and key.
func hasTLSData(s *kubeapi.Secret) bool {
return len(s.Data[keyTLSCert]) != 0 && len(s.Data[keyTLSKey]) != 0
}
// sanitizeKey converts any value that can be converted to a string into a valid Kubernetes Secret key.
// Valid characters are alphanumeric, -, _, and .
// https://kubernetes.io/docs/concepts/configuration/secret/#restriction-names-data.
func sanitizeKey[T ~string](k T) string {
return strings.Map(func(r rune) rune {
if r >= 'a' && r <= 'z' || r >= 'A' && r <= 'Z' || r >= '0' && r <= '9' || r == '-' || r == '_' || r == '.' {
return r
}
return '_'
}, string(k))
}