derp/derpserver: use hashtriemap for peer lookup

Replace the process-global Server.mu lookup in the packet send hot path
with a global hashtriemap mirror of local clientSet entries. The
authoritative clients map remains guarded by Server.mu; clientsAtomic is
only a lock-free fast path for active local clients.

Misses, stale inactive client sets, duplicate accounting, and mesh
forwarding still fall back to lookupDestUncached. This avoids taking
Server.mu for the common local active-client send path, at the cost of
adding one global concurrent map that mirrors Server.clients for local
peers.

The benchmark uses four destination peers. The before run sets
TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE=true to force the old mutex lookup
path; the after run uses the hashtrie fast path.

    goos: linux
    goarch: amd64
    pkg: tailscale.com/derp/derpserver
    cpu: Intel(R) Xeon(R) 6975P-C
                          │    before     │                after                │
                          │    sec/op     │   sec/op     vs base                │
    LookupDestHashTrie-16   176.050n ± 1%   1.904n ± 6%  -98.92% (p=0.000 n=10)

                          │   before   │             after              │
                          │    B/op    │    B/op     vs base            │
    LookupDestHashTrie-16   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
    ¹ all samples are equal

                          │   before   │             after              │
                          │ allocs/op  │ allocs/op   vs base            │
    LookupDestHashTrie-16   0.000 ± 0%   0.000 ± 0%  ~ (p=1.000 n=10) ¹
    ¹ all samples are equal

Updates #3560 (very indirectly, historically)
Updates #19713 (as an alternative to that PR)

Change-Id: Ifb72e5c9854ad00e938cd24c6ab9c27312f297e8
Signed-off-by: Brad Fitzpatrick <bradfitz@tailscale.com>
This commit is contained in:
Brad Fitzpatrick
2026-05-12 20:39:20 +00:00
committed by Brad Fitzpatrick
parent 72578de033
commit 6b729795c3
8 changed files with 214 additions and 18 deletions
+155
View File
@@ -29,6 +29,8 @@ import (
"golang.org/x/time/rate"
"tailscale.com/derp"
"tailscale.com/derp/derpconst"
"tailscale.com/envknob"
"tailscale.com/tstime"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/set"
@@ -1445,6 +1447,159 @@ func TestLoadAndApplyRateConfig(t *testing.T) {
})
}
// peerHashTrieDisableEnv is the name of the debug environment variable that
// these tests set through envknob. Setting it to "true" forces the old
// mutex-based peer lookup path instead of the hashtrie fast path.
const peerHashTrieDisableEnv = "TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE"
func setPeerHashTrieDisabled(tb testing.TB, disabled bool) {
tb.Helper()
envknob.Setenv(peerHashTrieDisableEnv, fmt.Sprint(disabled))
tb.Cleanup(func() { envknob.Setenv(peerHashTrieDisableEnv, "") })
}
// TestLookupDestHashTrieFastPath checks that lookupDest resolves a destination
// whose clientSet is registered in both Server.clients and
// Server.clientsAtomic, and that the lookup still completes while the caller
// holds Server.mu.
func TestLookupDestHashTrieFastPath(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	srcKey, dstKey := pubAll(1), pubAll(2)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}

	// Register the destination in the authoritative map and in the
	// lock-free mirror, with an active client stored in its set.
	want := &sclient{key: dstKey}
	set := new(clientSet)
	set.activeClient.Store(want)
	srv.clients[dstKey] = set
	srv.clientsAtomic.Store(dstKey, set)

	sender := &sclient{s: srv, key: srcKey}

	gotClient, gotFwd, gotLen := sender.lookupDest(dstKey)
	if gotClient != want || gotFwd != nil || gotLen != 1 {
		t.Fatalf("lookupDest = (%v, %v, %d), want (%v, nil, 1)", gotClient, gotFwd, gotLen, want)
	}

	// Repeat the lookup with srv.mu held. The hashtrie fast path should not
	// acquire Server.mu, so this must not deadlock.
	srv.mu.Lock()
	lockedGot, _, _ := sender.lookupDest(dstKey)
	srv.mu.Unlock()
	if lockedGot != want {
		t.Fatalf("lookupDest got %v, want %v", lockedGot, want)
	}
}
// TestLookupDestHashTrieFallsBackForForwarder checks that a destination that
// is present only in Server.clientsMesh (and absent from both clients and
// clientsAtomic) is resolved to its forwarder, with a nil client and a
// destination length of 0.
func TestLookupDestHashTrieFallsBackForForwarder(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	dstKey := pubAll(2)
	wantFwd := testFwd(1)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{dstKey: wantFwd},
		clock:       tstime.StdClock{},
	}
	sender := &sclient{s: srv, key: pubAll(1)}

	gotClient, gotFwd, gotLen := sender.lookupDest(dstKey)
	switch {
	case gotClient != nil, gotFwd != wantFwd, gotLen != 0:
		t.Fatalf("lookupDest = (%v, %v, %d), want (nil, testFwd(1), 0)", gotClient, gotFwd, gotLen)
	}
}
// TestLookupDestHashTrieIgnoresInactiveStaleSet checks the case where
// Server.clientsAtomic holds a clientSet with no active client stored, while
// Server.clients holds a different clientSet for the same key that does have
// an active client. lookupDest must return the client from the latter set.
func TestLookupDestHashTrieIgnoresInactiveStaleSet(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	dstKey := pubAll(2)

	// The authoritative entry: a set with an active client.
	liveClient := &sclient{key: dstKey}
	liveSet := new(clientSet)
	liveSet.activeClient.Store(liveClient)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{dstKey: liveSet},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}

	// The mirror entry: a distinct set that never had an active client stored.
	srv.clientsAtomic.Store(dstKey, new(clientSet))

	sender := &sclient{s: srv, key: pubAll(1)}

	gotClient, gotFwd, gotLen := sender.lookupDest(dstKey)
	if gotClient != liveClient || gotFwd != nil || gotLen != 1 {
		t.Fatalf("lookupDest = (%v, %v, %d), want (%v, nil, 1)", gotClient, gotFwd, gotLen, liveClient)
	}
}
// TestLookupDestHashTrieNoAlloc registers four destination peers in both
// Server.clients and Server.clientsAtomic, then cycles lookupDest over them
// under testing.AllocsPerRun and requires zero allocations per call.
func TestLookupDestHashTrieNoAlloc(t *testing.T) {
	setPeerHashTrieDisabled(t, false)

	srv := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}

	var (
		dstKeys    [4]key.NodePublic
		dstClients [4]*sclient
	)
	for n := 0; n < len(dstKeys); n++ {
		k := pubAll(byte(n + 2))
		peer := &sclient{key: k}
		set := new(clientSet)
		set.activeClient.Store(peer)

		dstKeys[n] = k
		dstClients[n] = peer
		srv.clients[k] = set
		srv.clientsAtomic.Store(k, set)
	}

	sender := &sclient{s: srv, key: pubAll(1)}

	var (
		calls int
		last  *sclient
	)
	perRun := testing.AllocsPerRun(1000, func() {
		// len(dstKeys) is a power of two, so masking cycles through
		// every destination in turn.
		slot := calls & (len(dstKeys) - 1)
		last, _, _ = sender.lookupDest(dstKeys[slot])
		calls++
	})

	if last == nil {
		t.Fatal("lookupDest returned nil")
	}
	if perRun != 0 {
		t.Fatalf("lookupDest allocated %v times per run, want 0", perRun)
	}
}
// BenchmarkLookupDestHashTrie measures parallel lookupDest calls that cycle
// over four destination peers, each registered in both Server.clients and
// Server.clientsAtomic with an active client.
//
// The benchmark does not touch the TS_DEBUG_DERP_DISABLE_PEER_HASHTRIE knob
// itself, so the same benchmark can be run with that variable set to "true"
// to measure the mutex lookup path for comparison.
func BenchmarkLookupDestHashTrie(b *testing.B) {
	s := &Server{
		clients:     map[key.NodePublic]*clientSet{},
		clientsMesh: map[key.NodePublic]PacketForwarder{},
		clock:       tstime.StdClock{},
	}

	var dstKeys [4]key.NodePublic
	var dstClients [4]*sclient
	for i := range dstKeys {
		dstKeys[i] = pubAll(byte(i + 2))
		dstClients[i] = &sclient{key: dstKeys[i]}
		cs := &clientSet{}
		cs.activeClient.Store(dstClients[i])
		s.clients[dstKeys[i]] = cs
		s.clientsAtomic.Store(dstKeys[i], cs)
	}

	b.ReportAllocs()
	b.SetParallelism(32)
	b.ResetTimer()
	b.RunParallel(func(pb *testing.PB) {
		// Each worker goroutine gets its own source client and counter.
		c := &sclient{s: s, key: pubAll(1)}
		var i int
		for pb.Next() {
			// len(dstKeys) is a power of two, so masking cycles
			// through every destination.
			idx := i & (len(dstKeys) - 1)
			got, fwd, dstLen := c.lookupDest(dstKeys[idx])
			if got != dstClients[idx] || fwd != nil {
				// This body runs on RunParallel's worker goroutines.
				// FailNow (and therefore Fatalf) must only be called
				// from the goroutine running the benchmark function,
				// so record the failure with Errorf and stop this
				// worker instead.
				b.Errorf("lookupDest = (%v, %v, %d), want (%v, nil, _)", got, fwd, dstLen, dstClients[idx])
				return
			}
			i++
		}
	})
}
func BenchmarkSenderCardinalityOverhead(b *testing.B) {
hll := hyperloglog.New()
sender := key.NewNode().Public()