From 1403920367135fc2559ee97123a64bae2405365d Mon Sep 17 00:00:00 2001 From: Mike O'Driscoll Date: Tue, 24 Mar 2026 10:52:20 -0400 Subject: [PATCH] derp,types,util: use bufio Peek+Discard for allocation-free fast reads (#19067) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace byte-at-a-time ReadByte loops with Peek+Discard in the DERP read path. Peek returns a slice into bufio's internal buffer without allocating, and Discard advances the read pointer without copying. Introduce util/bufiox with a BufferedReader interface and ReadFull helper that uses Peek+copy+Discard as an allocation-free alternative to io.ReadFull. - derp.ReadFrameHeader: replace 5× ReadByte with Peek(5)+Discard(5), reading the frame type and length directly from the peeked slice. Remove now-unused readUint32 helper. name old ns/op new ns/op speedup ReadFrameHeader-8 24.2 12.4 ~2x (0 allocs/op in both) - key.NodePublic.ReadRawWithoutAllocating: replace 32× ReadByte with bufiox.ReadFull. Addresses the "Dear future" comment about switching away from byte-at-a-time reads once a non-escaping alternative exists. name old ns/op new ns/op speedup NodeReadRawWithoutAllocating-8 140 43.6 ~3.2x (0 allocs/op in both) - derpserver.handleFramePing: replace io.ReadFull with bufiox.ReadFull. Updates tailscale/corp#38509 Signed-off-by: Mike O'Driscoll --- cmd/derper/depaware.txt | 1 + cmd/k8s-operator/depaware.txt | 1 + cmd/stund/depaware.txt | 1 + cmd/tailscale/depaware.txt | 1 + cmd/tailscaled/depaware-min.txt | 1 + cmd/tailscaled/depaware-minbox.txt | 1 + cmd/tailscaled/depaware.txt | 1 + cmd/tsidp/depaware.txt | 1 + derp/client_test.go | 7 +-- derp/derp.go | 29 ++------- derp/derp_test.go | 59 ++++++++++++++++++ derp/derpserver/derpserver.go | 5 +- tsnet/depaware.txt | 1 + types/key/node.go | 21 ++----- types/key/node_test.go | 19 ++++++ util/bufiox/bufiox.go | 31 ++++++++++ util/bufiox/bufiox_test.go | 98 ++++++++++++++++++++++++++++++ 17 files changed, 231 insertions(+), 47 deletions(-) create mode 100644 util/bufiox/bufiox.go create mode 100644 util/bufiox/bufiox_test.go diff --git a/cmd/derper/depaware.txt b/cmd/derper/depaware.txt index 30d19b781..ec59c7264 100644 --- a/cmd/derper/depaware.txt +++ b/cmd/derper/depaware.txt @@ -142,6 +142,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa tailscale.com/types/structs from tailscale.com/ipn+ tailscale.com/types/tkatype from tailscale.com/client/local+ tailscale.com/types/views from tailscale.com/ipn+ + tailscale.com/util/bufiox from tailscale.com/derp/derpserver+ tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/net/netmon+ tailscale.com/util/cloudenv from tailscale.com/hostinfo+ diff --git a/cmd/k8s-operator/depaware.txt b/cmd/k8s-operator/depaware.txt index 3c8e35542..a3340d03b 100644 --- a/cmd/k8s-operator/depaware.txt +++ b/cmd/k8s-operator/depaware.txt @@ -933,6 +933,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/ tailscale.com/types/tkatype from tailscale.com/client/local+ tailscale.com/types/views from tailscale.com/appc+ tailscale.com/util/backoff from tailscale.com/cmd/k8s-operator+ + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+ tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/cmd/k8s-operator+ diff --git a/cmd/stund/depaware.txt b/cmd/stund/depaware.txt index 0b42072d9..7804915dc 100644 --- a/cmd/stund/depaware.txt +++ b/cmd/stund/depaware.txt @@ -75,6 +75,7 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar tailscale.com/types/structs from tailscale.com/tailcfg+ tailscale.com/types/tkatype from tailscale.com/tailcfg+ tailscale.com/types/views from tailscale.com/net/tsaddr+ + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/ctxkey from tailscale.com/tsweb+ L 💣 tailscale.com/util/dirwalk from tailscale.com/metrics tailscale.com/util/dnsname from tailscale.com/tailcfg diff --git a/cmd/tailscale/depaware.txt b/cmd/tailscale/depaware.txt index 4d6a1efb6..01d3f418f 100644 --- a/cmd/tailscale/depaware.txt +++ b/cmd/tailscale/depaware.txt @@ -258,6 +258,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep tailscale.com/types/tkatype from tailscale.com/types/key+ tailscale.com/types/views from tailscale.com/tailcfg+ tailscale.com/util/backoff from tailscale.com/cmd/tailscale/cli + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/net/netcheck+ tailscale.com/util/cloudenv from tailscale.com/net/dnscache+ diff --git a/cmd/tailscaled/depaware-min.txt b/cmd/tailscaled/depaware-min.txt index 43b61f7f3..94ebf144b 100644 --- a/cmd/tailscaled/depaware-min.txt +++ b/cmd/tailscaled/depaware-min.txt @@ -151,6 +151,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/types/tkatype from tailscale.com/control/controlclient+ tailscale.com/types/views from tailscale.com/appc+ tailscale.com/util/backoff from tailscale.com/control/controlclient+ + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+ tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/appc+ diff --git a/cmd/tailscaled/depaware-minbox.txt b/cmd/tailscaled/depaware-minbox.txt index 0da47e03a..e518613f8 100644 --- a/cmd/tailscaled/depaware-minbox.txt +++ b/cmd/tailscaled/depaware-minbox.txt @@ -170,6 +170,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/types/tkatype from tailscale.com/control/controlclient+ tailscale.com/types/views from tailscale.com/appc+ tailscale.com/util/backoff from tailscale.com/control/controlclient+ + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+ tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/appc+ diff --git a/cmd/tailscaled/depaware.txt b/cmd/tailscaled/depaware.txt index 8af768895..507162809 100644 --- a/cmd/tailscaled/depaware.txt +++ b/cmd/tailscaled/depaware.txt @@ -427,6 +427,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de tailscale.com/types/tkatype from tailscale.com/tka+ tailscale.com/types/views from tailscale.com/ipn/ipnlocal+ tailscale.com/util/backoff from tailscale.com/cmd/tailscaled+ + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+ tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/control/controlclient+ diff --git a/cmd/tsidp/depaware.txt b/cmd/tsidp/depaware.txt index 123df14ce..360437860 100644 --- a/cmd/tsidp/depaware.txt +++ b/cmd/tsidp/depaware.txt @@ -334,6 +334,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar tailscale.com/types/tkatype from tailscale.com/client/local+ tailscale.com/types/views from tailscale.com/appc+ tailscale.com/util/backoff from tailscale.com/control/controlclient+ + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+ tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/appc+ diff --git a/derp/client_test.go b/derp/client_test.go index e1bcaba8b..b3bacdb85 100644 --- a/derp/client_test.go +++ b/derp/client_test.go @@ -141,15 +141,12 @@ func (r nopRead) Read(p []byte) (int, error) { return len(p), nil } -var sinkU32 uint32 - -func BenchmarkReadUint32(b *testing.B) { +func BenchmarkReadFrameHeader(b *testing.B) { r := bufio.NewReader(nopRead{}) - var err error b.ReportAllocs() b.ResetTimer() for range b.N { - sinkU32, err = readUint32(r) + _, _, err := ReadFrameHeader(r) if err != nil { b.Fatal(err) } diff --git a/derp/derp.go b/derp/derp.go index a7d0ea801..d4a09415a 100644 --- a/derp/derp.go +++ b/derp/derp.go @@ -183,21 +183,6 @@ func writeUint32(bw *bufio.Writer, v uint32) error { return nil } -func readUint32(br *bufio.Reader) (uint32, error) { - var b [4]byte - // Reading a byte at a time is a bit silly, - // but it causes b not to escape, - // which more than pays for the silliness. - for i := range &b { - c, err := br.ReadByte() - if err != nil { - return 0, err - } - b[i] = c - } - return bin.Uint32(b[:]), nil -} - // ReadFrameTypeHeader reads a frame header from br and // verifies that the frame type matches wantType. // @@ -213,18 +198,16 @@ func ReadFrameTypeHeader(br *bufio.Reader, wantType FrameType) (frameLen uint32, return frameLen, err } -// ReadFrameHeader reads the header of a DERP frame, -// reading 5 bytes from br. +// ReadFrameHeader reads a DERP frame header ([FrameHeaderLen] bytes) from br. +// It uses Peek+Discard to read directly from bufio's internal buffer +// without copying or allocating. func ReadFrameHeader(br *bufio.Reader) (t FrameType, frameLen uint32, err error) { - tb, err := br.ReadByte() - if err != nil { - return 0, 0, err - } - frameLen, err = readUint32(br) + hdr, err := br.Peek(FrameHeaderLen) if err != nil { return 0, 0, err } - return FrameType(tb), frameLen, nil + defer br.Discard(FrameHeaderLen) + return FrameType(hdr[0]), bin.Uint32(hdr[1:FrameHeaderLen]), nil } // readFrame reads a frame header and then reads its payload into diff --git a/derp/derp_test.go b/derp/derp_test.go index f2ccefc9f..0df49969e 100644 --- a/derp/derp_test.go +++ b/derp/derp_test.go @@ -34,6 +34,65 @@ type ( Client = derp.Client ) +func TestReadFrameHeader(t *testing.T) { + tests := []struct { + name string + input [5]byte + wantType derp.FrameType + wantLen uint32 + }{ + { + name: "SendPacket", + input: [5]byte{byte(derp.FrameSendPacket), 0x00, 0x00, 0x04, 0x00}, + wantType: derp.FrameSendPacket, + wantLen: 1024, + }, + { + name: "KeepAlive", + input: [5]byte{byte(derp.FrameKeepAlive), 0x00, 0x00, 0x00, 0x00}, + wantType: derp.FrameKeepAlive, + wantLen: 0, + }, + { + name: "MaxLen", + input: [5]byte{byte(derp.FrameRecvPacket), 0xff, 0xff, 0xff, 0xff}, + wantType: derp.FrameRecvPacket, + wantLen: 0xffffffff, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + br := bufio.NewReader(bytes.NewReader(tt.input[:])) + gotType, gotLen, err := derp.ReadFrameHeader(br) + if err != nil { + t.Fatalf("ReadFrameHeader: %v", err) + } + if gotType != tt.wantType { + t.Errorf("type = %v, want %v", gotType, tt.wantType) + } + if gotLen != tt.wantLen { + t.Errorf("len = %v, want %v", gotLen, tt.wantLen) + } + }) + } + + // Verify zero allocations. + buf := make([]byte, 4096) + rd := bytes.NewReader(buf) + br := bufio.NewReader(rd) + got := testing.AllocsPerRun(1000, func() { + rd.Reset(buf) + br.Reset(rd) + _, _, err := derp.ReadFrameHeader(br) + if err != nil { + t.Fatalf("ReadFrameHeader: %v", err) + } + }) + if got != 0 { + t.Fatalf("ReadFrameHeader allocs = %f, want 0", got) + } +} + func TestClientInfoUnmarshal(t *testing.T) { for i, in := range map[string]struct { json string diff --git a/derp/derpserver/derpserver.go b/derp/derpserver/derpserver.go index 66385dd77..0959a4729 100644 --- a/derp/derpserver/derpserver.go +++ b/derp/derpserver/derpserver.go @@ -52,6 +52,7 @@ import ( "tailscale.com/tstime/rate" "tailscale.com/types/key" "tailscale.com/types/logger" + "tailscale.com/util/bufiox" "tailscale.com/util/ctxkey" "tailscale.com/util/mak" "tailscale.com/util/set" @@ -1088,10 +1089,10 @@ func (c *sclient) handleFramePing(ft derp.FrameType, fl uint32) error { // space for future extensibility, but not too much. return fmt.Errorf("ping body too large: %v", fl) } - _, err := io.ReadFull(c.br, m[:]) - if err != nil { + if _, err := bufiox.ReadFull(c.br, m[:]); err != nil { return err } + var err error if extra := int64(fl) - int64(len(m)); extra > 0 { _, err = io.CopyN(io.Discard, c.br, extra) } diff --git a/tsnet/depaware.txt b/tsnet/depaware.txt index 910314ef1..b8b6aec98 100644 --- a/tsnet/depaware.txt +++ b/tsnet/depaware.txt @@ -329,6 +329,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware) tailscale.com/types/tkatype from tailscale.com/client/local+ tailscale.com/types/views from tailscale.com/appc+ tailscale.com/util/backoff from tailscale.com/control/controlclient+ + tailscale.com/util/bufiox from tailscale.com/types/key tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+ tailscale.com/util/cibuild from tailscale.com/health+ tailscale.com/util/clientmetric from tailscale.com/appc+ diff --git a/types/key/node.go b/types/key/node.go index 83be593af..1b0232b11 100644 --- a/types/key/node.go +++ b/types/key/node.go @@ -15,6 +15,7 @@ import ( "golang.org/x/crypto/curve25519" "golang.org/x/crypto/nacl/box" "tailscale.com/types/structs" + "tailscale.com/util/bufiox" ) const ( @@ -242,28 +243,14 @@ func (k NodePublic) AppendTo(buf []byte) []byte { } // ReadRawWithoutAllocating initializes k with bytes read from br. -// The reading is done ~4x slower than io.ReadFull, but in exchange is -// allocation-free. +// It uses [bufiox.ReadFull] to read without heap allocations. func (k *NodePublic) ReadRawWithoutAllocating(br *bufio.Reader) error { var z NodePublic if *k != z { return errors.New("refusing to read into non-zero NodePublic") } - // This is ~4x slower than io.ReadFull, but using io.ReadFull - // causes one extra alloc, which is significant for the DERP - // server that consumes this method. So, process stuff slower but - // without allocation. - // - // Dear future: if io.ReadFull stops causing stuff to escape, you - // should switch back to that. - for i := range k.k { - b, err := br.ReadByte() - if err != nil { - return err - } - k.k[i] = b - } - return nil + _, err := bufiox.ReadFull(br, k.k[:]) + return err } // WriteRawWithoutAllocating writes out k as 32 bytes to bw. diff --git a/types/key/node_test.go b/types/key/node_test.go index 77eef2b28..0bcda0465 100644 --- a/types/key/node_test.go +++ b/types/key/node_test.go @@ -125,6 +125,25 @@ func TestNodeReadRawWithoutAllocating(t *testing.T) { } } +func BenchmarkNodeReadRawWithoutAllocating(b *testing.B) { + buf := make([]byte, 32) + for i := range buf { + buf[i] = 0x42 + } + r := bytes.NewReader(buf) + br := bufio.NewReader(r) + b.ReportAllocs() + b.ResetTimer() + for range b.N { + r.Reset(buf) + br.Reset(r) + var k NodePublic + if err := k.ReadRawWithoutAllocating(br); err != nil { + b.Fatal(err) + } + } +} + func TestNodeWriteRawWithoutAllocating(t *testing.T) { buf := make([]byte, 0, 32) w := bytes.NewBuffer(buf) diff --git a/util/bufiox/bufiox.go b/util/bufiox/bufiox.go new file mode 100644 index 000000000..ceea9aefe --- /dev/null +++ b/util/bufiox/bufiox.go @@ -0,0 +1,31 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +// Package bufiox provides extensions to the standard bufio package. +package bufiox + +import "io" + +// BufferedReader is an interface for readers that support peeking +// into an internal buffer, like [bufio.Reader]. +type BufferedReader interface { + Peek(n int) ([]byte, error) + Discard(n int) (discarded int, err error) +} + +// ReadFull reads exactly len(buf) bytes from r into buf, like +// [io.ReadFull], but without heap allocations. It uses Peek to +// access the buffered data directly, copies it into buf, then +// discards the consumed bytes. If an error occurs, +// discard is not called and the buffer is left unchanged. +func ReadFull(r BufferedReader, buf []byte) (int, error) { + b, err := r.Peek(len(buf)) + if err != nil { + if len(b) > 0 && err == io.EOF { + err = io.ErrUnexpectedEOF + } + return 0, err + } + defer r.Discard(len(buf)) + return copy(buf, b), nil +} diff --git a/util/bufiox/bufiox_test.go b/util/bufiox/bufiox_test.go new file mode 100644 index 000000000..727bb3699 --- /dev/null +++ b/util/bufiox/bufiox_test.go @@ -0,0 +1,98 @@ +// Copyright (c) Tailscale Inc & contributors +// SPDX-License-Identifier: BSD-3-Clause + +package bufiox + +import ( + "bufio" + "bytes" + "io" + "testing" +) + +func TestReadFull(t *testing.T) { + data := []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08} + br := bufio.NewReader(bytes.NewReader(data)) + + var buf [5]byte + n, err := ReadFull(br, buf[:]) + if err != nil { + t.Fatalf("ReadFull: %v", err) + } + if n != len(buf) { + t.Fatalf("n = %d, want %d", n, len(buf)) + } + if want := [5]byte{0x01, 0x02, 0x03, 0x04, 0x05}; buf != want { + t.Fatalf("buf = %v, want %v", buf, want) + } + + // Remaining bytes should still be readable. + var rest [3]byte + n, err = ReadFull(br, rest[:]) + if err != nil { + t.Fatalf("ReadFull rest: %v", err) + } + if n != len(rest) { + t.Fatalf("rest n = %d, want %d", n, len(rest)) + } + if want := [3]byte{0x06, 0x07, 0x08}; rest != want { + t.Fatalf("rest = %v, want %v", rest, want) + } +} + +func TestReadFullShort(t *testing.T) { + data := []byte{0x01, 0x02} + br := bufio.NewReader(bytes.NewReader(data)) + + var buf [5]byte + _, err := ReadFull(br, buf[:]) + if err != io.ErrUnexpectedEOF { + t.Fatalf("err = %v, want %v", err, io.ErrUnexpectedEOF) + } +} + +func TestReadFullEmpty(t *testing.T) { + br := bufio.NewReader(bytes.NewReader(nil)) + + var buf [1]byte + _, err := ReadFull(br, buf[:]) + if err != io.EOF { + t.Fatalf("err = %v, want %v", err, io.EOF) + } +} + +func TestReadFullZeroAllocs(t *testing.T) { + data := make([]byte, 64) + rd := bytes.NewReader(data) + br := bufio.NewReader(rd) + + var buf [32]byte + got := testing.AllocsPerRun(1000, func() { + rd.Reset(data) + br.Reset(rd) + _, err := ReadFull(br, buf[:]) + if err != nil { + t.Fatalf("ReadFull: %v", err) + } + }) + if got != 0 { + t.Fatalf("ReadFull allocs = %f, want 0", got) + } +} + +type nopReader struct{} + +func (nopReader) Read(p []byte) (int, error) { return len(p), nil } + +func BenchmarkReadFull(b *testing.B) { + br := bufio.NewReader(nopReader{}) + var buf [32]byte + b.ReportAllocs() + b.ResetTimer() + for range b.N { + _, err := ReadFull(br, buf[:]) + if err != nil { + b.Fatal(err) + } + } +}