derp,types,util: use bufio Peek+Discard for allocation-free fast reads (#19067)

Replace byte-at-a-time ReadByte loops with Peek+Discard in the DERP
read path. Peek returns a slice into bufio's internal buffer without
allocating, and Discard advances the read pointer without copying.

Introduce util/bufiox with a BufferedReader interface and ReadFull
helper that uses Peek+copy+Discard as an allocation-free alternative
to io.ReadFull.

  - derp.ReadFrameHeader: replace 5× ReadByte with Peek(5)+Discard(5),
    reading the frame type and length directly from the peeked slice.
    Remove now-unused readUint32 helper.

    name                  old ns/op  new ns/op  speedup
    ReadFrameHeader-8     24.2       12.4       ~2x
    (0 allocs/op in both)

  - key.NodePublic.ReadRawWithoutAllocating: replace 32× ReadByte with
    bufiox.ReadFull. Addresses the "Dear future" comment about switching
    away from byte-at-a-time reads once a non-escaping alternative exists.

    name                              old ns/op  new ns/op  speedup
    NodeReadRawWithoutAllocating-8    140        43.6       ~3.2x
    (0 allocs/op in both)

  - derpserver.handleFramePing: replace io.ReadFull with bufiox.ReadFull.

Updates tailscale/corp#38509

Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
main
Mike O'Driscoll 3 weeks ago committed by GitHub
parent 1d0fde6fc2
commit 1403920367
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1
      cmd/derper/depaware.txt
  2. 1
      cmd/k8s-operator/depaware.txt
  3. 1
      cmd/stund/depaware.txt
  4. 1
      cmd/tailscale/depaware.txt
  5. 1
      cmd/tailscaled/depaware-min.txt
  6. 1
      cmd/tailscaled/depaware-minbox.txt
  7. 1
      cmd/tailscaled/depaware.txt
  8. 1
      cmd/tsidp/depaware.txt
  9. 7
      derp/client_test.go
  10. 29
      derp/derp.go
  11. 59
      derp/derp_test.go
  12. 5
      derp/derpserver/derpserver.go
  13. 1
      tsnet/depaware.txt
  14. 19
      types/key/node.go
  15. 19
      types/key/node_test.go
  16. 31
      util/bufiox/bufiox.go
  17. 98
      util/bufiox/bufiox_test.go

@ -142,6 +142,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
tailscale.com/types/structs from tailscale.com/ipn+
tailscale.com/types/tkatype from tailscale.com/client/local+
tailscale.com/types/views from tailscale.com/ipn+
tailscale.com/util/bufiox from tailscale.com/derp/derpserver+
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/net/netmon+
tailscale.com/util/cloudenv from tailscale.com/hostinfo+

@ -933,6 +933,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
tailscale.com/types/tkatype from tailscale.com/client/local+
tailscale.com/types/views from tailscale.com/appc+
tailscale.com/util/backoff from tailscale.com/cmd/k8s-operator+
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/cmd/k8s-operator+

@ -75,6 +75,7 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar
tailscale.com/types/structs from tailscale.com/tailcfg+
tailscale.com/types/tkatype from tailscale.com/tailcfg+
tailscale.com/types/views from tailscale.com/net/tsaddr+
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/ctxkey from tailscale.com/tsweb+
L 💣 tailscale.com/util/dirwalk from tailscale.com/metrics
tailscale.com/util/dnsname from tailscale.com/tailcfg

@ -258,6 +258,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
tailscale.com/types/tkatype from tailscale.com/types/key+
tailscale.com/types/views from tailscale.com/tailcfg+
tailscale.com/util/backoff from tailscale.com/cmd/tailscale/cli
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/net/netcheck+
tailscale.com/util/cloudenv from tailscale.com/net/dnscache+

@ -151,6 +151,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/types/tkatype from tailscale.com/control/controlclient+
tailscale.com/types/views from tailscale.com/appc+
tailscale.com/util/backoff from tailscale.com/control/controlclient+
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/appc+

@ -170,6 +170,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/types/tkatype from tailscale.com/control/controlclient+
tailscale.com/types/views from tailscale.com/appc+
tailscale.com/util/backoff from tailscale.com/control/controlclient+
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/appc+

@ -427,6 +427,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
tailscale.com/types/tkatype from tailscale.com/tka+
tailscale.com/types/views from tailscale.com/ipn/ipnlocal+
tailscale.com/util/backoff from tailscale.com/cmd/tailscaled+
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/control/controlclient+

@ -334,6 +334,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
tailscale.com/types/tkatype from tailscale.com/client/local+
tailscale.com/types/views from tailscale.com/appc+
tailscale.com/util/backoff from tailscale.com/control/controlclient+
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/appc+

@ -141,15 +141,12 @@ func (r nopRead) Read(p []byte) (int, error) {
return len(p), nil
}
var sinkU32 uint32
func BenchmarkReadUint32(b *testing.B) {
func BenchmarkReadFrameHeader(b *testing.B) {
r := bufio.NewReader(nopRead{})
var err error
b.ReportAllocs()
b.ResetTimer()
for range b.N {
sinkU32, err = readUint32(r)
_, _, err := ReadFrameHeader(r)
if err != nil {
b.Fatal(err)
}

@ -183,21 +183,6 @@ func writeUint32(bw *bufio.Writer, v uint32) error {
return nil
}
func readUint32(br *bufio.Reader) (uint32, error) {
var b [4]byte
// Reading a byte at a time is a bit silly,
// but it causes b not to escape,
// which more than pays for the silliness.
for i := range &b {
c, err := br.ReadByte()
if err != nil {
return 0, err
}
b[i] = c
}
return bin.Uint32(b[:]), nil
}
// ReadFrameTypeHeader reads a frame header from br and
// verifies that the frame type matches wantType.
//
@ -213,18 +198,16 @@ func ReadFrameTypeHeader(br *bufio.Reader, wantType FrameType) (frameLen uint32,
return frameLen, err
}
// ReadFrameHeader reads the header of a DERP frame,
// reading 5 bytes from br.
// ReadFrameHeader reads a DERP frame header ([FrameHeaderLen] bytes) from br.
// It uses Peek+Discard to read directly from bufio's internal buffer
// without copying or allocating.
func ReadFrameHeader(br *bufio.Reader) (t FrameType, frameLen uint32, err error) {
tb, err := br.ReadByte()
if err != nil {
return 0, 0, err
}
frameLen, err = readUint32(br)
hdr, err := br.Peek(FrameHeaderLen)
if err != nil {
return 0, 0, err
}
return FrameType(tb), frameLen, nil
defer br.Discard(FrameHeaderLen)
return FrameType(hdr[0]), bin.Uint32(hdr[1:FrameHeaderLen]), nil
}
// readFrame reads a frame header and then reads its payload into

@ -34,6 +34,65 @@ type (
Client = derp.Client
)
func TestReadFrameHeader(t *testing.T) {
tests := []struct {
name string
input [5]byte
wantType derp.FrameType
wantLen uint32
}{
{
name: "SendPacket",
input: [5]byte{byte(derp.FrameSendPacket), 0x00, 0x00, 0x04, 0x00},
wantType: derp.FrameSendPacket,
wantLen: 1024,
},
{
name: "KeepAlive",
input: [5]byte{byte(derp.FrameKeepAlive), 0x00, 0x00, 0x00, 0x00},
wantType: derp.FrameKeepAlive,
wantLen: 0,
},
{
name: "MaxLen",
input: [5]byte{byte(derp.FrameRecvPacket), 0xff, 0xff, 0xff, 0xff},
wantType: derp.FrameRecvPacket,
wantLen: 0xffffffff,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
br := bufio.NewReader(bytes.NewReader(tt.input[:]))
gotType, gotLen, err := derp.ReadFrameHeader(br)
if err != nil {
t.Fatalf("ReadFrameHeader: %v", err)
}
if gotType != tt.wantType {
t.Errorf("type = %v, want %v", gotType, tt.wantType)
}
if gotLen != tt.wantLen {
t.Errorf("len = %v, want %v", gotLen, tt.wantLen)
}
})
}
// Verify zero allocations.
buf := make([]byte, 4096)
rd := bytes.NewReader(buf)
br := bufio.NewReader(rd)
got := testing.AllocsPerRun(1000, func() {
rd.Reset(buf)
br.Reset(rd)
_, _, err := derp.ReadFrameHeader(br)
if err != nil {
t.Fatalf("ReadFrameHeader: %v", err)
}
})
if got != 0 {
t.Fatalf("ReadFrameHeader allocs = %f, want 0", got)
}
}
func TestClientInfoUnmarshal(t *testing.T) {
for i, in := range map[string]struct {
json string

@ -52,6 +52,7 @@ import (
"tailscale.com/tstime/rate"
"tailscale.com/types/key"
"tailscale.com/types/logger"
"tailscale.com/util/bufiox"
"tailscale.com/util/ctxkey"
"tailscale.com/util/mak"
"tailscale.com/util/set"
@ -1088,10 +1089,10 @@ func (c *sclient) handleFramePing(ft derp.FrameType, fl uint32) error {
// space for future extensibility, but not too much.
return fmt.Errorf("ping body too large: %v", fl)
}
_, err := io.ReadFull(c.br, m[:])
if err != nil {
if _, err := bufiox.ReadFull(c.br, m[:]); err != nil {
return err
}
var err error
if extra := int64(fl) - int64(len(m)); extra > 0 {
_, err = io.CopyN(io.Discard, c.br, extra)
}

@ -329,6 +329,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
tailscale.com/types/tkatype from tailscale.com/client/local+
tailscale.com/types/views from tailscale.com/appc+
tailscale.com/util/backoff from tailscale.com/control/controlclient+
tailscale.com/util/bufiox from tailscale.com/types/key
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
tailscale.com/util/cibuild from tailscale.com/health+
tailscale.com/util/clientmetric from tailscale.com/appc+

@ -15,6 +15,7 @@ import (
"golang.org/x/crypto/curve25519"
"golang.org/x/crypto/nacl/box"
"tailscale.com/types/structs"
"tailscale.com/util/bufiox"
)
const (
@ -242,28 +243,14 @@ func (k NodePublic) AppendTo(buf []byte) []byte {
}
// ReadRawWithoutAllocating initializes k with bytes read from br.
// The reading is done ~4x slower than io.ReadFull, but in exchange is
// allocation-free.
// It uses [bufiox.ReadFull] to read without heap allocations.
func (k *NodePublic) ReadRawWithoutAllocating(br *bufio.Reader) error {
var z NodePublic
if *k != z {
return errors.New("refusing to read into non-zero NodePublic")
}
// This is ~4x slower than io.ReadFull, but using io.ReadFull
// causes one extra alloc, which is significant for the DERP
// server that consumes this method. So, process stuff slower but
// without allocation.
//
// Dear future: if io.ReadFull stops causing stuff to escape, you
// should switch back to that.
for i := range k.k {
b, err := br.ReadByte()
if err != nil {
_, err := bufiox.ReadFull(br, k.k[:])
return err
}
k.k[i] = b
}
return nil
}
// WriteRawWithoutAllocating writes out k as 32 bytes to bw.

@ -125,6 +125,25 @@ func TestNodeReadRawWithoutAllocating(t *testing.T) {
}
}
func BenchmarkNodeReadRawWithoutAllocating(b *testing.B) {
buf := make([]byte, 32)
for i := range buf {
buf[i] = 0x42
}
r := bytes.NewReader(buf)
br := bufio.NewReader(r)
b.ReportAllocs()
b.ResetTimer()
for range b.N {
r.Reset(buf)
br.Reset(r)
var k NodePublic
if err := k.ReadRawWithoutAllocating(br); err != nil {
b.Fatal(err)
}
}
}
func TestNodeWriteRawWithoutAllocating(t *testing.T) {
buf := make([]byte, 0, 32)
w := bytes.NewBuffer(buf)

@ -0,0 +1,31 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
// Package bufiox provides extensions to the standard bufio package.
package bufiox
import "io"
// BufferedReader is an interface for readers that support peeking
// into an internal buffer, like [bufio.Reader].
type BufferedReader interface {
Peek(n int) ([]byte, error)
Discard(n int) (discarded int, err error)
}
// ReadFull reads exactly len(buf) bytes from r into buf, like
// [io.ReadFull], but without heap allocations. It uses Peek to
// access the buffered data directly, copies it into buf, then
// discards the consumed bytes. If an error occurs,
// discard is not called and the buffer is left unchanged.
func ReadFull(r BufferedReader, buf []byte) (int, error) {
b, err := r.Peek(len(buf))
if err != nil {
if len(b) > 0 && err == io.EOF {
err = io.ErrUnexpectedEOF
}
return 0, err
}
defer r.Discard(len(buf))
return copy(buf, b), nil
}

@ -0,0 +1,98 @@
// Copyright (c) Tailscale Inc & contributors
// SPDX-License-Identifier: BSD-3-Clause
package bufiox
import (
"bufio"
"bytes"
"io"
"testing"
)
func TestReadFull(t *testing.T) {
data := []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}
br := bufio.NewReader(bytes.NewReader(data))
var buf [5]byte
n, err := ReadFull(br, buf[:])
if err != nil {
t.Fatalf("ReadFull: %v", err)
}
if n != len(buf) {
t.Fatalf("n = %d, want %d", n, len(buf))
}
if want := [5]byte{0x01, 0x02, 0x03, 0x04, 0x05}; buf != want {
t.Fatalf("buf = %v, want %v", buf, want)
}
// Remaining bytes should still be readable.
var rest [3]byte
n, err = ReadFull(br, rest[:])
if err != nil {
t.Fatalf("ReadFull rest: %v", err)
}
if n != len(rest) {
t.Fatalf("rest n = %d, want %d", n, len(rest))
}
if want := [3]byte{0x06, 0x07, 0x08}; rest != want {
t.Fatalf("rest = %v, want %v", rest, want)
}
}
func TestReadFullShort(t *testing.T) {
data := []byte{0x01, 0x02}
br := bufio.NewReader(bytes.NewReader(data))
var buf [5]byte
_, err := ReadFull(br, buf[:])
if err != io.ErrUnexpectedEOF {
t.Fatalf("err = %v, want %v", err, io.ErrUnexpectedEOF)
}
}
func TestReadFullEmpty(t *testing.T) {
br := bufio.NewReader(bytes.NewReader(nil))
var buf [1]byte
_, err := ReadFull(br, buf[:])
if err != io.EOF {
t.Fatalf("err = %v, want %v", err, io.EOF)
}
}
func TestReadFullZeroAllocs(t *testing.T) {
data := make([]byte, 64)
rd := bytes.NewReader(data)
br := bufio.NewReader(rd)
var buf [32]byte
got := testing.AllocsPerRun(1000, func() {
rd.Reset(data)
br.Reset(rd)
_, err := ReadFull(br, buf[:])
if err != nil {
t.Fatalf("ReadFull: %v", err)
}
})
if got != 0 {
t.Fatalf("ReadFull allocs = %f, want 0", got)
}
}
type nopReader struct{}
func (nopReader) Read(p []byte) (int, error) { return len(p), nil }
func BenchmarkReadFull(b *testing.B) {
br := bufio.NewReader(nopReader{})
var buf [32]byte
b.ReportAllocs()
b.ResetTimer()
for range b.N {
_, err := ReadFull(br, buf[:])
if err != nil {
b.Fatal(err)
}
}
}
Loading…
Cancel
Save