derp,types,util: use bufio Peek+Discard for allocation-free fast reads (#19067)
Replace byte-at-a-time ReadByte loops with Peek+Discard in the DERP
read path. Peek returns a slice into bufio's internal buffer without
allocating, and Discard advances the read pointer without copying.
Introduce util/bufiox with a BufferedReader interface and ReadFull
helper that uses Peek+copy+Discard as an allocation-free alternative
to io.ReadFull.
- derp.ReadFrameHeader: replace 5× ReadByte with Peek(5)+Discard(5),
reading the frame type and length directly from the peeked slice.
Remove now-unused readUint32 helper.
name old ns/op new ns/op speedup
ReadFrameHeader-8 24.2 12.4 ~2x
(0 allocs/op in both)
- key.NodePublic.ReadRawWithoutAllocating: replace 32× ReadByte with
bufiox.ReadFull. Addresses the "Dear future" comment about switching
away from byte-at-a-time reads once a non-escaping alternative exists.
name old ns/op new ns/op speedup
NodeReadRawWithoutAllocating-8 140 43.6 ~3.2x
(0 allocs/op in both)
- derpserver.handleFramePing: replace io.ReadFull with bufiox.ReadFull.
Updates tailscale/corp#38509
Signed-off-by: Mike O'Driscoll <mikeo@tailscale.com>
This commit is contained in:
@@ -142,6 +142,7 @@ tailscale.com/cmd/derper dependencies: (generated by github.com/tailscale/depawa
|
||||
tailscale.com/types/structs from tailscale.com/ipn+
|
||||
tailscale.com/types/tkatype from tailscale.com/client/local+
|
||||
tailscale.com/types/views from tailscale.com/ipn+
|
||||
tailscale.com/util/bufiox from tailscale.com/derp/derpserver+
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/net/netmon+
|
||||
tailscale.com/util/cloudenv from tailscale.com/hostinfo+
|
||||
|
||||
@@ -933,6 +933,7 @@ tailscale.com/cmd/k8s-operator dependencies: (generated by github.com/tailscale/
|
||||
tailscale.com/types/tkatype from tailscale.com/client/local+
|
||||
tailscale.com/types/views from tailscale.com/appc+
|
||||
tailscale.com/util/backoff from tailscale.com/cmd/k8s-operator+
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/cmd/k8s-operator+
|
||||
|
||||
@@ -75,6 +75,7 @@ tailscale.com/cmd/stund dependencies: (generated by github.com/tailscale/depawar
|
||||
tailscale.com/types/structs from tailscale.com/tailcfg+
|
||||
tailscale.com/types/tkatype from tailscale.com/tailcfg+
|
||||
tailscale.com/types/views from tailscale.com/net/tsaddr+
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/ctxkey from tailscale.com/tsweb+
|
||||
L 💣 tailscale.com/util/dirwalk from tailscale.com/metrics
|
||||
tailscale.com/util/dnsname from tailscale.com/tailcfg
|
||||
|
||||
@@ -258,6 +258,7 @@ tailscale.com/cmd/tailscale dependencies: (generated by github.com/tailscale/dep
|
||||
tailscale.com/types/tkatype from tailscale.com/types/key+
|
||||
tailscale.com/types/views from tailscale.com/tailcfg+
|
||||
tailscale.com/util/backoff from tailscale.com/cmd/tailscale/cli
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/net/netcheck+
|
||||
tailscale.com/util/cloudenv from tailscale.com/net/dnscache+
|
||||
|
||||
@@ -151,6 +151,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
||||
tailscale.com/types/tkatype from tailscale.com/control/controlclient+
|
||||
tailscale.com/types/views from tailscale.com/appc+
|
||||
tailscale.com/util/backoff from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/appc+
|
||||
|
||||
@@ -170,6 +170,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
||||
tailscale.com/types/tkatype from tailscale.com/control/controlclient+
|
||||
tailscale.com/types/views from tailscale.com/appc+
|
||||
tailscale.com/util/backoff from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/appc+
|
||||
|
||||
@@ -427,6 +427,7 @@ tailscale.com/cmd/tailscaled dependencies: (generated by github.com/tailscale/de
|
||||
tailscale.com/types/tkatype from tailscale.com/tka+
|
||||
tailscale.com/types/views from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/backoff from tailscale.com/cmd/tailscaled+
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/control/controlclient+
|
||||
|
||||
@@ -334,6 +334,7 @@ tailscale.com/cmd/tsidp dependencies: (generated by github.com/tailscale/depawar
|
||||
tailscale.com/types/tkatype from tailscale.com/client/local+
|
||||
tailscale.com/types/views from tailscale.com/appc+
|
||||
tailscale.com/util/backoff from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/appc+
|
||||
|
||||
+2
-5
@@ -141,15 +141,12 @@ func (r nopRead) Read(p []byte) (int, error) {
|
||||
return len(p), nil
|
||||
}
|
||||
|
||||
var sinkU32 uint32
|
||||
|
||||
func BenchmarkReadUint32(b *testing.B) {
|
||||
func BenchmarkReadFrameHeader(b *testing.B) {
|
||||
r := bufio.NewReader(nopRead{})
|
||||
var err error
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
sinkU32, err = readUint32(r)
|
||||
_, _, err := ReadFrameHeader(r)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
||||
+6
-23
@@ -183,21 +183,6 @@ func writeUint32(bw *bufio.Writer, v uint32) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func readUint32(br *bufio.Reader) (uint32, error) {
|
||||
var b [4]byte
|
||||
// Reading a byte at a time is a bit silly,
|
||||
// but it causes b not to escape,
|
||||
// which more than pays for the silliness.
|
||||
for i := range &b {
|
||||
c, err := br.ReadByte()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
b[i] = c
|
||||
}
|
||||
return bin.Uint32(b[:]), nil
|
||||
}
|
||||
|
||||
// ReadFrameTypeHeader reads a frame header from br and
|
||||
// verifies that the frame type matches wantType.
|
||||
//
|
||||
@@ -213,18 +198,16 @@ func ReadFrameTypeHeader(br *bufio.Reader, wantType FrameType) (frameLen uint32,
|
||||
return frameLen, err
|
||||
}
|
||||
|
||||
// ReadFrameHeader reads the header of a DERP frame,
|
||||
// reading 5 bytes from br.
|
||||
// ReadFrameHeader reads a DERP frame header ([FrameHeaderLen] bytes) from br.
|
||||
// It uses Peek+Discard to read directly from bufio's internal buffer
|
||||
// without copying or allocating.
|
||||
func ReadFrameHeader(br *bufio.Reader) (t FrameType, frameLen uint32, err error) {
|
||||
tb, err := br.ReadByte()
|
||||
hdr, err := br.Peek(FrameHeaderLen)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
frameLen, err = readUint32(br)
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
return FrameType(tb), frameLen, nil
|
||||
defer br.Discard(FrameHeaderLen)
|
||||
return FrameType(hdr[0]), bin.Uint32(hdr[1:FrameHeaderLen]), nil
|
||||
}
|
||||
|
||||
// readFrame reads a frame header and then reads its payload into
|
||||
|
||||
@@ -34,6 +34,65 @@ type (
|
||||
Client = derp.Client
|
||||
)
|
||||
|
||||
func TestReadFrameHeader(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input [5]byte
|
||||
wantType derp.FrameType
|
||||
wantLen uint32
|
||||
}{
|
||||
{
|
||||
name: "SendPacket",
|
||||
input: [5]byte{byte(derp.FrameSendPacket), 0x00, 0x00, 0x04, 0x00},
|
||||
wantType: derp.FrameSendPacket,
|
||||
wantLen: 1024,
|
||||
},
|
||||
{
|
||||
name: "KeepAlive",
|
||||
input: [5]byte{byte(derp.FrameKeepAlive), 0x00, 0x00, 0x00, 0x00},
|
||||
wantType: derp.FrameKeepAlive,
|
||||
wantLen: 0,
|
||||
},
|
||||
{
|
||||
name: "MaxLen",
|
||||
input: [5]byte{byte(derp.FrameRecvPacket), 0xff, 0xff, 0xff, 0xff},
|
||||
wantType: derp.FrameRecvPacket,
|
||||
wantLen: 0xffffffff,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
br := bufio.NewReader(bytes.NewReader(tt.input[:]))
|
||||
gotType, gotLen, err := derp.ReadFrameHeader(br)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFrameHeader: %v", err)
|
||||
}
|
||||
if gotType != tt.wantType {
|
||||
t.Errorf("type = %v, want %v", gotType, tt.wantType)
|
||||
}
|
||||
if gotLen != tt.wantLen {
|
||||
t.Errorf("len = %v, want %v", gotLen, tt.wantLen)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Verify zero allocations.
|
||||
buf := make([]byte, 4096)
|
||||
rd := bytes.NewReader(buf)
|
||||
br := bufio.NewReader(rd)
|
||||
got := testing.AllocsPerRun(1000, func() {
|
||||
rd.Reset(buf)
|
||||
br.Reset(rd)
|
||||
_, _, err := derp.ReadFrameHeader(br)
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFrameHeader: %v", err)
|
||||
}
|
||||
})
|
||||
if got != 0 {
|
||||
t.Fatalf("ReadFrameHeader allocs = %f, want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientInfoUnmarshal(t *testing.T) {
|
||||
for i, in := range map[string]struct {
|
||||
json string
|
||||
|
||||
@@ -52,6 +52,7 @@ import (
|
||||
"tailscale.com/tstime/rate"
|
||||
"tailscale.com/types/key"
|
||||
"tailscale.com/types/logger"
|
||||
"tailscale.com/util/bufiox"
|
||||
"tailscale.com/util/ctxkey"
|
||||
"tailscale.com/util/mak"
|
||||
"tailscale.com/util/set"
|
||||
@@ -1088,10 +1089,10 @@ func (c *sclient) handleFramePing(ft derp.FrameType, fl uint32) error {
|
||||
// space for future extensibility, but not too much.
|
||||
return fmt.Errorf("ping body too large: %v", fl)
|
||||
}
|
||||
_, err := io.ReadFull(c.br, m[:])
|
||||
if err != nil {
|
||||
if _, err := bufiox.ReadFull(c.br, m[:]); err != nil {
|
||||
return err
|
||||
}
|
||||
var err error
|
||||
if extra := int64(fl) - int64(len(m)); extra > 0 {
|
||||
_, err = io.CopyN(io.Discard, c.br, extra)
|
||||
}
|
||||
|
||||
@@ -329,6 +329,7 @@ tailscale.com/tsnet dependencies: (generated by github.com/tailscale/depaware)
|
||||
tailscale.com/types/tkatype from tailscale.com/client/local+
|
||||
tailscale.com/types/views from tailscale.com/appc+
|
||||
tailscale.com/util/backoff from tailscale.com/control/controlclient+
|
||||
tailscale.com/util/bufiox from tailscale.com/types/key
|
||||
tailscale.com/util/checkchange from tailscale.com/ipn/ipnlocal+
|
||||
tailscale.com/util/cibuild from tailscale.com/health+
|
||||
tailscale.com/util/clientmetric from tailscale.com/appc+
|
||||
|
||||
+4
-17
@@ -15,6 +15,7 @@ import (
|
||||
"golang.org/x/crypto/curve25519"
|
||||
"golang.org/x/crypto/nacl/box"
|
||||
"tailscale.com/types/structs"
|
||||
"tailscale.com/util/bufiox"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -242,28 +243,14 @@ func (k NodePublic) AppendTo(buf []byte) []byte {
|
||||
}
|
||||
|
||||
// ReadRawWithoutAllocating initializes k with bytes read from br.
|
||||
// The reading is done ~4x slower than io.ReadFull, but in exchange is
|
||||
// allocation-free.
|
||||
// It uses [bufiox.ReadFull] to read without heap allocations.
|
||||
func (k *NodePublic) ReadRawWithoutAllocating(br *bufio.Reader) error {
|
||||
var z NodePublic
|
||||
if *k != z {
|
||||
return errors.New("refusing to read into non-zero NodePublic")
|
||||
}
|
||||
// This is ~4x slower than io.ReadFull, but using io.ReadFull
|
||||
// causes one extra alloc, which is significant for the DERP
|
||||
// server that consumes this method. So, process stuff slower but
|
||||
// without allocation.
|
||||
//
|
||||
// Dear future: if io.ReadFull stops causing stuff to escape, you
|
||||
// should switch back to that.
|
||||
for i := range k.k {
|
||||
b, err := br.ReadByte()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
k.k[i] = b
|
||||
}
|
||||
return nil
|
||||
_, err := bufiox.ReadFull(br, k.k[:])
|
||||
return err
|
||||
}
|
||||
|
||||
// WriteRawWithoutAllocating writes out k as 32 bytes to bw.
|
||||
|
||||
@@ -125,6 +125,25 @@ func TestNodeReadRawWithoutAllocating(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkNodeReadRawWithoutAllocating(b *testing.B) {
|
||||
buf := make([]byte, 32)
|
||||
for i := range buf {
|
||||
buf[i] = 0x42
|
||||
}
|
||||
r := bytes.NewReader(buf)
|
||||
br := bufio.NewReader(r)
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
r.Reset(buf)
|
||||
br.Reset(r)
|
||||
var k NodePublic
|
||||
if err := k.ReadRawWithoutAllocating(br); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNodeWriteRawWithoutAllocating(t *testing.T) {
|
||||
buf := make([]byte, 0, 32)
|
||||
w := bytes.NewBuffer(buf)
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
// Package bufiox provides extensions to the standard bufio package.
|
||||
package bufiox
|
||||
|
||||
import "io"
|
||||
|
||||
// BufferedReader is an interface for readers that support peeking
|
||||
// into an internal buffer, like [bufio.Reader].
|
||||
type BufferedReader interface {
|
||||
Peek(n int) ([]byte, error)
|
||||
Discard(n int) (discarded int, err error)
|
||||
}
|
||||
|
||||
// ReadFull reads exactly len(buf) bytes from r into buf, like
|
||||
// [io.ReadFull], but without heap allocations. It uses Peek to
|
||||
// access the buffered data directly, copies it into buf, then
|
||||
// discards the consumed bytes. If an error occurs,
|
||||
// discard is not called and the buffer is left unchanged.
|
||||
func ReadFull(r BufferedReader, buf []byte) (int, error) {
|
||||
b, err := r.Peek(len(buf))
|
||||
if err != nil {
|
||||
if len(b) > 0 && err == io.EOF {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return 0, err
|
||||
}
|
||||
defer r.Discard(len(buf))
|
||||
return copy(buf, b), nil
|
||||
}
|
||||
@@ -0,0 +1,98 @@
|
||||
// Copyright (c) Tailscale Inc & contributors
|
||||
// SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
package bufiox
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"io"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestReadFull(t *testing.T) {
|
||||
data := []byte{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}
|
||||
br := bufio.NewReader(bytes.NewReader(data))
|
||||
|
||||
var buf [5]byte
|
||||
n, err := ReadFull(br, buf[:])
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFull: %v", err)
|
||||
}
|
||||
if n != len(buf) {
|
||||
t.Fatalf("n = %d, want %d", n, len(buf))
|
||||
}
|
||||
if want := [5]byte{0x01, 0x02, 0x03, 0x04, 0x05}; buf != want {
|
||||
t.Fatalf("buf = %v, want %v", buf, want)
|
||||
}
|
||||
|
||||
// Remaining bytes should still be readable.
|
||||
var rest [3]byte
|
||||
n, err = ReadFull(br, rest[:])
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFull rest: %v", err)
|
||||
}
|
||||
if n != len(rest) {
|
||||
t.Fatalf("rest n = %d, want %d", n, len(rest))
|
||||
}
|
||||
if want := [3]byte{0x06, 0x07, 0x08}; rest != want {
|
||||
t.Fatalf("rest = %v, want %v", rest, want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadFullShort(t *testing.T) {
|
||||
data := []byte{0x01, 0x02}
|
||||
br := bufio.NewReader(bytes.NewReader(data))
|
||||
|
||||
var buf [5]byte
|
||||
_, err := ReadFull(br, buf[:])
|
||||
if err != io.ErrUnexpectedEOF {
|
||||
t.Fatalf("err = %v, want %v", err, io.ErrUnexpectedEOF)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadFullEmpty(t *testing.T) {
|
||||
br := bufio.NewReader(bytes.NewReader(nil))
|
||||
|
||||
var buf [1]byte
|
||||
_, err := ReadFull(br, buf[:])
|
||||
if err != io.EOF {
|
||||
t.Fatalf("err = %v, want %v", err, io.EOF)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReadFullZeroAllocs(t *testing.T) {
|
||||
data := make([]byte, 64)
|
||||
rd := bytes.NewReader(data)
|
||||
br := bufio.NewReader(rd)
|
||||
|
||||
var buf [32]byte
|
||||
got := testing.AllocsPerRun(1000, func() {
|
||||
rd.Reset(data)
|
||||
br.Reset(rd)
|
||||
_, err := ReadFull(br, buf[:])
|
||||
if err != nil {
|
||||
t.Fatalf("ReadFull: %v", err)
|
||||
}
|
||||
})
|
||||
if got != 0 {
|
||||
t.Fatalf("ReadFull allocs = %f, want 0", got)
|
||||
}
|
||||
}
|
||||
|
||||
type nopReader struct{}
|
||||
|
||||
func (nopReader) Read(p []byte) (int, error) { return len(p), nil }
|
||||
|
||||
func BenchmarkReadFull(b *testing.B) {
|
||||
br := bufio.NewReader(nopReader{})
|
||||
var buf [32]byte
|
||||
b.ReportAllocs()
|
||||
b.ResetTimer()
|
||||
for range b.N {
|
||||
_, err := ReadFull(br, buf[:])
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user