util/hashx: move from sha256x (#5388)

This commit is contained in:
Joe Tsai
2022-08-16 13:15:33 -07:00
committed by GitHub
parent f061d20c9d
commit 03f7e4e577
4 changed files with 2 additions and 2 deletions
+198
View File
@@ -0,0 +1,198 @@
// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package hashx provides a concrete implementation of [hash.Hash]
// that operates on a particular block size.
package hashx
import (
"encoding/binary"
"fmt"
"hash"
"unsafe"
)
// Compile-time assertion that *Block512 implements [hash.Hash].
var _ hash.Hash = (*Block512)(nil)
// Block512 wraps a [hash.Hash] for functions that operate on 512-bit block sizes.
// It has efficient methods for hashing fixed-width integers.
//
// Input is accumulated in an internal 64-byte buffer and forwarded to the
// wrapped hash when the buffer fills up or when Sum is called.
//
// A hashing algorithm that operates on 512-bit block sizes should be used.
// The hash still operates correctly even with misaligned block sizes,
// but operates less efficiently.
//
// Example algorithms with 512-bit block sizes include:
// - MD4 (https://golang.org/x/crypto/md4)
// - MD5 (https://golang.org/pkg/crypto/md5)
// - BLAKE2s (https://golang.org/x/crypto/blake2s)
// - BLAKE3
// - RIPEMD (https://golang.org/x/crypto/ripemd160)
// - SHA-0
// - SHA-1 (https://golang.org/pkg/crypto/sha1)
// - SHA-2 (https://golang.org/pkg/crypto/sha256)
// - Whirlpool
//
// See https://en.wikipedia.org/wiki/Comparison_of_cryptographic_hash_functions#Parameters
// for a list of hash functions and their block sizes.
//
// Block512 assumes that [hash.Hash.Write] never fails and
// never allows the provided buffer to escape.
type Block512 struct {
// Hash is the wrapped hash that ultimately receives all buffered input.
hash.Hash
// x buffers bytes that have not yet been written to Hash.
x [512 / 8]byte
// nx is the number of pending bytes held in x[:nx].
// It may equal len(x): a completely full buffer is only flushed
// by the next operation that needs space (or by Sum).
nx int
}
// New512 constructs a new Block512 that wraps h.
//
// It reports an error if the block size of h does not evenly divide the
// 512-bit internal buffer, or if h reports a non-positive block size.
// Misaligned block sizes perform poorly, but execute correctly.
// The error may be ignored if performance is not a concern:
// the returned Block512 is non-nil even when an error is reported.
func New512(h hash.Hash) (*Block512, error) {
	b := &Block512{Hash: h}
	// Query the block size once and reject non-positive values up front:
	// taking a remainder by zero would panic instead of reporting an error.
	if bs := h.BlockSize(); bs <= 0 || len(b.x)%bs != 0 {
		return b, fmt.Errorf("hashx.Block512: inefficient use of hash.Hash with %d-bit block size", 8*bs)
	}
	return b, nil
}
// Write feeds b into the hash and implements [io.Writer].
// It always consumes all of b and never returns an error.
func (h *Block512) Write(b []byte) (int, error) {
	written := len(b)
	h.HashBytes(b)
	return written, nil
}
// Sum returns b with the current digest appended.
//
// Any bytes still sitting in the internal buffer are first pushed into the
// underlying [hash.Hash]. After such a partial flush, later writes may no
// longer line up with block boundaries and can be slower
// until [Block512.Reset] is called.
func (h *Block512) Sum(b []byte) []byte {
	if pending := h.x[:h.nx]; len(pending) > 0 {
		h.Hash.Write(pending)
		h.nx = 0
	}

	// An interface method call defeats escape analysis, so whatever slice is
	// handed to hash.Hash.Sum escapes. The buffer h.x is empty at this point
	// and h is assumed to escape already, so it is used as scratch space;
	// the digest is then copied onto the caller's slice.
	digest := h.Hash.Sum(h.x[:0])
	b = append(b, digest...)
	return b
}
// Reset returns the Block512 to its freshly constructed state:
// the wrapped [hash.Hash] is reset and all buffered bytes are discarded.
func (h *Block512) Reset() {
	inner := h.Hash
	inner.Reset()
	h.nx = 0 // forget pending bytes; the contents of h.x are simply overwritten later
}
// HashUint8 hashes n as a 1-byte integer.
//
// The byte is appended to the internal buffer when there is room;
// otherwise the work is delegated to hashUint8Slow.
func (h *Block512) HashUint8(n uint8) {
// NOTE: This method is carefully written to be inlineable.
// Fast path: at least one byte of space remains in h.x.
// This may leave h.nx == len(h.x); the full buffer is flushed later
// by hashUint, HashBytes, or Sum.
if h.nx <= len(h.x)-1 {
h.x[h.nx] = n
h.nx += 1
} else {
// Slow path: the buffer is full.
h.hashUint8Slow(n) // mark "noinline" to keep this within inline budget
}
}
// hashUint8Slow is the out-of-line fallback of HashUint8, used when the
// buffer has no room left. It is never inlined so that HashUint8 itself
// stays within the inlining budget.
//
//go:noinline
func (h *Block512) hashUint8Slow(n uint8) {
	const width = 1 // size of a uint8 in bytes
	h.hashUint(uint64(n), width)
}
// HashUint16 hashes n as a 2-byte little-endian integer.
//
// The bytes are appended to the internal buffer when they fit;
// otherwise the work is delegated to hashUint16Slow.
func (h *Block512) HashUint16(n uint16) {
// NOTE: This method is carefully written to be inlineable.
// Fast path: at least 2 bytes of space remain in h.x.
if h.nx <= len(h.x)-2 {
binary.LittleEndian.PutUint16(h.x[h.nx:], n)
h.nx += 2
} else {
// Slow path: the value does not fit in the remaining buffer space.
h.hashUint16Slow(n) // mark "noinline" to keep this within inline budget
}
}
// hashUint16Slow is the out-of-line fallback of HashUint16, used when fewer
// than 2 bytes of buffer space remain. It is never inlined so that
// HashUint16 itself stays within the inlining budget.
//
//go:noinline
func (h *Block512) hashUint16Slow(n uint16) {
	const width = 2 // size of a uint16 in bytes
	h.hashUint(uint64(n), width)
}
// HashUint32 hashes n as a 4-byte little-endian integer.
//
// The bytes are appended to the internal buffer when they fit;
// otherwise the work is delegated to hashUint32Slow.
func (h *Block512) HashUint32(n uint32) {
// NOTE: This method is carefully written to be inlineable.
// Fast path: at least 4 bytes of space remain in h.x.
if h.nx <= len(h.x)-4 {
binary.LittleEndian.PutUint32(h.x[h.nx:], n)
h.nx += 4
} else {
// Slow path: the value does not fit in the remaining buffer space.
h.hashUint32Slow(n) // mark "noinline" to keep this within inline budget
}
}
// hashUint32Slow is the out-of-line fallback of HashUint32, used when fewer
// than 4 bytes of buffer space remain. It is never inlined so that
// HashUint32 itself stays within the inlining budget.
//
//go:noinline
func (h *Block512) hashUint32Slow(n uint32) {
	const width = 4 // size of a uint32 in bytes
	h.hashUint(uint64(n), width)
}
// HashUint64 hashes n as an 8-byte little-endian integer.
//
// The bytes are appended to the internal buffer when they fit;
// otherwise the work is delegated to hashUint64Slow.
func (h *Block512) HashUint64(n uint64) {
// NOTE: This method is carefully written to be inlineable.
// Fast path: at least 8 bytes of space remain in h.x.
if h.nx <= len(h.x)-8 {
binary.LittleEndian.PutUint64(h.x[h.nx:], n)
h.nx += 8
} else {
// Slow path: the value does not fit in the remaining buffer space.
h.hashUint64Slow(n) // mark "noinline" to keep this within inline budget
}
}
// hashUint64Slow is the out-of-line fallback of HashUint64, used when fewer
// than 8 bytes of buffer space remain. It is never inlined so that
// HashUint64 itself stays within the inlining budget.
//
//go:noinline
func (h *Block512) hashUint64Slow(n uint64) {
	const width = 8 // size of a uint64 in bytes
	h.hashUint(n, width)
}
// hashUint appends the i low-order bytes of n to the buffer in little-endian
// order, one byte at a time. Whenever the buffer is found full, it is written
// to the underlying hash and emptied before the next byte is stored.
func (h *Block512) hashUint(n uint64, i int) {
	for k := 0; k < i; k++ {
		if h.nx == len(h.x) {
			h.Hash.Write(h.x[:])
			h.nx = 0
		}
		// Byte k of the little-endian encoding is bits [8k, 8k+8) of n.
		h.x[h.nx] = byte(n >> (8 * uint(k)))
		h.nx++
	}
}
// HashBytes hashes the contents of b.
// The length of b is not hashed separately.
//
// The structure closely follows sha256.digest.Write: finish any partially
// filled buffer, pass whole blocks straight through, and keep the remainder.
func (h *Block512) HashBytes(b []byte) {
	blockLen := len(h.x)

	// Step 1: top up a buffer that already holds pending bytes,
	// flushing it once it becomes full.
	if h.nx > 0 {
		copied := copy(h.x[h.nx:], b)
		b = b[copied:]
		h.nx += copied
		if h.nx == blockLen {
			h.Hash.Write(h.x[:])
			h.nx = 0
		}
	}

	// Step 2: hand every complete block directly to the underlying hash,
	// bypassing the buffer.
	if whole := len(b) - len(b)%blockLen; whole > 0 {
		h.Hash.Write(b[:whole])
		b = b[whole:]
	}

	// Step 3: stash the trailing partial block. The buffer is guaranteed to
	// be empty here, because b is only non-empty if step 1 flushed or was skipped.
	if len(b) > 0 {
		h.nx = copy(h.x[:], b)
	}
}
// HashString hashes the contents of s.
// The length of s is not hashed separately.
//
// The string's bytes are viewed in place as a byte slice (no copy is made)
// and passed to HashBytes.
func (h *Block512) HashString(s string) {
	// TODO: Avoid unsafe when standard hashers implement io.StringWriter.
	// See https://go.dev/issue/38776.

	// strHeader mirrors the in-memory layout of a string value:
	// a data pointer followed by a byte count.
	type strHeader struct {
		data unsafe.Pointer
		size int
	}
	hdr := (*strHeader)(unsafe.Pointer(&s))
	h.HashBytes(unsafe.Slice((*byte)(hdr.data), hdr.size))
}
// TODO: Add Hash.MarshalBinary and Hash.UnmarshalBinary?
+205
View File
@@ -0,0 +1,205 @@
// Copyright (c) 2022 Tailscale Inc & AUTHORS All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package hashx
import (
"crypto/sha256"
"encoding/binary"
"hash"
"math/rand"
"testing"
qt "github.com/frankban/quicktest"
"tailscale.com/util/must"
)
// naiveHash is a deliberately simple reference implementation of the typed
// hashing methods: every value is encoded and written to the embedded
// hash immediately, with no block buffering.
type naiveHash struct {
	hash.Hash
	scratch [256]byte // reusable encoding space
}

// newNaive returns a naiveHash backed by SHA-256.
func newNaive() *naiveHash {
	h := new(naiveHash)
	h.Hash = sha256.New()
	return h
}

// HashUint8 writes n as a single byte.
func (h *naiveHash) HashUint8(n uint8) {
	h.scratch[0] = n
	h.Write(h.scratch[:1])
}

// HashUint16 writes n as 2 little-endian bytes.
func (h *naiveHash) HashUint16(n uint16) {
	buf := h.scratch[:2]
	binary.LittleEndian.PutUint16(buf, n)
	h.Write(buf)
}

// HashUint32 writes n as 4 little-endian bytes.
func (h *naiveHash) HashUint32(n uint32) {
	buf := h.scratch[:4]
	binary.LittleEndian.PutUint32(buf, n)
	h.Write(buf)
}

// HashUint64 writes n as 8 little-endian bytes.
func (h *naiveHash) HashUint64(n uint64) {
	buf := h.scratch[:8]
	binary.LittleEndian.PutUint64(buf, n)
	h.Write(buf)
}

// HashBytes writes b unchanged.
func (h *naiveHash) HashBytes(b []byte) {
	h.Write(b)
}

// HashString writes the bytes of s, staging them in scratch when they fit.
func (h *naiveHash) HashString(s string) {
	buf := append(h.scratch[:0], s...)
	h.Write(buf)
}
// bytes is a 130-byte test fixture whose element at index i holds byte(i).
var bytes = func() []byte {
	const size = 130
	fixture := make([]byte, size)
	for i := 0; i < size; i++ {
		fixture[i] = byte(i)
	}
	return fixture
}()
// hasher is the set of typed hashing methods exercised by the tests.
// Both *Block512 and *naiveHash are passed wherever a hasher is expected,
// so the same sequence of operations can be replayed against each.
type hasher interface {
HashUint8(uint8)
HashUint16(uint16)
HashUint32(uint32)
HashUint64(uint64)
HashBytes([]byte)
HashString(string)
}
// hashSuite drives h through a fixed, deterministic workload: ten rounds,
// each made of ten repetitions of a mixed-width integer sequence followed by
// one variable-length chunk of the bytes fixture. Odd-numbered rounds
// (1st, 3rd, ...) submit the chunk via HashBytes, the others via HashString.
func hashSuite(h hasher) {
	for round := 1; round <= 10; round++ {
		for rep := 0; rep < 10; rep++ {
			h.HashUint8(0x01)
			h.HashUint8(0x23)
			h.HashUint32(0x456789ab)
			h.HashUint8(0xcd)
			h.HashUint8(0xef)
			h.HashUint16(0x0123)
			h.HashUint32(0x456789ab)
			h.HashUint16(0xcdef)
			h.HashUint8(0x01)
			h.HashUint64(0x23456789abcdef01)
			h.HashUint16(0x2345)
			h.HashUint8(0x67)
			h.HashUint16(0x89ab)
			h.HashUint8(0xcd)
		}

		// The chunk grows by 13 bytes per round: 13, 26, ..., 130.
		chunk := bytes[:round*13]
		if round%2 == 1 {
			h.HashBytes(chunk)
			continue
		}
		h.HashString(string(chunk))
	}
}
// Test runs the fixed hashSuite workload through both Block512 and the
// naive reference implementation and requires identical digests.
func Test(t *testing.T) {
	c := qt.New(t)

	want := newNaive()
	got := must.Get(New512(sha256.New()))

	hashSuite(got)
	hashSuite(want)

	gotSum, wantSum := got.Sum(nil), want.Sum(nil)
	c.Assert(gotSum, qt.DeepEquals, wantSum)
}
// TestAllocations verifies that none of the Block512 operations allocate.
// Each operation runs as its own subtest against a fresh SHA-256 backed
// Block512 and must report zero allocations per run.
func TestAllocations(t *testing.T) {
	c := qt.New(t)

	cases := []struct {
		name string
		op   func(h *Block512)
	}{
		{"Sum", func(h *Block512) {
			var a [sha256.Size]byte
			h.Sum(a[:0])
		}},
		{"HashUint8", func(h *Block512) { h.HashUint8(0x01) }},
		{"HashUint16", func(h *Block512) { h.HashUint16(0x0123) }},
		{"HashUint32", func(h *Block512) { h.HashUint32(0x01234567) }},
		{"HashUint64", func(h *Block512) { h.HashUint64(0x0123456789abcdef) }},
		{"HashBytes", func(h *Block512) { h.HashBytes(bytes) }},
		{"HashString", func(h *Block512) { h.HashString("abcdefghijklmnopqrstuvwxyz") }},
	}

	for _, tc := range cases {
		op := tc.op
		c.Run(tc.name, func(c *qt.C) {
			h := must.Get(New512(sha256.New()))
			allocs := testing.AllocsPerRun(100, func() { op(h) })
			c.Assert(allocs, qt.Equals, 0.0)
		})
	}
}
// Fuzz checks Block512 against the naive reference implementation.
//
// Each fuzz input is a seed for two identical pseudo-random generators, so
// that exactly the same sequence of hashing operations is replayed against
// both implementations. Digests are compared after one batch of operations,
// after a second batch (covering continued use after Sum), and after a
// Reset followed by a third batch.
func Fuzz(f *testing.F) {
	// Seed the corpus so that a plain "go test" run executes the target
	// at least a few times even without -fuzz.
	f.Add(int64(0))
	f.Add(int64(1))

	f.Fuzz(func(t *testing.T, seed int64) {
		c := qt.New(t)

		// execute applies a random number (0..255) of random operations to h.
		execute := func(h hasher, r *rand.Rand) {
			// Draw the operation count once. Re-drawing it in the loop
			// condition would compare i against a fresh random bound on every
			// iteration, which makes long operation sequences very unlikely.
			numOps := r.Intn(256)
			for i := 0; i < numOps; i++ {
				switch r.Intn(6) {
				case 0:
					n := uint8(r.Uint64())
					h.HashUint8(n)
				case 1:
					n := uint16(r.Uint64())
					h.HashUint16(n)
				case 2:
					n := uint32(r.Uint64())
					h.HashUint32(n)
				case 3:
					n := uint64(r.Uint64())
					h.HashUint64(n)
				case 4:
					b := make([]byte, r.Intn(256))
					r.Read(b)
					h.HashBytes(b)
				case 5:
					// Exercise the string path with arbitrary contents as well.
					b := make([]byte, r.Intn(256))
					r.Read(b)
					h.HashString(string(b))
				}
			}
		}

		r1 := rand.New(rand.NewSource(seed))
		r2 := rand.New(rand.NewSource(seed))

		h1 := must.Get(New512(sha256.New()))
		h2 := newNaive()

		// First batch.
		execute(h1, r1)
		execute(h2, r2)
		c.Assert(h1.Sum(nil), qt.DeepEquals, h2.Sum(nil))

		// Second batch: keep hashing after Sum has flushed a partial block.
		execute(h1, r1)
		execute(h2, r2)
		c.Assert(h1.Sum(nil), qt.DeepEquals, h2.Sum(nil))

		// Third batch: start over from a reset state.
		h1.Reset()
		h2.Reset()

		execute(h1, r1)
		execute(h2, r2)
		c.Assert(h1.Sum(nil), qt.DeepEquals, h2.Sum(nil))
	})
}
// Benchmark measures the hashSuite workload on Block512 ("Hash") and on the
// naive reference implementation ("Naive"). Every iteration resets the
// hasher, runs the suite, and computes the digest into a shared array.
func Benchmark(b *testing.B) {
	var digest [sha256.Size]byte

	b.Run("Hash", func(b *testing.B) {
		b.ReportAllocs()
		fast := must.Get(New512(sha256.New()))
		for n := b.N; n > 0; n-- {
			fast.Reset()
			hashSuite(fast)
			fast.Sum(digest[:0])
		}
	})

	b.Run("Naive", func(b *testing.B) {
		b.ReportAllocs()
		naive := newNaive()
		for n := b.N; n > 0; n-- {
			naive.Reset()
			hashSuite(naive)
			naive.Sum(digest[:0])
		}
	})
}