From 1efd8d6c75111f8b50d094248f7456fe2b598d80 Mon Sep 17 00:00:00 2001
From: Jakob Borg
Date: Tue, 27 Oct 2015 09:26:08 +0100
Subject: [PATCH 1/2] Add benchmark of HashFile

---
 lib/scanner/.gitignore   |  1 +
 lib/scanner/walk_test.go | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 lib/scanner/.gitignore

diff --git a/lib/scanner/.gitignore b/lib/scanner/.gitignore
new file mode 100644
index 00000000..46765e06
--- /dev/null
+++ b/lib/scanner/.gitignore
@@ -0,0 +1 @@
+_random.data
diff --git a/lib/scanner/walk_test.go b/lib/scanner/walk_test.go
index 45cc0e31..689f3fc5 100644
--- a/lib/scanner/walk_test.go
+++ b/lib/scanner/walk_test.go
@@ -8,13 +8,16 @@ package scanner
 
 import (
 	"bytes"
+	"crypto/rand"
 	"fmt"
+	"io"
 	"os"
 	"path/filepath"
 	"reflect"
 	"runtime"
 	rdebug "runtime/debug"
 	"sort"
+	"sync"
 	"testing"
 
 	"github.com/syncthing/syncthing/lib/ignore"
@@ -372,3 +375,39 @@ func TestSymlinkTypeEqual(t *testing.T) {
 		}
 	}
 }
+
+var initOnce sync.Once
+
+const (
+	testdataSize = 17 << 20
+	testdataName = "_random.data"
+)
+
+func BenchmarkHashFile(b *testing.B) {
+	initOnce.Do(initTestFile)
+	b.ResetTimer()
+
+	for i := 0; i < b.N; i++ {
+		if _, err := HashFile(testdataName, protocol.BlockSize, testdataSize, nil); err != nil {
+			b.Fatal(err)
+		}
+	}
+
+	b.ReportAllocs()
+}
+
+func initTestFile() {
+	fd, err := os.Create(testdataName)
+	if err != nil {
+		panic(err)
+	}
+
+	lr := io.LimitReader(rand.Reader, testdataSize)
+	if _, err := io.Copy(fd, lr); err != nil {
+		panic(err)
+	}
+
+	if err := fd.Close(); err != nil {
+		panic(err)
+	}
+}

From dc32f7f0a3239c529c92cacd1a08231b5e8fe0c7 Mon Sep 17 00:00:00 2001
From: Jakob Borg
Date: Tue, 27 Oct 2015 09:31:28 +0100
Subject: [PATCH 2/2] Reduce allocations in HashFile

By using copyBuffer we avoid a buffer allocation for each block we hash,
and by allocating space for the hashes up front we get one large backing
array instead of a small one for each block.
For a 17 MiB file this makes quite a difference in the amount of memory
allocated:

    benchmark               old ns/op     new ns/op     delta
    BenchmarkHashFile-8     102045110     100459158     -1.55%

    benchmark               old allocs     new allocs     delta
    BenchmarkHashFile-8     415            144            -65.30%

    benchmark               old bytes     new bytes     delta
    BenchmarkHashFile-8     4504296       48104         -98.93%
---
 lib/scanner/blocks.go | 79 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 71 insertions(+), 8 deletions(-)

diff --git a/lib/scanner/blocks.go b/lib/scanner/blocks.go
index c9051677..455aa4cb 100644
--- a/lib/scanner/blocks.go
+++ b/lib/scanner/blocks.go
@@ -20,15 +20,27 @@ var SHA256OfNothing = []uint8{0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x
 
 // Blocks returns the blockwise hash of the reader.
 func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]protocol.BlockInfo, error) {
-	var blocks []protocol.BlockInfo
-	if sizehint > 0 {
-		blocks = make([]protocol.BlockInfo, 0, int(sizehint/int64(blocksize)))
-	}
-	var offset int64
 	hf := sha256.New()
+	hashLength := hf.Size()
+
+	var blocks []protocol.BlockInfo
+	var hashes, thisHash []byte
+
+	if sizehint > 0 {
+		// Allocate contiguous blocks for the BlockInfo structures and their
+		// hashes once and for all.
+		numBlocks := int(sizehint / int64(blocksize))
+		blocks = make([]protocol.BlockInfo, 0, numBlocks)
+		hashes = make([]byte, 0, hashLength*numBlocks)
+	}
+
+	// A 32k buffer is used for copying into the hash function.
+	buf := make([]byte, 32<<10)
+
+	var offset int64
 	for {
-		lr := &io.LimitedReader{R: r, N: int64(blocksize)}
-		n, err := io.Copy(hf, lr)
+		lr := io.LimitReader(r, int64(blocksize))
+		n, err := copyBuffer(hf, lr, buf)
 		if err != nil {
 			return nil, err
 		}
@@ -41,11 +53,17 @@ func Blocks(r io.Reader, blocksize int, sizehint int64, counter *int64) ([]proto
 			atomic.AddInt64(counter, int64(n))
 		}
 
+		// Carve out a hash-sized chunk of "hashes" to store the hash for this
+		// block.
+		hashes = hf.Sum(hashes)
+		thisHash, hashes = hashes[:hashLength], hashes[hashLength:]
+
 		b := protocol.BlockInfo{
 			Size:   int32(n),
 			Offset: offset,
-			Hash:   hf.Sum(nil),
+			Hash:   thisHash,
 		}
+
 		blocks = append(blocks, b)
 		offset += int64(n)
 
@@ -158,3 +176,48 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
 	}
 	return true
 }
+
+// This is a copy & paste of io.copyBuffer from the Go 1.5 standard library,
+// as we want this but also want to build with Go 1.3+.
+
+// copyBuffer is the actual implementation of Copy and CopyBuffer.
+// if buf is nil, one is allocated.
+func copyBuffer(dst io.Writer, src io.Reader, buf []byte) (written int64, err error) {
+	// If the reader has a WriteTo method, use it to do the copy.
+	// Avoids an allocation and a copy.
+	if wt, ok := src.(io.WriterTo); ok {
+		return wt.WriteTo(dst)
+	}
+	// Similarly, if the writer has a ReadFrom method, use it to do the copy.
+	if rt, ok := dst.(io.ReaderFrom); ok {
+		return rt.ReadFrom(src)
+	}
+	if buf == nil {
+		buf = make([]byte, 32*1024)
+	}
+	for {
+		nr, er := src.Read(buf)
+		if nr > 0 {
+			nw, ew := dst.Write(buf[0:nr])
+			if nw > 0 {
+				written += int64(nw)
+			}
+			if ew != nil {
+				err = ew
+				break
+			}
+			if nr != nw {
+				err = io.ErrShortWrite
+				break
+			}
+		}
+		if er == io.EOF {
+			break
+		}
+		if er != nil {
+			err = er
+			break
+		}
+	}
+	return written, err
+}