diff --git a/lib/model/rwfolder.go b/lib/model/rwfolder.go index bfe24a00..cac9f13f 100644 --- a/lib/model/rwfolder.go +++ b/lib/model/rwfolder.go @@ -7,6 +7,7 @@ package model import ( + "bytes" "errors" "fmt" "math/rand" @@ -24,6 +25,7 @@ import ( "github.com/syncthing/syncthing/lib/osutil" "github.com/syncthing/syncthing/lib/protocol" "github.com/syncthing/syncthing/lib/scanner" + "github.com/syncthing/syncthing/lib/sha256" "github.com/syncthing/syncthing/lib/sync" "github.com/syncthing/syncthing/lib/versioner" "github.com/syncthing/syncthing/lib/weakhash" @@ -560,7 +562,7 @@ nextFile: // we can just do a rename instead. key := string(fi.Blocks[0].Hash) for i, candidate := range buckets[key] { - if scanner.BlocksEqual(candidate.Blocks, fi.Blocks) { + if blocksEqual(candidate.Blocks, fi.Blocks) { // Remove the candidate from the bucket lidx := len(buckets[key]) - 1 buckets[key][i] = buckets[key][lidx] @@ -615,6 +617,21 @@ nextFile: return changed } +// blocksEqual returns whether two slices of blocks are exactly the same hash +// and index pair wise. +func blocksEqual(src, tgt []protocol.BlockInfo) bool { + if len(tgt) != len(src) { + return false + } + + for i, sblk := range src { + if !bytes.Equal(sblk.Hash, tgt[i].Hash) { + return false + } + } + return true +} + // handleDir creates or updates the given directory func (f *sendReceiveFolder) handleDir(file protocol.FileInfo, dbUpdateChan chan<- dbUpdateJob) { // Used in the defer closure below, updated by the function body. Take @@ -976,7 +993,7 @@ func (f *sendReceiveFolder) renameFile(source, target protocol.FileInfo, dbUpdat func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState, dbUpdateChan chan<- dbUpdateJob) { curFile, hasCurFile := f.model.CurrentFolderFile(f.folderID, file.Name) - have, need := scanner.BlockDiff(curFile.Blocks, file.Blocks) + have, need := blockDiff(curFile.Blocks, file.Blocks) if hasCurFile && len(need) == 0 { // We are supposed to copy the entire file, and then fetch nothing. We @@ -1013,7 +1030,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c tempName := fs.TempName(file.Name) - scanner.PopulateOffsets(file.Blocks) + populateOffsets(file.Blocks) var blocks []protocol.BlockInfo var blocksSize int64 @@ -1024,7 +1041,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c tempBlocks, err := scanner.HashFile(f.ctx, f.fs, tempName, protocol.BlockSize, nil, false) if err == nil { // Check for any reusable blocks in the temp file - tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) + tempCopyBlocks, _ := blockDiff(tempBlocks, file.Blocks) // block.String() returns a string unique to the block existingBlocks := make(map[string]struct{}, len(tempCopyBlocks)) @@ -1108,6 +1125,39 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c copyChan <- cs } +// blockDiff returns lists of common and missing (to transform src into tgt) +// blocks. Both block lists must have been created with the same block size. +func blockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) { + if len(tgt) == 0 && len(src) != 0 { + return nil, nil + } + + if len(tgt) != 0 && len(src) == 0 { + // Copy the entire file + return nil, tgt + } + + for i := range tgt { + if i >= len(src) || !bytes.Equal(tgt[i].Hash, src[i].Hash) { + // Copy differing block + need = append(need, tgt[i]) + } else { + have = append(have, tgt[i]) + } + } + + return have, need +} + +// populateOffsets sets the Offset field on each block +func populateOffsets(blocks []protocol.BlockInfo) { + var offset int64 + for i := range blocks { + blocks[i].Offset = offset + offset += int64(blocks[i].Size) + } +} + // shortcutFile sets file mode and modification time, when that's the only // thing that has changed. func (f *sendReceiveFolder) shortcutFile(file protocol.FileInfo) error { @@ -1207,7 +1257,7 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch buf = buf[:int(block.Size)] found, err := weakHashFinder.Iterate(block.WeakHash, buf, func(offset int64) bool { - if _, err := scanner.VerifyBuffer(buf, block); err != nil { + if _, err := verifyBuffer(buf, block); err != nil { return true } @@ -1242,7 +1292,7 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch return false } - hash, err := scanner.VerifyBuffer(buf, block) + hash, err := verifyBuffer(buf, block) if err != nil { if hash != nil { l.Debugf("Finder block mismatch in %s:%s:%d expected %q got %q", folder, path, index, block.Hash, hash) @@ -1292,6 +1342,24 @@ func (f *sendReceiveFolder) copierRoutine(in <-chan copyBlocksState, pullChan ch } } +func verifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) { + if len(buf) != int(block.Size) { + return nil, fmt.Errorf("length mismatch %d != %d", len(buf), block.Size) + } + hf := sha256.New() + _, err := hf.Write(buf) + if err != nil { + return nil, err + } + hash := hf.Sum(nil) + + if !bytes.Equal(hash, block.Hash) { + return hash, fmt.Errorf("hash mismatch %x != %x", hash, block.Hash) + } + + return hash, nil +} + func (f *sendReceiveFolder) pullerRoutine(in <-chan pullBlockState, out chan<- *sharedPullerState) { for state := range in { if state.failed() != nil { @@ -1346,7 +1414,7 @@ func (f *sendReceiveFolder) pullerRoutine(in <-chan pullBlockState, out chan<- * // Verify that the received block matches the desired hash, if not // try pulling it from another device. - _, lastError = scanner.VerifyBuffer(buf, state.block) + _, lastError = verifyBuffer(buf, state.block) if lastError != nil { l.Debugln("request:", f.folderID, state.file.Name, state.block.Offset, state.block.Size, "hash mismatch") continue diff --git a/lib/model/rwfolder_test.go b/lib/model/rwfolder_test.go index 56fa9c95..1ffcad7f 100644 --- a/lib/model/rwfolder_test.go +++ b/lib/model/rwfolder_test.go @@ -58,6 +58,26 @@ var blocks = []protocol.BlockInfo{ var folders = []string{"default"} +var diffTestData = []struct { + a string + b string + s int + d []protocol.BlockInfo +}{ + {"contents", "contents", 1024, []protocol.BlockInfo{}}, + {"", "", 1024, []protocol.BlockInfo{}}, + {"contents", "contents", 3, []protocol.BlockInfo{}}, + {"contents", "cantents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}}}, + {"contents", "contants", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}}}, + {"contents", "cantants", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}}}, + {"contents", "", 3, []protocol.BlockInfo{{Offset: 0, Size: 0}}}, + {"", "contents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}, {Offset: 6, Size: 2}}}, + {"con", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}}, + {"contents", "con", 3, nil}, + {"contents", "cont", 3, []protocol.BlockInfo{{Offset: 3, Size: 1}}}, + {"cont", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}}, +} + func setUpFile(filename string, blockNumbers []int) protocol.FileInfo { // Create existing file existingBlocks := make([]protocol.BlockInfo, len(blockNumbers)) @@ -651,3 +671,45 @@ func TestIssue3164(t *testing.T) { t.Fatal(err) } } + +func TestDiff(t *testing.T) { + for i, test := range diffTestData { + a, _ := scanner.Blocks(context.TODO(), bytes.NewBufferString(test.a), test.s, -1, nil, false) + b, _ := scanner.Blocks(context.TODO(), bytes.NewBufferString(test.b), test.s, -1, nil, false) + _, d := blockDiff(a, b) + if len(d) != len(test.d) { + t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d)) + } else { + for j := range test.d { + if d[j].Offset != test.d[j].Offset { + t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, d[j].Offset, test.d[j].Offset) + } + if d[j].Size != test.d[j].Size { + t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, d[j].Size, test.d[j].Size) + } + } + } + } +} + +func TestDiffEmpty(t *testing.T) { + emptyCases := []struct { + a []protocol.BlockInfo + b []protocol.BlockInfo + need int + have int + }{ + {nil, nil, 0, 0}, + {[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0}, + {nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0}, + } + for _, emptyCase := range emptyCases { + h, n := blockDiff(emptyCase.a, emptyCase.b) + if len(h) != emptyCase.have { + t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have) + } + if len(n) != emptyCase.need { + t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have) + } + } +} diff --git a/lib/scanner/blocks.go b/lib/scanner/blocks.go index 1a08daf3..88287b68 100644 --- a/lib/scanner/blocks.go +++ b/lib/scanner/blocks.go @@ -7,9 +7,7 @@ package scanner import ( - "bytes" "context" - "fmt" "hash" "io" @@ -26,18 +24,20 @@ type Counter interface { // Blocks returns the blockwise hash of the reader. func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) { + if counter == nil { + counter = &noopCounter{} + } + hf := sha256.New() hashLength := hf.Size() - var mhf io.Writer - var whf hash.Hash32 - + var weakHf hash.Hash32 = noopHash{} + var multiHf io.Writer = hf if useWeakHashes { - whf = adler32.New() - mhf = io.MultiWriter(hf, whf) - } else { - whf = noopHash{} - mhf = hf + // Use an actual weak hash function, make the multiHf + // write to both hash functions. + weakHf = adler32.New() + multiHf = io.MultiWriter(hf, weakHf) } var blocks []protocol.BlockInfo @@ -65,7 +65,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou } lr.N = int64(blocksize) - n, err := io.CopyBuffer(mhf, lr, buf) + n, err := io.CopyBuffer(multiHf, lr, buf) if err != nil { return nil, err } @@ -74,9 +74,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou break } - if counter != nil { - counter.Update(n) - } + counter.Update(n) // Carve out a hash-sized chunk of "hashes" to store the hash for this // block. @@ -87,14 +85,14 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou Size: int32(n), Offset: offset, Hash: thisHash, - WeakHash: whf.Sum32(), + WeakHash: weakHf.Sum32(), } blocks = append(blocks, b) offset += n hf.Reset() - whf.Reset() + weakHf.Reset() } if len(blocks) == 0 { @@ -109,104 +107,6 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou return blocks, nil } -// PopulateOffsets sets the Offset field on each block -func PopulateOffsets(blocks []protocol.BlockInfo) { - var offset int64 - for i := range blocks { - blocks[i].Offset = offset - offset += int64(blocks[i].Size) - } -} - -// BlockDiff returns lists of common and missing (to transform src into tgt) -// blocks. Both block lists must have been created with the same block size. -func BlockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) { - if len(tgt) == 0 && len(src) != 0 { - return nil, nil - } - - if len(tgt) != 0 && len(src) == 0 { - // Copy the entire file - return nil, tgt - } - - for i := range tgt { - if i >= len(src) || !bytes.Equal(tgt[i].Hash, src[i].Hash) { - // Copy differing block - need = append(need, tgt[i]) - } else { - have = append(have, tgt[i]) - } - } - - return have, need -} - -// Verify returns nil or an error describing the mismatch between the block -// list and actual reader contents -func Verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error { - hf := sha256.New() - // A 32k buffer is used for copying into the hash function. - buf := make([]byte, 32<<10) - - for i, block := range blocks { - lr := &io.LimitedReader{R: r, N: int64(blocksize)} - _, err := io.CopyBuffer(hf, lr, buf) - if err != nil { - return err - } - - hash := hf.Sum(nil) - hf.Reset() - - if !bytes.Equal(hash, block.Hash) { - return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i) - } - } - - // We should have reached the end now - bs := make([]byte, 1) - n, err := r.Read(bs) - if n != 0 || err != io.EOF { - return fmt.Errorf("file continues past end of blocks") - } - - return nil -} - -func VerifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) { - if len(buf) != int(block.Size) { - return nil, fmt.Errorf("length mismatch %d != %d", len(buf), block.Size) - } - hf := sha256.New() - _, err := hf.Write(buf) - if err != nil { - return nil, err - } - hash := hf.Sum(nil) - - if !bytes.Equal(hash, block.Hash) { - return hash, fmt.Errorf("hash mismatch %x != %x", hash, block.Hash) - } - - return hash, nil -} - -// BlocksEqual returns whether two slices of blocks are exactly the same hash -// and index pair wise. -func BlocksEqual(src, tgt []protocol.BlockInfo) bool { - if len(tgt) != len(src) { - return false - } - - for i, sblk := range src { - if !bytes.Equal(sblk.Hash, tgt[i].Hash) { - return false - } - } - return true -} - type noopHash struct{} func (noopHash) Sum32() uint32 { return 0 } @@ -215,3 +115,7 @@ func (noopHash) Size() int { return 0 } func (noopHash) Reset() {} func (noopHash) Sum([]byte) []byte { return nil } func (noopHash) Write([]byte) (int, error) { return 0, nil } + +type noopCounter struct{} + +func (c *noopCounter) Update(bytes int64) {} diff --git a/lib/scanner/blocks_test.go b/lib/scanner/blocks_test.go index 9dab8b6b..8d706dc0 100644 --- a/lib/scanner/blocks_test.go +++ b/lib/scanner/blocks_test.go @@ -104,68 +104,6 @@ func TestBlocks(t *testing.T) { } } -var diffTestData = []struct { - a string - b string - s int - d []protocol.BlockInfo -}{ - {"contents", "contents", 1024, []protocol.BlockInfo{}}, - {"", "", 1024, []protocol.BlockInfo{}}, - {"contents", "contents", 3, []protocol.BlockInfo{}}, - {"contents", "cantents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}}}, - {"contents", "contants", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}}}, - {"contents", "cantants", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}}}, - {"contents", "", 3, []protocol.BlockInfo{{Offset: 0, Size: 0}}}, - {"", "contents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}, {Offset: 6, Size: 2}}}, - {"con", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}}, - {"contents", "con", 3, nil}, - {"contents", "cont", 3, []protocol.BlockInfo{{Offset: 3, Size: 1}}}, - {"cont", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}}, -} - -func TestDiff(t *testing.T) { - for i, test := range diffTestData { - a, _ := Blocks(context.TODO(), bytes.NewBufferString(test.a), test.s, -1, nil, false) - b, _ := Blocks(context.TODO(), bytes.NewBufferString(test.b), test.s, -1, nil, false) - _, d := BlockDiff(a, b) - if len(d) != len(test.d) { - t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d)) - } else { - for j := range test.d { - if d[j].Offset != test.d[j].Offset { - t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, d[j].Offset, test.d[j].Offset) - } - if d[j].Size != test.d[j].Size { - t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, d[j].Size, test.d[j].Size) - } - } - } - } -} - -func TestDiffEmpty(t *testing.T) { - emptyCases := []struct { - a []protocol.BlockInfo - b []protocol.BlockInfo - need int - have int - }{ - {nil, nil, 0, 0}, - {[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0}, - {nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0}, - } - for _, emptyCase := range emptyCases { - h, n := BlockDiff(emptyCase.a, emptyCase.b) - if len(h) != emptyCase.have { - t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have) - } - if len(n) != emptyCase.need { - t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have) - } - } -} - func TestAdler32Variants(t *testing.T) { // Verify that the two adler32 functions give matching results for a few // different blocks of data. diff --git a/lib/scanner/walk_test.go b/lib/scanner/walk_test.go index 1ca9d1ce..6cb48817 100644 --- a/lib/scanner/walk_test.go +++ b/lib/scanner/walk_test.go @@ -25,6 +25,7 @@ import ( "github.com/syncthing/syncthing/lib/ignore" "github.com/syncthing/syncthing/lib/osutil" "github.com/syncthing/syncthing/lib/protocol" + "github.com/syncthing/syncthing/lib/sha256" "golang.org/x/text/unicode/norm" ) @@ -162,21 +163,21 @@ func TestVerify(t *testing.T) { } buf = bytes.NewBuffer(data) - err = Verify(buf, blocksize, blocks) + err = verify(buf, blocksize, blocks) t.Log(err) if err != nil { t.Fatal("Unexpected verify failure", err) } buf = bytes.NewBuffer(append(data, '\n')) - err = Verify(buf, blocksize, blocks) + err = verify(buf, blocksize, blocks) t.Log(err) if err == nil { t.Fatal("Unexpected verify success") } buf = bytes.NewBuffer(data[:len(data)-1]) - err = Verify(buf, blocksize, blocks) + err = verify(buf, blocksize, blocks) t.Log(err) if err == nil { t.Fatal("Unexpected verify success") @@ -184,7 +185,7 @@ func TestVerify(t *testing.T) { data[42] = 42 buf = bytes.NewBuffer(data) - err = Verify(buf, blocksize, blocks) + err = verify(buf, blocksize, blocks) t.Log(err) if err == nil { t.Fatal("Unexpected verify success") @@ -529,3 +530,35 @@ func TestStopWalk(t *testing.T) { t.Error("unexpected extra entries received after cancel") } } + +// Verify returns nil or an error describing the mismatch between the block +// list and actual reader contents +func verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error { + hf := sha256.New() + // A 32k buffer is used for copying into the hash function. + buf := make([]byte, 32<<10) + + for i, block := range blocks { + lr := &io.LimitedReader{R: r, N: int64(blocksize)} + _, err := io.CopyBuffer(hf, lr, buf) + if err != nil { + return err + } + + hash := hf.Sum(nil) + hf.Reset() + + if !bytes.Equal(hash, block.Hash) { + return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i) + } + } + + // We should have reached the end now + bs := make([]byte, 1) + n, err := r.Read(bs) + if n != 0 || err != io.EOF { + return fmt.Errorf("file continues past end of blocks") + } + + return nil +}