lib/scanner: Refactoring
GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4642 LGTM: imsodin, AudriusButkevicius
This commit is contained in:
committed by
Audrius Butkevicius
parent
bbc178ccc4
commit
89a021609b
@@ -7,9 +7,7 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
|
||||
@@ -26,18 +24,20 @@ type Counter interface {
|
||||
|
||||
// Blocks returns the blockwise hash of the reader.
|
||||
func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
|
||||
if counter == nil {
|
||||
counter = &noopCounter{}
|
||||
}
|
||||
|
||||
hf := sha256.New()
|
||||
hashLength := hf.Size()
|
||||
|
||||
var mhf io.Writer
|
||||
var whf hash.Hash32
|
||||
|
||||
var weakHf hash.Hash32 = noopHash{}
|
||||
var multiHf io.Writer = hf
|
||||
if useWeakHashes {
|
||||
whf = adler32.New()
|
||||
mhf = io.MultiWriter(hf, whf)
|
||||
} else {
|
||||
whf = noopHash{}
|
||||
mhf = hf
|
||||
// Use an actual weak hash function, make the multiHf
|
||||
// write to both hash functions.
|
||||
weakHf = adler32.New()
|
||||
multiHf = io.MultiWriter(hf, weakHf)
|
||||
}
|
||||
|
||||
var blocks []protocol.BlockInfo
|
||||
@@ -65,7 +65,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
|
||||
}
|
||||
|
||||
lr.N = int64(blocksize)
|
||||
n, err := io.CopyBuffer(mhf, lr, buf)
|
||||
n, err := io.CopyBuffer(multiHf, lr, buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -74,9 +74,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
|
||||
break
|
||||
}
|
||||
|
||||
if counter != nil {
|
||||
counter.Update(n)
|
||||
}
|
||||
counter.Update(n)
|
||||
|
||||
// Carve out a hash-sized chunk of "hashes" to store the hash for this
|
||||
// block.
|
||||
@@ -87,14 +85,14 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
|
||||
Size: int32(n),
|
||||
Offset: offset,
|
||||
Hash: thisHash,
|
||||
WeakHash: whf.Sum32(),
|
||||
WeakHash: weakHf.Sum32(),
|
||||
}
|
||||
|
||||
blocks = append(blocks, b)
|
||||
offset += n
|
||||
|
||||
hf.Reset()
|
||||
whf.Reset()
|
||||
weakHf.Reset()
|
||||
}
|
||||
|
||||
if len(blocks) == 0 {
|
||||
@@ -109,104 +107,6 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
|
||||
return blocks, nil
|
||||
}
|
||||
|
||||
// PopulateOffsets sets the Offset field on each block
|
||||
func PopulateOffsets(blocks []protocol.BlockInfo) {
|
||||
var offset int64
|
||||
for i := range blocks {
|
||||
blocks[i].Offset = offset
|
||||
offset += int64(blocks[i].Size)
|
||||
}
|
||||
}
|
||||
|
||||
// BlockDiff returns lists of common and missing (to transform src into tgt)
|
||||
// blocks. Both block lists must have been created with the same block size.
|
||||
func BlockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
|
||||
if len(tgt) == 0 && len(src) != 0 {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if len(tgt) != 0 && len(src) == 0 {
|
||||
// Copy the entire file
|
||||
return nil, tgt
|
||||
}
|
||||
|
||||
for i := range tgt {
|
||||
if i >= len(src) || !bytes.Equal(tgt[i].Hash, src[i].Hash) {
|
||||
// Copy differing block
|
||||
need = append(need, tgt[i])
|
||||
} else {
|
||||
have = append(have, tgt[i])
|
||||
}
|
||||
}
|
||||
|
||||
return have, need
|
||||
}
|
||||
|
||||
// Verify returns nil or an error describing the mismatch between the block
|
||||
// list and actual reader contents
|
||||
func Verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
|
||||
hf := sha256.New()
|
||||
// A 32k buffer is used for copying into the hash function.
|
||||
buf := make([]byte, 32<<10)
|
||||
|
||||
for i, block := range blocks {
|
||||
lr := &io.LimitedReader{R: r, N: int64(blocksize)}
|
||||
_, err := io.CopyBuffer(hf, lr, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
hash := hf.Sum(nil)
|
||||
hf.Reset()
|
||||
|
||||
if !bytes.Equal(hash, block.Hash) {
|
||||
return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i)
|
||||
}
|
||||
}
|
||||
|
||||
// We should have reached the end now
|
||||
bs := make([]byte, 1)
|
||||
n, err := r.Read(bs)
|
||||
if n != 0 || err != io.EOF {
|
||||
return fmt.Errorf("file continues past end of blocks")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func VerifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) {
|
||||
if len(buf) != int(block.Size) {
|
||||
return nil, fmt.Errorf("length mismatch %d != %d", len(buf), block.Size)
|
||||
}
|
||||
hf := sha256.New()
|
||||
_, err := hf.Write(buf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
hash := hf.Sum(nil)
|
||||
|
||||
if !bytes.Equal(hash, block.Hash) {
|
||||
return hash, fmt.Errorf("hash mismatch %x != %x", hash, block.Hash)
|
||||
}
|
||||
|
||||
return hash, nil
|
||||
}
|
||||
|
||||
// BlocksEqual returns whether two slices of blocks are exactly the same hash
|
||||
// and index pair wise.
|
||||
func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
|
||||
if len(tgt) != len(src) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, sblk := range src {
|
||||
if !bytes.Equal(sblk.Hash, tgt[i].Hash) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
type noopHash struct{}
|
||||
|
||||
func (noopHash) Sum32() uint32 { return 0 }
|
||||
@@ -215,3 +115,7 @@ func (noopHash) Size() int { return 0 }
|
||||
func (noopHash) Reset() {}
|
||||
func (noopHash) Sum([]byte) []byte { return nil }
|
||||
func (noopHash) Write([]byte) (int, error) { return 0, nil }
|
||||
|
||||
// noopCounter is a Counter whose Update does nothing. Blocks substitutes it
// when the caller passes a nil counter, so the hashing loop can call Update
// unconditionally.
type noopCounter struct{}

// Update discards the reported byte count.
func (*noopCounter) Update(int64) {}
|
||||
|
||||
@@ -104,68 +104,6 @@ func TestBlocks(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
var diffTestData = []struct {
|
||||
a string
|
||||
b string
|
||||
s int
|
||||
d []protocol.BlockInfo
|
||||
}{
|
||||
{"contents", "contents", 1024, []protocol.BlockInfo{}},
|
||||
{"", "", 1024, []protocol.BlockInfo{}},
|
||||
{"contents", "contents", 3, []protocol.BlockInfo{}},
|
||||
{"contents", "cantents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}}},
|
||||
{"contents", "contants", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}}},
|
||||
{"contents", "cantants", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}}},
|
||||
{"contents", "", 3, []protocol.BlockInfo{{Offset: 0, Size: 0}}},
|
||||
{"", "contents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
|
||||
{"con", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
|
||||
{"contents", "con", 3, nil},
|
||||
{"contents", "cont", 3, []protocol.BlockInfo{{Offset: 3, Size: 1}}},
|
||||
{"cont", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
|
||||
}
|
||||
|
||||
func TestDiff(t *testing.T) {
|
||||
for i, test := range diffTestData {
|
||||
a, _ := Blocks(context.TODO(), bytes.NewBufferString(test.a), test.s, -1, nil, false)
|
||||
b, _ := Blocks(context.TODO(), bytes.NewBufferString(test.b), test.s, -1, nil, false)
|
||||
_, d := BlockDiff(a, b)
|
||||
if len(d) != len(test.d) {
|
||||
t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d))
|
||||
} else {
|
||||
for j := range test.d {
|
||||
if d[j].Offset != test.d[j].Offset {
|
||||
t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, d[j].Offset, test.d[j].Offset)
|
||||
}
|
||||
if d[j].Size != test.d[j].Size {
|
||||
t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, d[j].Size, test.d[j].Size)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDiffEmpty(t *testing.T) {
|
||||
emptyCases := []struct {
|
||||
a []protocol.BlockInfo
|
||||
b []protocol.BlockInfo
|
||||
need int
|
||||
have int
|
||||
}{
|
||||
{nil, nil, 0, 0},
|
||||
{[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0},
|
||||
{nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0},
|
||||
}
|
||||
for _, emptyCase := range emptyCases {
|
||||
h, n := BlockDiff(emptyCase.a, emptyCase.b)
|
||||
if len(h) != emptyCase.have {
|
||||
t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
|
||||
}
|
||||
if len(n) != emptyCase.need {
|
||||
t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestAdler32Variants(t *testing.T) {
|
||||
// Verify that the two adler32 functions give matching results for a few
|
||||
// different blocks of data.
|
||||
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
"github.com/syncthing/syncthing/lib/ignore"
|
||||
"github.com/syncthing/syncthing/lib/osutil"
|
||||
"github.com/syncthing/syncthing/lib/protocol"
|
||||
"github.com/syncthing/syncthing/lib/sha256"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
@@ -162,21 +163,21 @@ func TestVerify(t *testing.T) {
|
||||
}
|
||||
|
||||
buf = bytes.NewBuffer(data)
|
||||
err = Verify(buf, blocksize, blocks)
|
||||
err = verify(buf, blocksize, blocks)
|
||||
t.Log(err)
|
||||
if err != nil {
|
||||
t.Fatal("Unexpected verify failure", err)
|
||||
}
|
||||
|
||||
buf = bytes.NewBuffer(append(data, '\n'))
|
||||
err = Verify(buf, blocksize, blocks)
|
||||
err = verify(buf, blocksize, blocks)
|
||||
t.Log(err)
|
||||
if err == nil {
|
||||
t.Fatal("Unexpected verify success")
|
||||
}
|
||||
|
||||
buf = bytes.NewBuffer(data[:len(data)-1])
|
||||
err = Verify(buf, blocksize, blocks)
|
||||
err = verify(buf, blocksize, blocks)
|
||||
t.Log(err)
|
||||
if err == nil {
|
||||
t.Fatal("Unexpected verify success")
|
||||
@@ -184,7 +185,7 @@ func TestVerify(t *testing.T) {
|
||||
|
||||
data[42] = 42
|
||||
buf = bytes.NewBuffer(data)
|
||||
err = Verify(buf, blocksize, blocks)
|
||||
err = verify(buf, blocksize, blocks)
|
||||
t.Log(err)
|
||||
if err == nil {
|
||||
t.Fatal("Unexpected verify success")
|
||||
@@ -529,3 +530,35 @@ func TestStopWalk(t *testing.T) {
|
||||
t.Error("unexpected extra entries received after cancel")
|
||||
}
|
||||
}
|
||||
|
||||
// Verify returns nil or an error describing the mismatch between the block
|
||||
// list and actual reader contents
|
||||
func verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
|
||||
hf := sha256.New()
|
||||
// A 32k buffer is used for copying into the hash function.
|
||||
buf := make([]byte, 32<<10)
|
||||
|
||||
for i, block := range blocks {
|
||||
lr := &io.LimitedReader{R: r, N: int64(blocksize)}
|
||||
_, err := io.CopyBuffer(hf, lr, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
hash := hf.Sum(nil)
|
||||
hf.Reset()
|
||||
|
||||
if !bytes.Equal(hash, block.Hash) {
|
||||
return fmt.Errorf("hash mismatch %x != %x for block %d", hash, block.Hash, i)
|
||||
}
|
||||
}
|
||||
|
||||
// We should have reached the end now
|
||||
bs := make([]byte, 1)
|
||||
n, err := r.Read(bs)
|
||||
if n != 0 || err != io.EOF {
|
||||
return fmt.Errorf("file continues past end of blocks")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user