lib/scanner: Refactoring

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4642
LGTM: imsodin, AudriusButkevicius
This commit is contained in:
Lars K.W. Gohlke
2018-01-14 14:30:11 +00:00
committed by Audrius Butkevicius
parent bbc178ccc4
commit 89a021609b
5 changed files with 192 additions and 187 deletions

View File

@@ -7,9 +7,7 @@
package scanner
import (
"bytes"
"context"
"fmt"
"hash"
"io"
@@ -26,18 +24,20 @@ type Counter interface {
// Blocks returns the blockwise hash of the reader.
func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) {
if counter == nil {
counter = &noopCounter{}
}
hf := sha256.New()
hashLength := hf.Size()
var mhf io.Writer
var whf hash.Hash32
var weakHf hash.Hash32 = noopHash{}
var multiHf io.Writer = hf
if useWeakHashes {
whf = adler32.New()
mhf = io.MultiWriter(hf, whf)
} else {
whf = noopHash{}
mhf = hf
// Use an actual weak hash function, make the multiHf
// write to both hash functions.
weakHf = adler32.New()
multiHf = io.MultiWriter(hf, weakHf)
}
var blocks []protocol.BlockInfo
@@ -65,7 +65,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
}
lr.N = int64(blocksize)
n, err := io.CopyBuffer(mhf, lr, buf)
n, err := io.CopyBuffer(multiHf, lr, buf)
if err != nil {
return nil, err
}
@@ -74,9 +74,7 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
break
}
if counter != nil {
counter.Update(n)
}
counter.Update(n)
// Carve out a hash-sized chunk of "hashes" to store the hash for this
// block.
@@ -87,14 +85,14 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
Size: int32(n),
Offset: offset,
Hash: thisHash,
WeakHash: whf.Sum32(),
WeakHash: weakHf.Sum32(),
}
blocks = append(blocks, b)
offset += n
hf.Reset()
whf.Reset()
weakHf.Reset()
}
if len(blocks) == 0 {
@@ -109,104 +107,6 @@ func Blocks(ctx context.Context, r io.Reader, blocksize int, sizehint int64, cou
return blocks, nil
}
// PopulateOffsets assigns each block its Offset, computed as the running
// total of the Size fields of all blocks that precede it. The first block
// gets offset zero. The slice elements are modified in place.
func PopulateOffsets(blocks []protocol.BlockInfo) {
	next := int64(0)
	for idx := 0; idx < len(blocks); idx++ {
		blk := &blocks[idx]
		blk.Offset = next
		next += int64(blk.Size)
	}
}
// BlockDiff compares src and tgt position by position and splits the blocks
// of tgt into those already present in src (have) and those that must be
// fetched to turn src into tgt (need). Blocks are matched on Hash only, so
// both lists must have been produced with the same block size.
//
// An empty tgt yields two nil slices. An empty src with a non-empty tgt
// yields tgt itself as need, without copying.
func BlockDiff(src, tgt []protocol.BlockInfo) (have, need []protocol.BlockInfo) {
	switch {
	case len(tgt) == 0 && len(src) != 0:
		// Nothing is wanted, so nothing is shared or missing.
		return nil, nil
	case len(src) == 0 && len(tgt) != 0:
		// Nothing to reuse; every target block is needed.
		return nil, tgt
	}

	for idx := range tgt {
		if idx < len(src) && bytes.Equal(tgt[idx].Hash, src[idx].Hash) {
			have = append(have, tgt[idx])
			continue
		}
		// Either src is shorter than tgt or the hashes differ here.
		need = append(need, tgt[idx])
	}
	return have, need
}
// Verify checks the contents of r against the given block list. For every
// entry in blocks it reads up to blocksize bytes from r, hashes them and
// compares the result with the entry's Hash; the entry's own Size is not
// consulted. After the last block, r must report io.EOF without yielding any
// further data.
//
// It returns nil when everything matches, the read error if reading fails,
// or an error describing the first mismatch.
func Verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
	hasher := sha256.New()
	// 32 KiB of scratch space handed to io.CopyBuffer.
	scratch := make([]byte, 32<<10)

	for idx := range blocks {
		chunk := io.LimitReader(r, int64(blocksize))
		if _, err := io.CopyBuffer(hasher, chunk, scratch); err != nil {
			return err
		}

		sum := hasher.Sum(nil)
		hasher.Reset()

		if want := blocks[idx].Hash; !bytes.Equal(sum, want) {
			return fmt.Errorf("hash mismatch %x != %x for block %d", sum, want, idx)
		}
	}

	// One more read must come back empty with io.EOF; anything else means
	// the reader holds more than the block list accounts for.
	var probe [1]byte
	if n, err := r.Read(probe[:]); n != 0 || err != io.EOF {
		return fmt.Errorf("file continues past end of blocks")
	}
	return nil
}
// VerifyBuffer checks buf against a single block. The buffer must be exactly
// block.Size bytes long and its hash, computed with sha256.New, must equal
// block.Hash.
//
// On a length mismatch it returns a nil hash and an error. Otherwise it
// returns the computed hash, together with an error if that hash differs
// from block.Hash.
func VerifyBuffer(buf []byte, block protocol.BlockInfo) ([]byte, error) {
	if got, want := len(buf), int(block.Size); got != want {
		return nil, fmt.Errorf("length mismatch %d != %d", len(buf), block.Size)
	}

	hasher := sha256.New()
	if _, err := hasher.Write(buf); err != nil {
		return nil, err
	}

	sum := hasher.Sum(nil)
	if bytes.Equal(sum, block.Hash) {
		return sum, nil
	}
	return sum, fmt.Errorf("hash mismatch %x != %x", sum, block.Hash)
}
// BlocksEqual reports whether src and tgt have the same length and carry
// identical hashes at every index. Only the Hash field of each block is
// compared.
func BlocksEqual(src, tgt []protocol.BlockInfo) bool {
	if len(src) != len(tgt) {
		return false
	}
	for idx := range tgt {
		if !bytes.Equal(src[idx].Hash, tgt[idx].Hash) {
			return false
		}
	}
	return true
}
// noopHash is a do-nothing 32-bit hash. Blocks uses it in place of a real
// weak hash when weak hashing is disabled, so every block gets a WeakHash of
// zero.
type noopHash struct{}

// Sum32 always reports a checksum of zero.
func (noopHash) Sum32() uint32 {
	return 0
}
@@ -215,3 +115,7 @@ func (noopHash) Size() int { return 0 }
// Reset does nothing; noopHash carries no state.
func (noopHash) Reset() {}

// Sum returns b unchanged. The hash.Hash contract is to append the digest to
// b and return the result, and this hash has no digest bytes to add, so the
// caller's prefix must survive rather than being replaced by nil.
func (noopHash) Sum(b []byte) []byte { return b }

// Write discards p and reports it as fully written. Returning a short count
// with a nil error would violate the io.Writer contract and make helpers
// such as io.MultiWriter fail with io.ErrShortWrite.
func (noopHash) Write(p []byte) (int, error) { return len(p), nil }
// noopCounter is the Counter that Blocks substitutes when the caller passes
// nil; it throws away every progress report.
type noopCounter struct{}

// Update ignores the reported byte count.
func (*noopCounter) Update(int64) {
}

View File

@@ -104,68 +104,6 @@ func TestBlocks(t *testing.T) {
}
}
// diffTestData is the table driving TestDiff. Each entry hashes the strings
// a and b into block lists using block size s and lists in d the blocks that
// BlockDiff is expected to report as needed to turn a into b. TestDiff
// compares only the Offset and Size of the expected blocks.
var diffTestData = []struct {
// a is the source content.
a string
// b is the target content.
b string
// s is the block size passed to Blocks for both a and b.
s int
// d is the expected "need" list; only Offset and Size are filled in.
d []protocol.BlockInfo
}{
{"contents", "contents", 1024, []protocol.BlockInfo{}},
{"", "", 1024, []protocol.BlockInfo{}},
{"contents", "contents", 3, []protocol.BlockInfo{}},
{"contents", "cantents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}}},
{"contents", "contants", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}}},
{"contents", "cantants", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}}},
{"contents", "", 3, []protocol.BlockInfo{{Offset: 0, Size: 0}}},
{"", "contents", 3, []protocol.BlockInfo{{Offset: 0, Size: 3}, {Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
{"con", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
{"contents", "con", 3, nil},
{"contents", "cont", 3, []protocol.BlockInfo{{Offset: 3, Size: 1}}},
{"cont", "contents", 3, []protocol.BlockInfo{{Offset: 3, Size: 3}, {Offset: 6, Size: 2}}},
}
// TestDiff runs every diffTestData entry through Blocks (nil counter, weak
// hashes disabled) for both inputs and checks that the "need" list returned
// by BlockDiff has the expected length and, block by block, the expected
// Offset and Size. A length mismatch aborts the test immediately.
func TestDiff(t *testing.T) {
	for i, test := range diffTestData {
		src, _ := Blocks(context.TODO(), bytes.NewBufferString(test.a), test.s, -1, nil, false)
		tgt, _ := Blocks(context.TODO(), bytes.NewBufferString(test.b), test.s, -1, nil, false)
		_, need := BlockDiff(src, tgt)

		if len(need) != len(test.d) {
			t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(need), len(test.d))
		}

		// Lengths agree, so indexing need by j is safe from here on.
		for j := range test.d {
			got, want := need[j], test.d[j]
			if got.Offset != want.Offset {
				t.Errorf("Incorrect offset for diff %d block %d; %d != %d", i, j, got.Offset, want.Offset)
			}
			if got.Size != want.Size {
				t.Errorf("Incorrect length for diff %d block %d; %d != %d", i, j, got.Size, want.Size)
			}
		}
	}
}
// TestDiffEmpty checks BlockDiff when one or both block lists are nil: the
// number of "have" and "need" blocks returned must match the expectation of
// each case.
func TestDiffEmpty(t *testing.T) {
	emptyCases := []struct {
		a    []protocol.BlockInfo
		b    []protocol.BlockInfo
		need int
		have int
	}{
		{nil, nil, 0, 0},
		{[]protocol.BlockInfo{{Offset: 3, Size: 1}}, nil, 0, 0},
		{nil, []protocol.BlockInfo{{Offset: 3, Size: 1}}, 1, 0},
	}
	for _, emptyCase := range emptyCases {
		h, n := BlockDiff(emptyCase.a, emptyCase.b)
		if len(h) != emptyCase.have {
			t.Errorf("incorrect have: %d != %d", len(h), emptyCase.have)
		}
		if len(n) != emptyCase.need {
			// Report the values that were actually compared; the message
			// used to repeat the "have" numbers here.
			t.Errorf("incorrect need: %d != %d", len(n), emptyCase.need)
		}
	}
}
func TestAdler32Variants(t *testing.T) {
// Verify that the two adler32 functions give matching results for a few
// different blocks of data.

View File

@@ -25,6 +25,7 @@ import (
"github.com/syncthing/syncthing/lib/ignore"
"github.com/syncthing/syncthing/lib/osutil"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/sha256"
"golang.org/x/text/unicode/norm"
)
@@ -162,21 +163,21 @@ func TestVerify(t *testing.T) {
}
buf = bytes.NewBuffer(data)
err = Verify(buf, blocksize, blocks)
err = verify(buf, blocksize, blocks)
t.Log(err)
if err != nil {
t.Fatal("Unexpected verify failure", err)
}
buf = bytes.NewBuffer(append(data, '\n'))
err = Verify(buf, blocksize, blocks)
err = verify(buf, blocksize, blocks)
t.Log(err)
if err == nil {
t.Fatal("Unexpected verify success")
}
buf = bytes.NewBuffer(data[:len(data)-1])
err = Verify(buf, blocksize, blocks)
err = verify(buf, blocksize, blocks)
t.Log(err)
if err == nil {
t.Fatal("Unexpected verify success")
@@ -184,7 +185,7 @@ func TestVerify(t *testing.T) {
data[42] = 42
buf = bytes.NewBuffer(data)
err = Verify(buf, blocksize, blocks)
err = verify(buf, blocksize, blocks)
t.Log(err)
if err == nil {
t.Fatal("Unexpected verify success")
@@ -529,3 +530,35 @@ func TestStopWalk(t *testing.T) {
t.Error("unexpected extra entries received after cancel")
}
}
// verify is a test helper that checks the contents of r against a block
// list. For each entry in blocks it hashes up to blocksize bytes read from r
// and compares the digest with the entry's Hash. Once all blocks are
// consumed, r must be exhausted.
//
// It returns nil on a full match, the read error if reading fails, or an
// error describing the first mismatch.
func verify(r io.Reader, blocksize int, blocks []protocol.BlockInfo) error {
	digest := sha256.New()
	// 32 KiB copy buffer shared by all blocks.
	copyBuf := make([]byte, 32<<10)

	for idx := range blocks {
		limited := io.LimitReader(r, int64(blocksize))
		if _, err := io.CopyBuffer(digest, limited, copyBuf); err != nil {
			return err
		}

		got := digest.Sum(nil)
		digest.Reset()

		if want := blocks[idx].Hash; !bytes.Equal(got, want) {
			return fmt.Errorf("hash mismatch %x != %x for block %d", got, want, idx)
		}
	}

	// A trailing read has to return no data and io.EOF, otherwise the
	// reader is longer than the block list says.
	var extra [1]byte
	if n, err := r.Read(extra[:]); n != 0 || err != io.EOF {
		return fmt.Errorf("file continues past end of blocks")
	}
	return nil
}