diff --git a/cmd/stfileinfo/main.go b/cmd/stfileinfo/main.go index fd14fcde..1a98614e 100644 --- a/cmd/stfileinfo/main.go +++ b/cmd/stfileinfo/main.go @@ -70,7 +70,7 @@ func main() { if *standardBlocks || blockSize < protocol.BlockSize { blockSize = protocol.BlockSize } - bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil) + bs, err := scanner.Blocks(fd, blockSize, fi.Size(), nil, true) if err != nil { log.Fatal(err) } diff --git a/lib/model/model.go b/lib/model/model.go index b0674370..71e9b91c 100644 --- a/lib/model/model.go +++ b/lib/model/model.go @@ -1813,6 +1813,7 @@ func (m *Model) internalScanFolderSubdirs(folder string, subDirs []string) error ShortID: m.shortID, ProgressTickIntervalS: folderCfg.ScanProgressIntervalS, Cancel: cancel, + UseWeakHashes: folderCfg.WeakHashThresholdPct < 100, }) if err != nil { diff --git a/lib/model/model_test.go b/lib/model/model_test.go index a95884e9..4177cf29 100644 --- a/lib/model/model_test.go +++ b/lib/model/model_test.go @@ -317,7 +317,7 @@ func (f *fakeConnection) addFile(name string, flags uint32, ftype protocol.FileI f.mut.Lock() defer f.mut.Unlock() - blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil) + blocks, _ := scanner.Blocks(bytes.NewReader(data), protocol.BlockSize, int64(len(data)), nil, true) var version protocol.Vector version = version.Update(f.id.Short()) diff --git a/lib/model/rwfolder.go b/lib/model/rwfolder.go index ca4b85d0..482dbc79 100644 --- a/lib/model/rwfolder.go +++ b/lib/model/rwfolder.go @@ -1082,7 +1082,7 @@ func (f *sendReceiveFolder) handleFile(file protocol.FileInfo, copyChan chan<- c // Check for an old temporary file which might have some blocks we could // reuse. 
- tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil) + tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil, false) if err == nil { // Check for any reusable blocks in the temp file tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) diff --git a/lib/model/rwfolder_test.go b/lib/model/rwfolder_test.go index 4ea713b6..93c17a96 100644 --- a/lib/model/rwfolder_test.go +++ b/lib/model/rwfolder_test.go @@ -238,7 +238,7 @@ func TestCopierFinder(t *testing.T) { } // Verify that the fetched blocks have actually been written to the temp file - blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil) + blks, err := scanner.HashFile(tempFile, protocol.BlockSize, nil, false) if err != nil { t.Log(err) } @@ -291,7 +291,7 @@ func TestWeakHash(t *testing.T) { // File 1: abcdefgh // File 2: xyabcdef f.Seek(0, os.SEEK_SET) - existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil) + existing, err := scanner.Blocks(f, protocol.BlockSize, size, nil, true) if err != nil { t.Error(err) } @@ -300,7 +300,7 @@ func TestWeakHash(t *testing.T) { remainder := io.LimitReader(f, size-shift) prefix := io.LimitReader(rand.Reader, shift) nf := io.MultiReader(prefix, remainder) - desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil) + desired, err := scanner.Blocks(nf, protocol.BlockSize, size, nil, true) if err != nil { t.Error(err) } diff --git a/lib/scanner/blockqueue.go b/lib/scanner/blockqueue.go index 59672422..1ce83312 100644 --- a/lib/scanner/blockqueue.go +++ b/lib/scanner/blockqueue.go @@ -20,13 +20,13 @@ import ( // workers are used in parallel. The outbox will become closed when the inbox // is closed and all items handled. 
-func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}) { +func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan protocol.FileInfo, counter Counter, done, cancel chan struct{}, useWeakHashes bool) { wg := sync.NewWaitGroup() wg.Add(workers) for i := 0; i < workers; i++ { go func() { - hashFiles(dir, blockSize, outbox, inbox, counter, cancel) + hashFiles(dir, blockSize, outbox, inbox, counter, cancel, useWeakHashes) wg.Done() }() } @@ -40,7 +40,8 @@ func newParallelHasher(dir string, blockSize, workers int, outbox, inbox chan pr }() } -func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo, error) { +// HashFile hashes the file at path and returns a list of blocks representing the file. +func HashFile(path string, blockSize int, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) { fd, err := os.Open(path) if err != nil { l.Debugln("open:", err) @@ -60,7 +61,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo // Hash the file. This may take a while for large files. - blocks, err := Blocks(fd, blockSize, size, counter) + blocks, err := Blocks(fd, blockSize, size, counter, useWeakHashes) if err != nil { l.Debugln("blocks:", err) return nil, err @@ -81,7 +82,7 @@ func HashFile(path string, blockSize int, counter Counter) ([]protocol.BlockInfo return blocks, nil } -func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}) { +func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, counter Counter, cancel chan struct{}, useWeakHashes bool) { for { select { case f, ok := <-inbox: @@ -93,7 +94,7 @@ func hashFiles(dir string, blockSize int, outbox, inbox chan protocol.FileInfo, panic("Bug. 
Asked to hash a directory or a deleted file.") } - blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter) + blocks, err := HashFile(filepath.Join(dir, f.Name), blockSize, counter, useWeakHashes) if err != nil { l.Debugln("hash error:", f.Name, err) continue diff --git a/lib/scanner/blocks.go b/lib/scanner/blocks.go index 788e7428..428c88a1 100644 --- a/lib/scanner/blocks.go +++ b/lib/scanner/blocks.go @@ -9,6 +9,7 @@ package scanner import ( "bytes" "fmt" + "hash" "io" "github.com/chmduquesne/rollinghash/adler32" @@ -23,11 +24,20 @@ type Counter interface { } // Blocks returns the blockwise hash of the reader. -func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter) ([]protocol.BlockInfo, error) { +func Blocks(r io.Reader, blocksize int, sizehint int64, counter Counter, useWeakHashes bool) ([]protocol.BlockInfo, error) { hf := sha256.New() hashLength := hf.Size() - whf := adler32.New() - mhf := io.MultiWriter(hf, whf) + + var mhf io.Writer + var whf hash.Hash32 + + if useWeakHashes { + whf = adler32.New() + mhf = io.MultiWriter(hf, whf) + } else { + whf = noopHash{} + mhf = hf + } var blocks []protocol.BlockInfo var hashes, thisHash []byte @@ -189,3 +199,12 @@ func BlocksEqual(src, tgt []protocol.BlockInfo) bool { } return true } + +type noopHash struct{} + +func (noopHash) Sum32() uint32 { return 0 } +func (noopHash) BlockSize() int { return 0 } +func (noopHash) Size() int { return 0 } +func (noopHash) Reset() {} +func (noopHash) Sum([]byte) []byte { return nil } +func (noopHash) Write(p []byte) (int, error) { return len(p), nil } diff --git a/lib/scanner/blocks_test.go b/lib/scanner/blocks_test.go index 47d816a0..1943cb7a 100644 --- a/lib/scanner/blocks_test.go +++ b/lib/scanner/blocks_test.go @@ -68,7 +68,7 @@ var blocksTestData = []struct { func TestBlocks(t *testing.T) { for testNo, test := range blocksTestData { buf := bytes.NewBuffer(test.data) - blocks, err := Blocks(buf, 
test.blocksize, -1, nil, true) if err != nil { t.Fatal(err) @@ -125,8 +125,8 @@ var diffTestData = []struct { func TestDiff(t *testing.T) { for i, test := range diffTestData { - a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil) - b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil) + a, _ := Blocks(bytes.NewBufferString(test.a), test.s, -1, nil, false) + b, _ := Blocks(bytes.NewBufferString(test.b), test.s, -1, nil, false) _, d := BlockDiff(a, b) if len(d) != len(test.d) { t.Fatalf("Incorrect length for diff %d; %d != %d", i, len(d), len(test.d)) diff --git a/lib/scanner/walk.go b/lib/scanner/walk.go index 5c1e2649..d3ceb9b9 100644 --- a/lib/scanner/walk.go +++ b/lib/scanner/walk.go @@ -72,6 +72,8 @@ type Config struct { ProgressTickIntervalS int // Signals cancel from the outside - when closed, we should stop walking. Cancel chan struct{} + // Whether or not we should also compute weak hashes. + UseWeakHashes bool } type CurrentFiler interface { @@ -129,7 +131,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) { // We're not required to emit scan progress events, just kick off hashers, // and feed inputs directly from the walker. if w.ProgressTickIntervalS < 0 { - newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel) + newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.Cancel, w.UseWeakHashes) return finishedChan, nil } @@ -160,7 +162,7 @@ func (w *walker) walk() (chan protocol.FileInfo, error) { done := make(chan struct{}) progress := newByteCounter() - newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel) + newParallelHasher(w.Dir, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.Cancel, w.UseWeakHashes) // A routine which actually emits the FolderScanProgress events // every w.ProgressTicker ticks, until the hasher routines terminate. 
diff --git a/lib/scanner/walk_test.go b/lib/scanner/walk_test.go index 14eb5a02..48f27981 100644 --- a/lib/scanner/walk_test.go +++ b/lib/scanner/walk_test.go @@ -148,7 +148,7 @@ func TestVerify(t *testing.T) { progress := newByteCounter() defer progress.Close() - blocks, err := Blocks(buf, blocksize, -1, progress) + blocks, err := Blocks(buf, blocksize, -1, progress, false) if err != nil { t.Fatal(err) } @@ -423,7 +423,7 @@ func BenchmarkHashFile(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { - if _, err := HashFile(testdataName, protocol.BlockSize, nil); err != nil { + if _, err := HashFile(testdataName, protocol.BlockSize, nil, true); err != nil { b.Fatal(err) } }