all: Implement variable sized blocks (fixes #4807)
This commit is contained in:
committed by
Audrius Butkevicius
parent
01aef75c96
commit
19c7cd99f5
@@ -63,7 +63,6 @@ func HashFile(ctx context.Context, fs fs.Filesystem, path string, blockSize int,
|
||||
// is closed and all items handled.
|
||||
type parallelHasher struct {
|
||||
fs fs.Filesystem
|
||||
blockSize int
|
||||
workers int
|
||||
outbox chan<- protocol.FileInfo
|
||||
inbox <-chan protocol.FileInfo
|
||||
@@ -73,10 +72,9 @@ type parallelHasher struct {
|
||||
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
func newParallelHasher(ctx context.Context, fs fs.Filesystem, blockSize, workers int, outbox chan<- protocol.FileInfo, inbox <-chan protocol.FileInfo, counter Counter, done chan<- struct{}, useWeakHashes bool) {
|
||||
func newParallelHasher(ctx context.Context, fs fs.Filesystem, workers int, outbox chan<- protocol.FileInfo, inbox <-chan protocol.FileInfo, counter Counter, done chan<- struct{}, useWeakHashes bool) {
|
||||
ph := &parallelHasher{
|
||||
fs: fs,
|
||||
blockSize: blockSize,
|
||||
workers: workers,
|
||||
outbox: outbox,
|
||||
inbox: inbox,
|
||||
@@ -108,7 +106,7 @@ func (ph *parallelHasher) hashFiles(ctx context.Context) {
|
||||
panic("Bug. Asked to hash a directory or a deleted file.")
|
||||
}
|
||||
|
||||
blocks, err := HashFile(ctx, ph.fs, f.Name, ph.blockSize, ph.counter, ph.useWeakHashes)
|
||||
blocks, err := HashFile(ctx, ph.fs, f.Name, f.BlockSize(), ph.counter, ph.useWeakHashes)
|
||||
if err != nil {
|
||||
l.Debugln("hash error:", f.Name, err)
|
||||
continue
|
||||
|
||||
@@ -125,7 +125,7 @@ func TestAdler32Variants(t *testing.T) {
|
||||
}
|
||||
|
||||
// protocol block sized data
|
||||
data := make([]byte, protocol.BlockSize)
|
||||
data := make([]byte, protocol.MinBlockSize)
|
||||
for i := 0; i < 5; i++ {
|
||||
rand.Read(data)
|
||||
if !checkFn(data) {
|
||||
|
||||
@@ -55,6 +55,48 @@ func (i infiniteFS) Open(name string) (fs.File, error) {
|
||||
return &fakeFile{name, i.filesize, 0}, nil
|
||||
}
|
||||
|
||||
type singleFileFS struct {
|
||||
fs.Filesystem
|
||||
name string
|
||||
filesize int64
|
||||
}
|
||||
|
||||
func (s singleFileFS) Lstat(name string) (fs.FileInfo, error) {
|
||||
switch name {
|
||||
case ".":
|
||||
return fakeInfo{".", 0}, nil
|
||||
case s.name:
|
||||
return fakeInfo{s.name, s.filesize}, nil
|
||||
default:
|
||||
return nil, errors.New("no such file")
|
||||
}
|
||||
}
|
||||
|
||||
func (s singleFileFS) Stat(name string) (fs.FileInfo, error) {
|
||||
switch name {
|
||||
case ".":
|
||||
return fakeInfo{".", 0}, nil
|
||||
case s.name:
|
||||
return fakeInfo{s.name, s.filesize}, nil
|
||||
default:
|
||||
return nil, errors.New("no such file")
|
||||
}
|
||||
}
|
||||
|
||||
func (s singleFileFS) DirNames(name string) ([]string, error) {
|
||||
if name != "." {
|
||||
return nil, errors.New("no such file")
|
||||
}
|
||||
return []string{s.name}, nil
|
||||
}
|
||||
|
||||
func (s singleFileFS) Open(name string) (fs.File, error) {
|
||||
if name != s.name {
|
||||
return nil, errors.New("no such file")
|
||||
}
|
||||
return &fakeFile{s.name, s.filesize, 0}, nil
|
||||
}
|
||||
|
||||
type fakeInfo struct {
|
||||
name string
|
||||
size int64
|
||||
@@ -42,8 +42,6 @@ type Config struct {
|
||||
Folder string
|
||||
// Limit walking to these paths within Dir, or no limit if Subs is empty
|
||||
Subs []string
|
||||
// BlockSize controls the size of the block used when hashing.
|
||||
BlockSize int
|
||||
// If Matcher is not nil, it is used to identify files to ignore which were specified by the user.
|
||||
Matcher *ignore.Matcher
|
||||
// Number of hours to keep temporary files for
|
||||
@@ -68,6 +66,8 @@ type Config struct {
|
||||
ProgressTickIntervalS int
|
||||
// Whether or not we should also compute weak hashes
|
||||
UseWeakHashes bool
|
||||
// Whether to use large blocks for large files or the old standard of 128KiB for everything.
|
||||
UseLargeBlocks bool
|
||||
}
|
||||
|
||||
type CurrentFiler interface {
|
||||
@@ -98,7 +98,7 @@ type walker struct {
|
||||
// Walk returns the list of files found in the local folder by scanning the
|
||||
// file system. Files are blockwise hashed.
|
||||
func (w *walker) walk(ctx context.Context) chan protocol.FileInfo {
|
||||
l.Debugln("Walk", w.Subs, w.BlockSize, w.Matcher)
|
||||
l.Debugln("Walk", w.Subs, w.Matcher)
|
||||
|
||||
toHashChan := make(chan protocol.FileInfo)
|
||||
finishedChan := make(chan protocol.FileInfo)
|
||||
@@ -120,7 +120,7 @@ func (w *walker) walk(ctx context.Context) chan protocol.FileInfo {
|
||||
// We're not required to emit scan progress events, just kick off hashers,
|
||||
// and feed inputs directly from the walker.
|
||||
if w.ProgressTickIntervalS < 0 {
|
||||
newParallelHasher(ctx, w.Filesystem, w.BlockSize, w.Hashers, finishedChan, toHashChan, nil, nil, w.UseWeakHashes)
|
||||
newParallelHasher(ctx, w.Filesystem, w.Hashers, finishedChan, toHashChan, nil, nil, w.UseWeakHashes)
|
||||
return finishedChan
|
||||
}
|
||||
|
||||
@@ -151,7 +151,7 @@ func (w *walker) walk(ctx context.Context) chan protocol.FileInfo {
|
||||
done := make(chan struct{})
|
||||
progress := newByteCounter()
|
||||
|
||||
newParallelHasher(ctx, w.Filesystem, w.BlockSize, w.Hashers, finishedChan, realToHashChan, progress, done, w.UseWeakHashes)
|
||||
newParallelHasher(ctx, w.Filesystem, w.Hashers, finishedChan, realToHashChan, progress, done, w.UseWeakHashes)
|
||||
|
||||
// A routine which actually emits the FolderScanProgress events
|
||||
// every w.ProgressTicker ticks, until the hasher routines terminate.
|
||||
@@ -161,7 +161,7 @@ func (w *walker) walk(ctx context.Context) chan protocol.FileInfo {
|
||||
for {
|
||||
select {
|
||||
case <-done:
|
||||
l.Debugln("Walk progress done", w.Folder, w.Subs, w.BlockSize, w.Matcher)
|
||||
l.Debugln("Walk progress done", w.Folder, w.Subs, w.Matcher)
|
||||
ticker.Stop()
|
||||
return
|
||||
case <-ticker.C:
|
||||
@@ -285,32 +285,52 @@ func (w *walker) walkRegular(ctx context.Context, relPath string, info fs.FileIn
|
||||
curMode |= 0111
|
||||
}
|
||||
|
||||
cf, ok := w.CurrentFiler.CurrentFile(relPath)
|
||||
blockSize := protocol.MinBlockSize
|
||||
curFile, hasCurFile := w.CurrentFiler.CurrentFile(relPath)
|
||||
|
||||
if w.UseLargeBlocks {
|
||||
blockSize = protocol.BlockSize(info.Size())
|
||||
|
||||
if hasCurFile {
|
||||
// Check if we should retain current block size.
|
||||
curBlockSize := curFile.BlockSize()
|
||||
if blockSize > curBlockSize && blockSize/curBlockSize <= 2 {
|
||||
// New block size is larger, but not more than twice larger.
|
||||
// Retain.
|
||||
blockSize = curBlockSize
|
||||
} else if curBlockSize > blockSize && curBlockSize/blockSize <= 2 {
|
||||
// Old block size is larger, but not more than twice larger.
|
||||
// Retain.
|
||||
blockSize = curBlockSize
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
f := protocol.FileInfo{
|
||||
Name: relPath,
|
||||
Type: protocol.FileInfoTypeFile,
|
||||
Version: cf.Version.Update(w.ShortID),
|
||||
Version: curFile.Version.Update(w.ShortID),
|
||||
Permissions: curMode & uint32(maskModePerm),
|
||||
NoPermissions: w.IgnorePerms,
|
||||
ModifiedS: info.ModTime().Unix(),
|
||||
ModifiedNs: int32(info.ModTime().Nanosecond()),
|
||||
ModifiedBy: w.ShortID,
|
||||
Size: info.Size(),
|
||||
RawBlockSize: int32(blockSize),
|
||||
}
|
||||
|
||||
if ok {
|
||||
if cf.IsEquivalent(f, w.IgnorePerms, true) {
|
||||
if hasCurFile {
|
||||
if curFile.IsEquivalent(f, w.IgnorePerms, true) {
|
||||
return nil
|
||||
}
|
||||
if cf.Invalid {
|
||||
if curFile.Invalid {
|
||||
// We do not want to override the global version with the file we
|
||||
// currently have. Keeping only our local counter makes sure we are in
|
||||
// conflict with any other existing versions, which will be resolved by
|
||||
// the normal pulling mechanisms.
|
||||
f.Version = f.Version.DropOthers(w.ShortID)
|
||||
}
|
||||
l.Debugln("rescan:", cf, info.ModTime().Unix(), info.Mode()&fs.ModePerm)
|
||||
l.Debugln("rescan:", curFile, info.ModTime().Unix(), info.Mode()&fs.ModePerm)
|
||||
}
|
||||
|
||||
l.Debugln("to hash:", relPath, f)
|
||||
|
||||
@@ -65,7 +65,6 @@ func TestWalkSub(t *testing.T) {
|
||||
fchan := Walk(context.TODO(), Config{
|
||||
Filesystem: fs.NewFilesystem(fs.FilesystemTypeBasic, "testdata"),
|
||||
Subs: []string{"dir2"},
|
||||
BlockSize: 128 * 1024,
|
||||
Matcher: ignores,
|
||||
Hashers: 2,
|
||||
})
|
||||
@@ -98,7 +97,6 @@ func TestWalk(t *testing.T) {
|
||||
|
||||
fchan := Walk(context.TODO(), Config{
|
||||
Filesystem: fs.NewFilesystem(fs.FilesystemTypeBasic, "testdata"),
|
||||
BlockSize: 128 * 1024,
|
||||
Matcher: ignores,
|
||||
Hashers: 2,
|
||||
})
|
||||
@@ -221,11 +219,11 @@ func TestNormalization(t *testing.T) {
|
||||
// make sure it all gets done. In production, things will be correct
|
||||
// eventually...
|
||||
|
||||
_, err := walkDir(fs, "testdata/normalization")
|
||||
_, err := walkDir(fs, "testdata/normalization", nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
tmp, err := walkDir(fs, "testdata/normalization")
|
||||
tmp, err := walkDir(fs, "testdata/normalization", nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -272,7 +270,7 @@ func TestWalkSymlinkUnix(t *testing.T) {
|
||||
|
||||
for _, path := range []string{".", "link"} {
|
||||
// Scan it
|
||||
files, _ := walkDir(fs.NewFilesystem(fs.FilesystemTypeBasic, "_symlinks"), path)
|
||||
files, _ := walkDir(fs.NewFilesystem(fs.FilesystemTypeBasic, "_symlinks"), path, nil)
|
||||
|
||||
// Verify that we got one symlink and with the correct attributes
|
||||
if len(files) != 1 {
|
||||
@@ -303,7 +301,7 @@ func TestWalkSymlinkWindows(t *testing.T) {
|
||||
|
||||
for _, path := range []string{".", "link"} {
|
||||
// Scan it
|
||||
files, _ := walkDir(fs.NewFilesystem(fs.FilesystemTypeBasic, "_symlinks"), path)
|
||||
files, _ := walkDir(fs.NewFilesystem(fs.FilesystemTypeBasic, "_symlinks"), path, nil)
|
||||
|
||||
// Verify that we got zero symlinks
|
||||
if len(files) != 0 {
|
||||
@@ -332,7 +330,7 @@ func TestWalkRootSymlink(t *testing.T) {
|
||||
}
|
||||
|
||||
// Scan it
|
||||
files, err := walkDir(fs.NewFilesystem(fs.FilesystemTypeBasic, link), ".")
|
||||
files, err := walkDir(fs.NewFilesystem(fs.FilesystemTypeBasic, link), ".", nil)
|
||||
if err != nil {
|
||||
t.Fatal("Expected no error when root folder path is provided via a symlink: " + err.Error())
|
||||
}
|
||||
@@ -342,13 +340,83 @@ func TestWalkRootSymlink(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func walkDir(fs fs.Filesystem, dir string) ([]protocol.FileInfo, error) {
|
||||
func TestBlocksizeHysteresis(t *testing.T) {
|
||||
// Verify that we select the right block size in the presence of old
|
||||
// file information.
|
||||
|
||||
sf := fs.NewWalkFilesystem(&singleFileFS{
|
||||
name: "testfile.dat",
|
||||
filesize: 500 << 20, // 500 MiB
|
||||
})
|
||||
|
||||
current := make(fakeCurrentFiler)
|
||||
|
||||
runTest := func(expectedBlockSize int) {
|
||||
files, err := walkDir(sf, ".", current)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if len(files) != 1 {
|
||||
t.Fatalf("expected one file, not %d", len(files))
|
||||
}
|
||||
if s := files[0].BlockSize(); s != expectedBlockSize {
|
||||
t.Fatalf("incorrect block size %d != expected %d", s, expectedBlockSize)
|
||||
}
|
||||
}
|
||||
|
||||
// Scan with no previous knowledge. We should get a 512 KiB block size.
|
||||
|
||||
runTest(512 << 10)
|
||||
|
||||
// Scan on the assumption that previous size was 256 KiB. Retain 256 KiB
|
||||
// block size.
|
||||
|
||||
current["testfile.dat"] = protocol.FileInfo{
|
||||
Name: "testfile.dat",
|
||||
Size: 500 << 20,
|
||||
RawBlockSize: 256 << 10,
|
||||
}
|
||||
runTest(256 << 10)
|
||||
|
||||
// Scan on the assumption that previous size was 1 MiB. Retain 1 MiB
|
||||
// block size.
|
||||
|
||||
current["testfile.dat"] = protocol.FileInfo{
|
||||
Name: "testfile.dat",
|
||||
Size: 500 << 20,
|
||||
RawBlockSize: 1 << 20,
|
||||
}
|
||||
runTest(1 << 20)
|
||||
|
||||
// Scan on the assumption that previous size was 128 KiB. Move to 512
|
||||
// KiB because the difference is large.
|
||||
|
||||
current["testfile.dat"] = protocol.FileInfo{
|
||||
Name: "testfile.dat",
|
||||
Size: 500 << 20,
|
||||
RawBlockSize: 128 << 10,
|
||||
}
|
||||
runTest(512 << 10)
|
||||
|
||||
// Scan on the assumption that previous size was 2 MiB. Move to 512
|
||||
// KiB because the difference is large.
|
||||
|
||||
current["testfile.dat"] = protocol.FileInfo{
|
||||
Name: "testfile.dat",
|
||||
Size: 500 << 20,
|
||||
RawBlockSize: 2 << 20,
|
||||
}
|
||||
runTest(512 << 10)
|
||||
}
|
||||
|
||||
func walkDir(fs fs.Filesystem, dir string, cfiler CurrentFiler) ([]protocol.FileInfo, error) {
|
||||
fchan := Walk(context.TODO(), Config{
|
||||
Filesystem: fs,
|
||||
Subs: []string{dir},
|
||||
BlockSize: 128 * 1024,
|
||||
AutoNormalize: true,
|
||||
Hashers: 2,
|
||||
Filesystem: fs,
|
||||
Subs: []string{dir},
|
||||
AutoNormalize: true,
|
||||
Hashers: 2,
|
||||
UseLargeBlocks: true,
|
||||
CurrentFiler: cfiler,
|
||||
})
|
||||
|
||||
var tmp []protocol.FileInfo
|
||||
@@ -410,7 +478,7 @@ func BenchmarkHashFile(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
if _, err := HashFile(context.TODO(), fs.NewFilesystem(fs.FilesystemTypeBasic, ""), testdataName, protocol.BlockSize, nil, true); err != nil {
|
||||
if _, err := HashFile(context.TODO(), fs.NewFilesystem(fs.FilesystemTypeBasic, ""), testdataName, protocol.MinBlockSize, nil, true); err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
}
|
||||
@@ -451,7 +519,6 @@ func TestStopWalk(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
fchan := Walk(ctx, Config{
|
||||
Filesystem: fs,
|
||||
BlockSize: 128 * 1024,
|
||||
Hashers: numHashers,
|
||||
ProgressTickIntervalS: -1, // Don't attempt to build the full list of files before starting to scan...
|
||||
})
|
||||
@@ -513,7 +580,7 @@ func TestIssue4799(t *testing.T) {
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
files, err := walkDir(fs, "/foo")
|
||||
files, err := walkDir(fs, "/foo", nil)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -540,7 +607,6 @@ func TestIssue4841(t *testing.T) {
|
||||
fchan := Walk(context.TODO(), Config{
|
||||
Filesystem: fs,
|
||||
Subs: nil,
|
||||
BlockSize: 128 * 1024,
|
||||
AutoNormalize: true,
|
||||
Hashers: 2,
|
||||
CurrentFiler: fakeCurrentFiler{
|
||||
|
||||
Reference in New Issue
Block a user