lib/scanner: Refactor scanner.Walk API

The old usage pattern was to create a Walker with a bunch of attributes,
then call Walk() on it and nothing else. This extracts the attributes
into a Config struct and exposes a Walk(cfg Config) method instead, as
there was no reason to expose the state-holding walker type.

Also creates a few no-op implementations of the necessary interfaces
so that we can skip nil checks and simplify things here and there.

Definitely look at this diff without whitespace.

GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3060
This commit is contained in:
Jakob Borg 2016-05-09 18:25:39 +00:00 committed by Audrius Butkevicius
parent d77d8ff803
commit 21e116aa45
3 changed files with 117 additions and 91 deletions

View File

@ -1401,7 +1401,9 @@ func (m *Model) internalScanFolderSubdirs(folder string, subs []string) error {
cancel := make(chan struct{}) cancel := make(chan struct{})
defer close(cancel) defer close(cancel)
w := &scanner.Walker{ runner.setState(FolderScanning)
fchan, err := scanner.Walk(scanner.Config{
Folder: folderCfg.ID, Folder: folderCfg.ID,
Dir: folderCfg.Path(), Dir: folderCfg.Path(),
Subs: subs, Subs: subs,
@ -1417,11 +1419,8 @@ func (m *Model) internalScanFolderSubdirs(folder string, subs []string) error {
ShortID: m.shortID, ShortID: m.shortID,
ProgressTickIntervalS: folderCfg.ScanProgressIntervalS, ProgressTickIntervalS: folderCfg.ScanProgressIntervalS,
Cancel: cancel, Cancel: cancel,
} })
runner.setState(FolderScanning)
fchan, err := w.Walk()
if err != nil { if err != nil {
// The error we get here is likely an OS level error, which might not be // The error we get here is likely an OS level error, which might not be
// as readable as our health check errors. Check if we can get a health // as readable as our health check errors. Check if we can get a health

View File

@ -17,7 +17,6 @@ import (
"unicode/utf8" "unicode/utf8"
"github.com/rcrowley/go-metrics" "github.com/rcrowley/go-metrics"
"github.com/syncthing/syncthing/lib/db"
"github.com/syncthing/syncthing/lib/events" "github.com/syncthing/syncthing/lib/events"
"github.com/syncthing/syncthing/lib/ignore" "github.com/syncthing/syncthing/lib/ignore"
"github.com/syncthing/syncthing/lib/osutil" "github.com/syncthing/syncthing/lib/osutil"
@ -41,7 +40,7 @@ func init() {
} }
} }
type Walker struct { type Config struct {
// Folder for which the walker has been created // Folder for which the walker has been created
Folder string Folder string
// Dir is the base directory for the walk // Dir is the base directory for the walk
@ -58,8 +57,9 @@ type Walker struct {
TempLifetime time.Duration TempLifetime time.Duration
// If CurrentFiler is not nil, it is queried for the current file before rescanning. // If CurrentFiler is not nil, it is queried for the current file before rescanning.
CurrentFiler CurrentFiler CurrentFiler CurrentFiler
// If MtimeRepo is not nil, it is used to provide mtimes on systems that don't support setting arbirtary mtimes. // If MtimeRepo is not nil, it is used to provide mtimes on systems that
MtimeRepo *db.VirtualMtimeRepo // don't support setting arbitrary mtimes.
MtimeRepo MtimeRepo
// If IgnorePerms is true, changes to permission bits will not be // If IgnorePerms is true, changes to permission bits will not be
// detected. Scanned files will get zero permission bits and the // detected. Scanned files will get zero permission bits and the
// NoPermissionBits flag set. // NoPermissionBits flag set.
@ -79,8 +79,6 @@ type Walker struct {
} }
type TempNamer interface { type TempNamer interface {
// Temporary returns a temporary name for the filed referred to by filepath.
TempName(path string) string
// IsTemporary returns true if path refers to the name of temporary file. // IsTemporary returns true if path refers to the name of temporary file.
IsTemporary(path string) bool IsTemporary(path string) bool
} }
@ -90,9 +88,35 @@ type CurrentFiler interface {
CurrentFile(name string) (protocol.FileInfo, bool) CurrentFile(name string) (protocol.FileInfo, bool)
} }
// MtimeRepo supplies the modification time the scanner should use for a
// file. When Config.MtimeRepo is nil, Walk substitutes a no-op
// implementation that returns the on-disk mtime unchanged.
type MtimeRepo interface {
// GetMtime returns a (possibly modified) actual mtime given a file name
// and its on disk mtime.
GetMtime(relPath string, mtime time.Time) time.Time
}
// Walk runs a scan described by cfg and returns the result of the internal
// walker: a channel of protocol.FileInfo and an error.
//
// CurrentFiler, TempNamer and MtimeRepo are optional. Any of them left nil
// is replaced by a no-op implementation, so the walker never has to check
// them for nil.
func Walk(cfg Config) (chan protocol.FileInfo, error) {
	// cfg was passed by value, so filling in defaults here does not modify
	// the caller's Config.
	if cfg.CurrentFiler == nil {
		cfg.CurrentFiler = noCurrentFiler{}
	}
	if cfg.TempNamer == nil {
		cfg.TempNamer = noTempNamer{}
	}
	if cfg.MtimeRepo == nil {
		cfg.MtimeRepo = noMtimeRepo{}
	}

	scan := &walker{Config: cfg}
	return scan.walk()
}
// walker holds the Config for one scan; the scanning methods are defined on
// it. The type is unexported: code outside the package starts a scan through
// Walk, which fills in no-op defaults for the optional interfaces first.
type walker struct {
Config
}
// Walk returns the list of files found in the local folder by scanning the // Walk returns the list of files found in the local folder by scanning the
// file system. Files are blockwise hashed. // file system. Files are blockwise hashed.
func (w *Walker) Walk() (chan protocol.FileInfo, error) { func (w *walker) walk() (chan protocol.FileInfo, error) {
l.Debugln("Walk", w.Dir, w.Subs, w.BlockSize, w.Matcher) l.Debugln("Walk", w.Dir, w.Subs, w.BlockSize, w.Matcher)
err := checkDir(w.Dir) err := checkDir(w.Dir)
@ -195,7 +219,7 @@ func (w *Walker) Walk() (chan protocol.FileInfo, error) {
return finishedChan, nil return finishedChan, nil
} }
func (w *Walker) walkAndHashFiles(fchan, dchan chan protocol.FileInfo) filepath.WalkFunc { func (w *walker) walkAndHashFiles(fchan, dchan chan protocol.FileInfo) filepath.WalkFunc {
now := time.Now() now := time.Now()
return func(absPath string, info os.FileInfo, err error) error { return func(absPath string, info os.FileInfo, err error) error {
// Return value used when we are returning early and don't want to // Return value used when we are returning early and don't want to
@ -221,12 +245,9 @@ func (w *Walker) walkAndHashFiles(fchan, dchan chan protocol.FileInfo) filepath.
return nil return nil
} }
mtime := info.ModTime() mtime := w.MtimeRepo.GetMtime(relPath, info.ModTime())
if w.MtimeRepo != nil {
mtime = w.MtimeRepo.GetMtime(relPath, mtime)
}
if w.TempNamer != nil && w.TempNamer.IsTemporary(relPath) { if w.TempNamer.IsTemporary(relPath) {
// A temporary file // A temporary file
l.Debugln("temporary:", relPath) l.Debugln("temporary:", relPath)
if info.Mode().IsRegular() && mtime.Add(w.TempLifetime).Before(now) { if info.Mode().IsRegular() && mtime.Add(w.TempLifetime).Before(now) {
@ -272,34 +293,30 @@ func (w *Walker) walkAndHashFiles(fchan, dchan chan protocol.FileInfo) filepath.
} }
} }
func (w *Walker) walkRegular(relPath string, info os.FileInfo, mtime time.Time, fchan chan protocol.FileInfo) error { func (w *walker) walkRegular(relPath string, info os.FileInfo, mtime time.Time, fchan chan protocol.FileInfo) error {
curMode := uint32(info.Mode()) curMode := uint32(info.Mode())
if runtime.GOOS == "windows" && osutil.IsWindowsExecutable(relPath) { if runtime.GOOS == "windows" && osutil.IsWindowsExecutable(relPath) {
curMode |= 0111 curMode |= 0111
} }
var currentVersion protocol.Vector // A file is "unchanged", if it
if w.CurrentFiler != nil { // - exists
// A file is "unchanged", if it // - has the same permissions as previously, unless we are ignoring permissions
// - exists // - was not marked deleted (since it apparently exists now)
// - has the same permissions as previously, unless we are ignoring permissions // - had the same modification time as it has now
// - was not marked deleted (since it apparently exists now) // - was not a directory previously (since it's a file now)
// - had the same modification time as it has now // - was not a symlink (since it's a file now)
// - was not a directory previously (since it's a file now) // - was not invalid (since it looks valid now)
// - was not a symlink (since it's a file now) // - has the same size as previously
// - was not invalid (since it looks valid now) cf, ok := w.CurrentFiler.CurrentFile(relPath)
// - has the same size as previously permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, curMode)
cf, ok := w.CurrentFiler.CurrentFile(relPath) if ok && permUnchanged && !cf.IsDeleted() && cf.Modified == mtime.Unix() && !cf.IsDirectory() &&
permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, curMode) !cf.IsSymlink() && !cf.IsInvalid() && cf.Size() == info.Size() {
if ok && permUnchanged && !cf.IsDeleted() && cf.Modified == mtime.Unix() && !cf.IsDirectory() && return nil
!cf.IsSymlink() && !cf.IsInvalid() && cf.Size() == info.Size() {
return nil
}
currentVersion = cf.Version
l.Debugln("rescan:", cf, mtime.Unix(), info.Mode()&os.ModePerm)
} }
l.Debugln("rescan:", cf, mtime.Unix(), info.Mode()&os.ModePerm)
var flags = curMode & uint32(maskModePerm) var flags = curMode & uint32(maskModePerm)
if w.IgnorePerms { if w.IgnorePerms {
flags = protocol.FlagNoPermBits | 0666 flags = protocol.FlagNoPermBits | 0666
@ -307,7 +324,7 @@ func (w *Walker) walkRegular(relPath string, info os.FileInfo, mtime time.Time,
f := protocol.FileInfo{ f := protocol.FileInfo{
Name: relPath, Name: relPath,
Version: currentVersion.Update(w.ShortID), Version: cf.Version.Update(w.ShortID),
Flags: flags, Flags: flags,
Modified: mtime.Unix(), Modified: mtime.Unix(),
CachedSize: info.Size(), CachedSize: info.Size(),
@ -323,23 +340,18 @@ func (w *Walker) walkRegular(relPath string, info os.FileInfo, mtime time.Time,
return nil return nil
} }
func (w *Walker) walkDir(relPath string, info os.FileInfo, mtime time.Time, dchan chan protocol.FileInfo) error { func (w *walker) walkDir(relPath string, info os.FileInfo, mtime time.Time, dchan chan protocol.FileInfo) error {
var currentVersion protocol.Vector // A directory is "unchanged", if it
// - exists
if w.CurrentFiler != nil { // - has the same permissions as previously, unless we are ignoring permissions
// A directory is "unchanged", if it // - was not marked deleted (since it apparently exists now)
// - exists // - was a directory previously (not a file or something else)
// - has the same permissions as previously, unless we are ignoring permissions // - was not a symlink (since it's a directory now)
// - was not marked deleted (since it apparently exists now) // - was not invalid (since it looks valid now)
// - was a directory previously (not a file or something else) cf, ok := w.CurrentFiler.CurrentFile(relPath)
// - was not a symlink (since it's a directory now) permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, uint32(info.Mode()))
// - was not invalid (since it looks valid now) if ok && permUnchanged && !cf.IsDeleted() && cf.IsDirectory() && !cf.IsSymlink() && !cf.IsInvalid() {
cf, ok := w.CurrentFiler.CurrentFile(relPath) return nil
permUnchanged := w.IgnorePerms || !cf.HasPermissionBits() || PermsEqual(cf.Flags, uint32(info.Mode()))
if ok && permUnchanged && !cf.IsDeleted() && cf.IsDirectory() && !cf.IsSymlink() && !cf.IsInvalid() {
return nil
}
currentVersion = cf.Version
} }
flags := uint32(protocol.FlagDirectory) flags := uint32(protocol.FlagDirectory)
@ -350,7 +362,7 @@ func (w *Walker) walkDir(relPath string, info os.FileInfo, mtime time.Time, dcha
} }
f := protocol.FileInfo{ f := protocol.FileInfo{
Name: relPath, Name: relPath,
Version: currentVersion.Update(w.ShortID), Version: cf.Version.Update(w.ShortID),
Flags: flags, Flags: flags,
Modified: mtime.Unix(), Modified: mtime.Unix(),
} }
@ -370,7 +382,7 @@ func (w *Walker) walkDir(relPath string, info os.FileInfo, mtime time.Time, dcha
// transcend into symlinks at all, but there are rumours that this may have // transcend into symlinks at all, but there are rumours that this may have
// happened anyway under some circumstances, possibly Windows reparse points // happened anyway under some circumstances, possibly Windows reparse points
// or something. Hence the "skip" return from this one. // or something. Hence the "skip" return from this one.
func (w *Walker) walkSymlink(absPath, relPath string, dchan chan protocol.FileInfo) (skip bool, err error) { func (w *walker) walkSymlink(absPath, relPath string, dchan chan protocol.FileInfo) (skip bool, err error) {
// If the target is a directory, do NOT descend down there. This will // If the target is a directory, do NOT descend down there. This will
// cause files to get tracked, and removing the symlink will as a result // cause files to get tracked, and removing the symlink will as a result
// remove files in their real location. // remove files in their real location.
@ -395,25 +407,21 @@ func (w *Walker) walkSymlink(absPath, relPath string, dchan chan protocol.FileIn
return true, nil return true, nil
} }
var currentVersion protocol.Vector // A symlink is "unchanged", if
if w.CurrentFiler != nil { // - it exists
// A symlink is "unchanged", if // - it wasn't deleted (because it isn't now)
// - it exists // - it was a symlink
// - it wasn't deleted (because it isn't now) // - it wasn't invalid
// - it was a symlink // - the symlink type (file/dir) was the same
// - it wasn't invalid // - the block list (i.e. hash of target) was the same
// - the symlink type (file/dir) was the same cf, ok := w.CurrentFiler.CurrentFile(relPath)
// - the block list (i.e. hash of target) was the same if ok && !cf.IsDeleted() && cf.IsSymlink() && !cf.IsInvalid() && SymlinkTypeEqual(targetType, cf) && BlocksEqual(cf.Blocks, blocks) {
cf, ok := w.CurrentFiler.CurrentFile(relPath) return true, nil
if ok && !cf.IsDeleted() && cf.IsSymlink() && !cf.IsInvalid() && SymlinkTypeEqual(targetType, cf) && BlocksEqual(cf.Blocks, blocks) {
return true, nil
}
currentVersion = cf.Version
} }
f := protocol.FileInfo{ f := protocol.FileInfo{
Name: relPath, Name: relPath,
Version: currentVersion.Update(w.ShortID), Version: cf.Version.Update(w.ShortID),
Flags: uint32(protocol.FlagSymlink | protocol.FlagNoPermBits | 0666 | SymlinkFlags(targetType)), Flags: uint32(protocol.FlagSymlink | protocol.FlagNoPermBits | 0666 | SymlinkFlags(targetType)),
Modified: 0, Modified: 0,
Blocks: blocks, Blocks: blocks,
@ -432,7 +440,7 @@ func (w *Walker) walkSymlink(absPath, relPath string, dchan chan protocol.FileIn
// normalizePath returns the normalized relative path (possibly after fixing // normalizePath returns the normalized relative path (possibly after fixing
// it on disk), or skip is true. // it on disk), or skip is true.
func (w *Walker) normalizePath(absPath, relPath string) (normPath string, skip bool) { func (w *walker) normalizePath(absPath, relPath string) (normPath string, skip bool) {
if runtime.GOOS == "darwin" { if runtime.GOOS == "darwin" {
// Mac OS X file names should always be NFD normalized. // Mac OS X file names should always be NFD normalized.
normPath = norm.NFD.String(relPath) normPath = norm.NFD.String(relPath)
@ -574,3 +582,27 @@ func (c *byteCounter) Total() int64 {
func (c *byteCounter) Close() { func (c *byteCounter) Close() {
close(c.stop) close(c.stop)
} }
// noCurrentFiler is the CurrentFiler that Walk installs when the Config does
// not provide one. It knows about no files at all.
type noCurrentFiler struct{}

// CurrentFile always reports that name is unknown: it returns the zero
// protocol.FileInfo together with false.
func (noCurrentFiler) CurrentFile(name string) (protocol.FileInfo, bool) {
	var unknown protocol.FileInfo
	return unknown, false
}
// noTempNamer is the TempNamer that Walk installs when the Config does not
// provide one. It never classifies a path as temporary.
type noTempNamer struct{}

// IsTemporary reports false for every path.
func (noTempNamer) IsTemporary(path string) bool {
	return false
}
// noMtimeRepo is the MtimeRepo that Walk installs when the Config does not
// provide one. It applies no adjustment to modification times.
type noMtimeRepo struct{}

// GetMtime returns the on-disk mtime it was given, ignoring relPath.
func (noMtimeRepo) GetMtime(relPath string, mtime time.Time) time.Time {
	return mtime
}

View File

@ -59,14 +59,13 @@ func TestWalkSub(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
w := Walker{ fchan, err := Walk(Config{
Dir: "testdata", Dir: "testdata",
Subs: []string{"dir2"}, Subs: []string{"dir2"},
BlockSize: 128 * 1024, BlockSize: 128 * 1024,
Matcher: ignores, Matcher: ignores,
Hashers: 2, Hashers: 2,
} })
fchan, err := w.Walk()
var files []protocol.FileInfo var files []protocol.FileInfo
for f := range fchan { for f := range fchan {
files = append(files, f) files = append(files, f)
@ -97,14 +96,13 @@ func TestWalk(t *testing.T) {
} }
t.Log(ignores) t.Log(ignores)
w := Walker{ fchan, err := Walk(Config{
Dir: "testdata", Dir: "testdata",
BlockSize: 128 * 1024, BlockSize: 128 * 1024,
Matcher: ignores, Matcher: ignores,
Hashers: 2, Hashers: 2,
} })
fchan, err := w.Walk()
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -122,22 +120,20 @@ func TestWalk(t *testing.T) {
} }
func TestWalkError(t *testing.T) { func TestWalkError(t *testing.T) {
w := Walker{ _, err := Walk(Config{
Dir: "testdata-missing", Dir: "testdata-missing",
BlockSize: 128 * 1024, BlockSize: 128 * 1024,
Hashers: 2, Hashers: 2,
} })
_, err := w.Walk()
if err == nil { if err == nil {
t.Error("no error from missing directory") t.Error("no error from missing directory")
} }
w = Walker{ _, err = Walk(Config{
Dir: "testdata/bar", Dir: "testdata/bar",
BlockSize: 128 * 1024, BlockSize: 128 * 1024,
} })
_, err = w.Walk()
if err == nil { if err == nil {
t.Error("no error from non-directory") t.Error("no error from non-directory")
@ -278,7 +274,7 @@ func TestNormalization(t *testing.T) {
} }
func TestIssue1507(t *testing.T) { func TestIssue1507(t *testing.T) {
w := Walker{} w := &walker{}
c := make(chan protocol.FileInfo, 100) c := make(chan protocol.FileInfo, 100)
fn := w.walkAndHashFiles(c, c) fn := w.walkAndHashFiles(c, c)
@ -286,14 +282,13 @@ func TestIssue1507(t *testing.T) {
} }
func walkDir(dir string) ([]protocol.FileInfo, error) { func walkDir(dir string) ([]protocol.FileInfo, error) {
w := Walker{ fchan, err := Walk(Config{
Dir: dir, Dir: dir,
BlockSize: 128 * 1024, BlockSize: 128 * 1024,
AutoNormalize: true, AutoNormalize: true,
Hashers: 2, Hashers: 2,
} })
fchan, err := w.Walk()
if err != nil { if err != nil {
return nil, err return nil, err
} }