More graceful handling of folder errors (fixes #762)

Checks folder health before accepting every scanner batch, and
recovers from folder errors without requiring a restart.
This commit is contained in:
Audrius Butkevicius
2015-03-28 14:25:42 +00:00
committed by Jakob Borg
parent 34ba5678c3
commit 7406176fad
8 changed files with 370 additions and 103 deletions

View File

@@ -1104,7 +1104,11 @@ func (m *Model) ScanFolders() map[string]error {
errorsMut.Lock()
errors[folder] = err
errorsMut.Unlock()
m.cfg.InvalidateFolder(folder, err.Error())
// This may set the error twice: once in the scanner, which performs
// a health check of its own, and once here if the error returned is
// the same one returned by CheckFolderHealth. SetFolderError handles
// the duplicate set.
m.cfg.SetFolderError(folder, err)
}
wg.Done()
}()
@@ -1180,9 +1184,11 @@ nextSub:
}
runner.setState(FolderScanning)
defer runner.setState(FolderIdle)
fchan, err := w.Walk()
if err != nil {
m.cfg.SetFolderError(folder, err)
return err
}
batchSize := 100
@@ -1196,12 +1202,20 @@ nextSub:
"size": f.Size(),
})
if len(batch) == batchSize {
if err := m.CheckFolderHealth(folder); err != nil {
l.Infoln("Stopping folder %s mid-scan due to folder error: %s", folder, err)
return err
}
fs.Update(protocol.LocalDeviceID, batch)
batch = batch[:0]
}
batch = append(batch, f)
}
if len(batch) > 0 {
if err := m.CheckFolderHealth(folder); err != nil {
l.Infoln("Stopping folder %s mid-scan due to folder error: %s", folder, err)
return err
} else if len(batch) > 0 {
fs.Update(protocol.LocalDeviceID, batch)
}
@@ -1286,7 +1300,6 @@ nextSub:
fs.Update(protocol.LocalDeviceID, batch)
}
runner.setState(FolderIdle)
return nil
}
@@ -1510,6 +1523,73 @@ func (m *Model) BringToFront(folder, file string) {
}
}
// CheckFolderHealth returns the current error for the folder with the given
// id, or nil if the folder is healthy.
//
// A folder whose index already contains files (CurrentLocalVersion > 0) is
// reported as unhealthy when its path cannot be stat'ed or is not a
// directory, or when its marker is missing. A folder with an empty index is
// repaired instead: a missing directory and/or marker is created, and only a
// failure to do so is reported.
//
// Whenever the outcome differs from the Invalid value stored on the folder
// configuration, it is recorded through m.cfg.SetFolderError and the
// transition is logged. The function panics if the configuration does not
// reflect the new state after such an update.
//
// NOTE(review): the previous comment stated that the update emits a
// ConfigSaved event which causes a GUI refresh; that would happen inside
// SetFolderError, which is not visible here - confirm.
func (m *Model) CheckFolderHealth(id string) error {
	fcfg, known := m.cfg.Folders()[id]
	if !known {
		return errors.New("Folder does not exist")
	}

	info, err := os.Stat(fcfg.Path)
	indexed := m.CurrentLocalVersion(id) > 0

	switch {
	case indexed && (err != nil || !info.IsDir()):
		// Safety check. If the cached index contains files but the
		// folder doesn't exist, we have a problem. We would assume that
		// all files have been deleted, which might not be the case, so
		// mark the folder as invalid instead.
		err = errors.New("Folder path missing")
	case indexed && !fcfg.HasMarker():
		err = errors.New("Folder marker missing")
	case indexed:
		// Path and marker are both in place; err is nil here.
	case os.IsNotExist(err):
		// Nothing in the index and no directory on disk: try to create
		// the directory and then the marker inside it.
		if err = os.MkdirAll(fcfg.Path, 0700); err == nil {
			err = fcfg.CreateMarker()
		}
	case !fcfg.HasMarker():
		// Nothing in the index and the marker is not there: create it.
		err = fcfg.CreateMarker()
	}

	if err != nil {
		if fcfg.Invalid == err.Error() {
			// Same error as already recorded; nothing to update.
			return err
		}

		// fcfg is a copy of the configured struct, hence fcfg.Invalid
		// still holds the previous value after the set.
		m.cfg.SetFolderError(id, err)
		if fcfg.Invalid != "" {
			l.Infof("Folder %q error changed: %q -> %q", fcfg.ID, fcfg.Invalid, err)
		} else {
			l.Warnf("Stopping folder %q - %v", fcfg.ID, err)
		}

		// Re-read the configuration to verify that the error was stored.
		if current, present := m.cfg.Folders()[id]; !present || current.Invalid != err.Error() {
			panic("Unable to set folder \"" + id + "\" error.")
		}
		return err
	}

	if fcfg.Invalid != "" {
		l.Infof("Starting folder %q after error %q", fcfg.ID, fcfg.Invalid)
		m.cfg.SetFolderError(id, nil)
	}

	// Re-read the configuration to verify that no error remains stored.
	if current, present := m.cfg.Folders()[id]; !present || current.Invalid != "" {
		panic("Unable to unset folder \"" + id + "\" error.")
	}
	return nil
}
// String returns a textual identifier for m of the form "model@<pointer>",
// where <pointer> is m formatted with the %p verb.
func (m *Model) String() string {
	const format = "model@%p"
	return fmt.Sprintf(format, m)
}

View File

@@ -20,6 +20,7 @@ import (
"github.com/syncthing/protocol"
"github.com/syncthing/syncthing/internal/config"
"github.com/syncthing/syncthing/internal/db"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/storage"
)
@@ -578,6 +579,166 @@ func TestRefuseUnknownBits(t *testing.T) {
}
}
func TestROScanRecovery(t *testing.T) {
ldb, _ := leveldb.Open(storage.NewMemStorage(), nil)
set := db.NewFileSet("default", ldb)
set.Update(protocol.LocalDeviceID, []protocol.FileInfo{
{Name: "dummyfile"},
})
fcfg := config.FolderConfiguration{
ID: "default",
Path: "testdata/rotestfolder",
RescanIntervalS: 1,
}
cfg := config.Wrap("/tmp/test", config.Configuration{
Folders: []config.FolderConfiguration{fcfg},
Devices: []config.DeviceConfiguration{
{
DeviceID: device1,
},
},
})
os.RemoveAll(fcfg.Path)
m := NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
m.AddFolder(fcfg)
m.StartFolderRO("default")
waitFor := func(status string) error {
timeout := time.Now().Add(2 * time.Second)
for {
if time.Now().After(timeout) {
return fmt.Errorf("Timed out waiting for status: %s, current status: %s", status, m.cfg.Folders()["default"].Invalid)
}
if m.cfg.Folders()["default"].Invalid == status {
return nil
}
time.Sleep(10 * time.Millisecond)
}
}
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
os.Mkdir(fcfg.Path, 0700)
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
fd, err := os.Create(filepath.Join(fcfg.Path, ".stfolder"))
if err != nil {
t.Error(err)
return
}
fd.Close()
if err := waitFor(""); err != nil {
t.Error(err)
return
}
os.Remove(filepath.Join(fcfg.Path, ".stfolder"))
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
os.Remove(fcfg.Path)
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
}
func TestRWScanRecovery(t *testing.T) {
ldb, _ := leveldb.Open(storage.NewMemStorage(), nil)
set := db.NewFileSet("default", ldb)
set.Update(protocol.LocalDeviceID, []protocol.FileInfo{
{Name: "dummyfile"},
})
fcfg := config.FolderConfiguration{
ID: "default",
Path: "testdata/rwtestfolder",
RescanIntervalS: 1,
}
cfg := config.Wrap("/tmp/test", config.Configuration{
Folders: []config.FolderConfiguration{fcfg},
Devices: []config.DeviceConfiguration{
{
DeviceID: device1,
},
},
})
os.RemoveAll(fcfg.Path)
m := NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
m.AddFolder(fcfg)
m.StartFolderRW("default")
waitFor := func(status string) error {
timeout := time.Now().Add(2 * time.Second)
for {
if time.Now().After(timeout) {
return fmt.Errorf("Timed out waiting for status: %s, current status: %s", status, m.cfg.Folders()["default"].Invalid)
}
if m.cfg.Folders()["default"].Invalid == status {
return nil
}
time.Sleep(10 * time.Millisecond)
}
}
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
os.Mkdir(fcfg.Path, 0700)
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
fd, err := os.Create(filepath.Join(fcfg.Path, ".stfolder"))
if err != nil {
t.Error(err)
return
}
fd.Close()
if err := waitFor(""); err != nil {
t.Error(err)
return
}
os.Remove(filepath.Join(fcfg.Path, ".stfolder"))
if err := waitFor("Folder marker missing"); err != nil {
t.Error(err)
return
}
os.Remove(fcfg.Path)
if err := waitFor("Folder path missing"); err != nil {
t.Error(err)
return
}
}
func TestGlobalDirectoryTree(t *testing.T) {
db, _ := leveldb.Open(storage.NewMemStorage(), nil)
m := NewModel(defaultConfig, protocol.LocalDeviceID, "device", "syncthing", "dev", db)

View File

@@ -40,6 +40,12 @@ func (s *roFolder) Serve() {
timer := time.NewTimer(time.Millisecond)
defer timer.Stop()
reschedule := func() {
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (s.intv.Nanoseconds()*3 + rand.Int63n(2*s.intv.Nanoseconds())) / 4
timer.Reset(time.Duration(sleepNanos) * time.Nanosecond)
}
initialScanCompleted := false
for {
select {
@@ -47,16 +53,25 @@ func (s *roFolder) Serve() {
return
case <-timer.C:
if err := s.model.CheckFolderHealth(s.folder); err != nil {
l.Infoln("Skipping folder", s.folder, "scan due to folder error:", err)
reschedule()
continue
}
if debug {
l.Debugln(s, "rescan")
}
s.setState(FolderScanning)
if err := s.model.ScanFolder(s.folder); err != nil {
s.model.cfg.InvalidateFolder(s.folder, err.Error())
return
// This may set the error twice: once in the scanner, which performs
// a health check of its own, and once here if the error returned is
// the same one returned by CheckFolderHealth. SetFolderError handles
// the duplicate set.
s.model.cfg.SetFolderError(s.folder, err)
reschedule()
continue
}
s.setState(FolderIdle)
if !initialScanCompleted {
l.Infoln("Completed initial scan (ro) of folder", s.folder)
@@ -67,9 +82,7 @@ func (s *roFolder) Serve() {
return
}
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (s.intv.Nanoseconds()*3 + rand.Int63n(2*s.intv.Nanoseconds())) / 4
timer.Reset(time.Duration(sleepNanos) * time.Nanosecond)
reschedule()
}
}
}

View File

@@ -114,10 +114,20 @@ func (p *rwFolder) Serve() {
var prevVer int64
var prevIgnoreHash string
rescheduleScan := func() {
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (p.scanIntv.Nanoseconds()*3 + rand.Int63n(2*p.scanIntv.Nanoseconds())) / 4
intv := time.Duration(sleepNanos) * time.Nanosecond
if debug {
l.Debugln(p, "next rescan in", intv)
}
scanTimer.Reset(intv)
}
// We don't start pulling files until a scan has been completed.
initialScanCompleted := false
loop:
for {
select {
case <-p.stop:
@@ -130,7 +140,6 @@ loop:
// device A to device B, so we have something to work against.
case <-pullTimer.C:
if !initialScanCompleted {
// How did we even get here?
if debug {
l.Debugln(p, "skip (initial)")
}
@@ -219,24 +228,28 @@ loop:
// this is the easiest way to make sure we are not doing both at the
// same time.
case <-scanTimer.C:
if err := p.model.CheckFolderHealth(p.folder); err != nil {
l.Infoln("Skipping folder", p.folder, "scan due to folder error:", err)
rescheduleScan()
continue
}
if debug {
l.Debugln(p, "rescan")
}
p.setState(FolderScanning)
if err := p.model.ScanFolder(p.folder); err != nil {
p.model.cfg.InvalidateFolder(p.folder, err.Error())
break loop
}
p.setState(FolderIdle)
if p.scanIntv > 0 {
// Sleep a random time between 3/4 and 5/4 of the configured interval.
sleepNanos := (p.scanIntv.Nanoseconds()*3 + rand.Int63n(2*p.scanIntv.Nanoseconds())) / 4
intv := time.Duration(sleepNanos) * time.Nanosecond
if debug {
l.Debugln(p, "next rescan in", intv)
}
scanTimer.Reset(intv)
if err := p.model.ScanFolder(p.folder); err != nil {
// This may set the error twice: once in the scanner, which performs
// a health check of its own, and once here if the error returned is
// the same one returned by CheckFolderHealth. SetFolderError handles
// the duplicate set.
p.model.cfg.SetFolderError(p.folder, err)
rescheduleScan()
continue
}
if p.scanIntv > 0 {
rescheduleScan()
}
if !initialScanCompleted {
l.Infoln("Completed initial scan (rw) of folder", p.folder)