More graceful handling on folder errors (fixes #762)
Checks folder health before accepting each scanner batch, and recovers from folder errors without requiring a restart.
This commit is contained in:
committed by
Jakob Borg
parent
34ba5678c3
commit
7406176fad
@@ -1104,7 +1104,11 @@ func (m *Model) ScanFolders() map[string]error {
|
||||
errorsMut.Lock()
|
||||
errors[folder] = err
|
||||
errorsMut.Unlock()
|
||||
m.cfg.InvalidateFolder(folder, err.Error())
|
||||
// Potentially sets the error twice, once in the scanner just
|
||||
// by doing a check, and once here, if the error returned is
|
||||
// the same one as returned by CheckFolderHealth, though
|
||||
// duplicate set is handled by SetFolderError
|
||||
m.cfg.SetFolderError(folder, err)
|
||||
}
|
||||
wg.Done()
|
||||
}()
|
||||
@@ -1180,9 +1184,11 @@ nextSub:
|
||||
}
|
||||
|
||||
runner.setState(FolderScanning)
|
||||
defer runner.setState(FolderIdle)
|
||||
fchan, err := w.Walk()
|
||||
|
||||
if err != nil {
|
||||
m.cfg.SetFolderError(folder, err)
|
||||
return err
|
||||
}
|
||||
batchSize := 100
|
||||
@@ -1196,12 +1202,20 @@ nextSub:
|
||||
"size": f.Size(),
|
||||
})
|
||||
if len(batch) == batchSize {
|
||||
if err := m.CheckFolderHealth(folder); err != nil {
|
||||
l.Infoln("Stopping folder %s mid-scan due to folder error: %s", folder, err)
|
||||
return err
|
||||
}
|
||||
fs.Update(protocol.LocalDeviceID, batch)
|
||||
batch = batch[:0]
|
||||
}
|
||||
batch = append(batch, f)
|
||||
}
|
||||
if len(batch) > 0 {
|
||||
|
||||
if err := m.CheckFolderHealth(folder); err != nil {
|
||||
l.Infoln("Stopping folder %s mid-scan due to folder error: %s", folder, err)
|
||||
return err
|
||||
} else if len(batch) > 0 {
|
||||
fs.Update(protocol.LocalDeviceID, batch)
|
||||
}
|
||||
|
||||
@@ -1286,7 +1300,6 @@ nextSub:
|
||||
fs.Update(protocol.LocalDeviceID, batch)
|
||||
}
|
||||
|
||||
runner.setState(FolderIdle)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1510,6 +1523,73 @@ func (m *Model) BringToFront(folder, file string) {
|
||||
}
|
||||
}
|
||||
|
||||
// Returns current folder error, or nil if the folder is healthy.
|
||||
// Updates the Invalid field on the folder configuration struct, and emits a
|
||||
// ConfigSaved event which causes a GUI refresh.
|
||||
func (m *Model) CheckFolderHealth(id string) error {
|
||||
folder, ok := m.cfg.Folders()[id]
|
||||
if !ok {
|
||||
return errors.New("Folder does not exist")
|
||||
}
|
||||
|
||||
fi, err := os.Stat(folder.Path)
|
||||
if m.CurrentLocalVersion(id) > 0 {
|
||||
// Safety check. If the cached index contains files but the
|
||||
// folder doesn't exist, we have a problem. We would assume
|
||||
// that all files have been deleted which might not be the case,
|
||||
// so mark it as invalid instead.
|
||||
if err != nil || !fi.IsDir() {
|
||||
err = errors.New("Folder path missing")
|
||||
} else if !folder.HasMarker() {
|
||||
err = errors.New("Folder marker missing")
|
||||
}
|
||||
} else if os.IsNotExist(err) {
|
||||
// If we don't have any files in the index, and the directory
|
||||
// doesn't exist, try creating it.
|
||||
err = os.MkdirAll(folder.Path, 0700)
|
||||
if err == nil {
|
||||
err = folder.CreateMarker()
|
||||
}
|
||||
} else if !folder.HasMarker() {
|
||||
// If we don't have any files in the index, and the path does exist
|
||||
// but the marker is not there, create it.
|
||||
err = folder.CreateMarker()
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
if folder.Invalid != "" {
|
||||
l.Infof("Starting folder %q after error %q", folder.ID, folder.Invalid)
|
||||
m.cfg.SetFolderError(id, nil)
|
||||
}
|
||||
|
||||
if folder, ok := m.cfg.Folders()[id]; !ok || folder.Invalid != "" {
|
||||
panic("Unable to unset folder \"" + id + "\" error.")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
if folder.Invalid == err.Error() {
|
||||
return err
|
||||
}
|
||||
|
||||
// folder is a copy of the original struct, hence Invalid value is
|
||||
// preserved after the set.
|
||||
m.cfg.SetFolderError(id, err)
|
||||
|
||||
if folder.Invalid == "" {
|
||||
l.Warnf("Stopping folder %q - %v", folder.ID, err)
|
||||
} else {
|
||||
l.Infof("Folder %q error changed: %q -> %q", folder.ID, folder.Invalid, err)
|
||||
}
|
||||
|
||||
if folder, ok := m.cfg.Folders()[id]; !ok || folder.Invalid != err.Error() {
|
||||
panic("Unable to set folder \"" + id + "\" error.")
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func (m *Model) String() string {
|
||||
return fmt.Sprintf("model@%p", m)
|
||||
}
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
|
||||
"github.com/syncthing/protocol"
|
||||
"github.com/syncthing/syncthing/internal/config"
|
||||
"github.com/syncthing/syncthing/internal/db"
|
||||
"github.com/syndtr/goleveldb/leveldb"
|
||||
"github.com/syndtr/goleveldb/leveldb/storage"
|
||||
)
|
||||
@@ -578,6 +579,166 @@ func TestRefuseUnknownBits(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestROScanRecovery(t *testing.T) {
|
||||
ldb, _ := leveldb.Open(storage.NewMemStorage(), nil)
|
||||
set := db.NewFileSet("default", ldb)
|
||||
set.Update(protocol.LocalDeviceID, []protocol.FileInfo{
|
||||
{Name: "dummyfile"},
|
||||
})
|
||||
|
||||
fcfg := config.FolderConfiguration{
|
||||
ID: "default",
|
||||
Path: "testdata/rotestfolder",
|
||||
RescanIntervalS: 1,
|
||||
}
|
||||
cfg := config.Wrap("/tmp/test", config.Configuration{
|
||||
Folders: []config.FolderConfiguration{fcfg},
|
||||
Devices: []config.DeviceConfiguration{
|
||||
{
|
||||
DeviceID: device1,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
os.RemoveAll(fcfg.Path)
|
||||
|
||||
m := NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
|
||||
|
||||
m.AddFolder(fcfg)
|
||||
m.StartFolderRO("default")
|
||||
|
||||
waitFor := func(status string) error {
|
||||
timeout := time.Now().Add(2 * time.Second)
|
||||
for {
|
||||
if time.Now().After(timeout) {
|
||||
return fmt.Errorf("Timed out waiting for status: %s, current status: %s", status, m.cfg.Folders()["default"].Invalid)
|
||||
}
|
||||
if m.cfg.Folders()["default"].Invalid == status {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
if err := waitFor("Folder path missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
os.Mkdir(fcfg.Path, 0700)
|
||||
|
||||
if err := waitFor("Folder marker missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
fd, err := os.Create(filepath.Join(fcfg.Path, ".stfolder"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
if err := waitFor(""); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
os.Remove(filepath.Join(fcfg.Path, ".stfolder"))
|
||||
|
||||
if err := waitFor("Folder marker missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
os.Remove(fcfg.Path)
|
||||
|
||||
if err := waitFor("Folder path missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func TestRWScanRecovery(t *testing.T) {
|
||||
ldb, _ := leveldb.Open(storage.NewMemStorage(), nil)
|
||||
set := db.NewFileSet("default", ldb)
|
||||
set.Update(protocol.LocalDeviceID, []protocol.FileInfo{
|
||||
{Name: "dummyfile"},
|
||||
})
|
||||
|
||||
fcfg := config.FolderConfiguration{
|
||||
ID: "default",
|
||||
Path: "testdata/rwtestfolder",
|
||||
RescanIntervalS: 1,
|
||||
}
|
||||
cfg := config.Wrap("/tmp/test", config.Configuration{
|
||||
Folders: []config.FolderConfiguration{fcfg},
|
||||
Devices: []config.DeviceConfiguration{
|
||||
{
|
||||
DeviceID: device1,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
os.RemoveAll(fcfg.Path)
|
||||
|
||||
m := NewModel(cfg, protocol.LocalDeviceID, "device", "syncthing", "dev", ldb)
|
||||
|
||||
m.AddFolder(fcfg)
|
||||
m.StartFolderRW("default")
|
||||
|
||||
waitFor := func(status string) error {
|
||||
timeout := time.Now().Add(2 * time.Second)
|
||||
for {
|
||||
if time.Now().After(timeout) {
|
||||
return fmt.Errorf("Timed out waiting for status: %s, current status: %s", status, m.cfg.Folders()["default"].Invalid)
|
||||
}
|
||||
if m.cfg.Folders()["default"].Invalid == status {
|
||||
return nil
|
||||
}
|
||||
time.Sleep(10 * time.Millisecond)
|
||||
}
|
||||
}
|
||||
|
||||
if err := waitFor("Folder path missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
os.Mkdir(fcfg.Path, 0700)
|
||||
|
||||
if err := waitFor("Folder marker missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
fd, err := os.Create(filepath.Join(fcfg.Path, ".stfolder"))
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
fd.Close()
|
||||
|
||||
if err := waitFor(""); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
os.Remove(filepath.Join(fcfg.Path, ".stfolder"))
|
||||
|
||||
if err := waitFor("Folder marker missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
os.Remove(fcfg.Path)
|
||||
|
||||
if err := waitFor("Folder path missing"); err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func TestGlobalDirectoryTree(t *testing.T) {
|
||||
db, _ := leveldb.Open(storage.NewMemStorage(), nil)
|
||||
m := NewModel(defaultConfig, protocol.LocalDeviceID, "device", "syncthing", "dev", db)
|
||||
|
||||
@@ -40,6 +40,12 @@ func (s *roFolder) Serve() {
|
||||
timer := time.NewTimer(time.Millisecond)
|
||||
defer timer.Stop()
|
||||
|
||||
// reschedule arms timer for the next scan attempt.
reschedule := func() {
	// rand.Int63n panics when its argument is not positive. With a zero
	// (or negative) interval there is nothing to schedule, so leave the
	// timer untouched instead of crashing.
	if s.intv <= 0 {
		return
	}
	// Sleep a random time between 3/4 and 5/4 of the configured interval.
	sleepNanos := (s.intv.Nanoseconds()*3 + rand.Int63n(2*s.intv.Nanoseconds())) / 4
	timer.Reset(time.Duration(sleepNanos) * time.Nanosecond)
}
|
||||
|
||||
initialScanCompleted := false
|
||||
for {
|
||||
select {
|
||||
@@ -47,16 +53,25 @@ func (s *roFolder) Serve() {
|
||||
return
|
||||
|
||||
case <-timer.C:
|
||||
if err := s.model.CheckFolderHealth(s.folder); err != nil {
|
||||
l.Infoln("Skipping folder", s.folder, "scan due to folder error:", err)
|
||||
reschedule()
|
||||
continue
|
||||
}
|
||||
|
||||
if debug {
|
||||
l.Debugln(s, "rescan")
|
||||
}
|
||||
|
||||
s.setState(FolderScanning)
|
||||
if err := s.model.ScanFolder(s.folder); err != nil {
|
||||
s.model.cfg.InvalidateFolder(s.folder, err.Error())
|
||||
return
|
||||
// Potentially sets the error twice, once in the scanner just
|
||||
// by doing a check, and once here, if the error returned is
|
||||
// the same one as returned by CheckFolderHealth, though
|
||||
// duplicate set is handled by SetFolderError
|
||||
s.model.cfg.SetFolderError(s.folder, err)
|
||||
reschedule()
|
||||
continue
|
||||
}
|
||||
s.setState(FolderIdle)
|
||||
|
||||
if !initialScanCompleted {
|
||||
l.Infoln("Completed initial scan (ro) of folder", s.folder)
|
||||
@@ -67,9 +82,7 @@ func (s *roFolder) Serve() {
|
||||
return
|
||||
}
|
||||
|
||||
// Sleep a random time between 3/4 and 5/4 of the configured interval.
|
||||
sleepNanos := (s.intv.Nanoseconds()*3 + rand.Int63n(2*s.intv.Nanoseconds())) / 4
|
||||
timer.Reset(time.Duration(sleepNanos) * time.Nanosecond)
|
||||
reschedule()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -114,10 +114,20 @@ func (p *rwFolder) Serve() {
|
||||
var prevVer int64
|
||||
var prevIgnoreHash string
|
||||
|
||||
// rescheduleScan arms scanTimer for the next scan attempt.
rescheduleScan := func() {
	// rand.Int63n panics when its argument is not positive. With a zero
	// (or negative) interval there is nothing to schedule, so leave the
	// timer untouched instead of crashing. This matters because the
	// closure is also called on error paths that do not check
	// p.scanIntv > 0 first.
	if p.scanIntv <= 0 {
		return
	}
	// Sleep a random time between 3/4 and 5/4 of the configured interval.
	sleepNanos := (p.scanIntv.Nanoseconds()*3 + rand.Int63n(2*p.scanIntv.Nanoseconds())) / 4
	intv := time.Duration(sleepNanos) * time.Nanosecond

	if debug {
		l.Debugln(p, "next rescan in", intv)
	}
	scanTimer.Reset(intv)
}
|
||||
|
||||
// We don't start pulling files until a scan has been completed.
|
||||
initialScanCompleted := false
|
||||
|
||||
loop:
|
||||
for {
|
||||
select {
|
||||
case <-p.stop:
|
||||
@@ -130,7 +140,6 @@ loop:
|
||||
// device A to device B, so we have something to work against.
|
||||
case <-pullTimer.C:
|
||||
if !initialScanCompleted {
|
||||
// How did we even get here?
|
||||
if debug {
|
||||
l.Debugln(p, "skip (initial)")
|
||||
}
|
||||
@@ -219,24 +228,28 @@ loop:
|
||||
// this is the easiest way to make sure we are not doing both at the
|
||||
// same time.
|
||||
case <-scanTimer.C:
|
||||
if err := p.model.CheckFolderHealth(p.folder); err != nil {
|
||||
l.Infoln("Skipping folder", p.folder, "scan due to folder error:", err)
|
||||
rescheduleScan()
|
||||
continue
|
||||
}
|
||||
|
||||
if debug {
|
||||
l.Debugln(p, "rescan")
|
||||
}
|
||||
p.setState(FolderScanning)
|
||||
if err := p.model.ScanFolder(p.folder); err != nil {
|
||||
p.model.cfg.InvalidateFolder(p.folder, err.Error())
|
||||
break loop
|
||||
}
|
||||
p.setState(FolderIdle)
|
||||
if p.scanIntv > 0 {
|
||||
// Sleep a random time between 3/4 and 5/4 of the configured interval.
|
||||
sleepNanos := (p.scanIntv.Nanoseconds()*3 + rand.Int63n(2*p.scanIntv.Nanoseconds())) / 4
|
||||
intv := time.Duration(sleepNanos) * time.Nanosecond
|
||||
|
||||
if debug {
|
||||
l.Debugln(p, "next rescan in", intv)
|
||||
}
|
||||
scanTimer.Reset(intv)
|
||||
if err := p.model.ScanFolder(p.folder); err != nil {
|
||||
// Potentially sets the error twice, once in the scanner just
|
||||
// by doing a check, and once here, if the error returned is
|
||||
// the same one as returned by CheckFolderHealth, though
|
||||
// duplicate set is handled by SetFolderError
|
||||
p.model.cfg.SetFolderError(p.folder, err)
|
||||
rescheduleScan()
|
||||
continue
|
||||
}
|
||||
|
||||
if p.scanIntv > 0 {
|
||||
rescheduleScan()
|
||||
}
|
||||
if !initialScanCompleted {
|
||||
l.Infoln("Completed initial scan (rw) of folder", p.folder)
|
||||
|
||||
Reference in New Issue
Block a user