lib/model, cmd/syncthing: Wait for folder restarts to complete (fixes #5233) (#5239)

* lib/model, cmd/syncthing: Wait for folder restarts to complete (fixes #5233)

This is the somewhat ugly, but on the other hand clear, fix for what
is really a thorny issue. To avoid zombie folder runners, a new mutex
is introduced that protects the RestartFolder operation. I hate adding
more mutexes, but the alternatives I can think of are worse.
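
For illustration, a minimal sketch of what a per-folder restart mutex
can look like. This is not the actual lib/model code: the Model struct
is pared down, and restartMutFor, stopFolder and startFolder are
invented names standing in for the real stop/start logic.

package model

import "sync"

// Model is a pared-down stand-in for lib/model's Model.
type Model struct {
	fmut        sync.Mutex             // guards restartMuts
	restartMuts map[string]*sync.Mutex // one restart mutex per folder ID
}

// restartMutFor returns the mutex for a folder, creating it on first use.
func (m *Model) restartMutFor(folder string) *sync.Mutex {
	m.fmut.Lock()
	defer m.fmut.Unlock()
	if m.restartMuts == nil {
		m.restartMuts = make(map[string]*sync.Mutex)
	}
	mut, ok := m.restartMuts[folder]
	if !ok {
		mut = new(sync.Mutex)
		m.restartMuts[folder] = mut
	}
	return mut
}

// RestartFolder serializes restarts of the same folder. Without the
// mutex, a second restart could start a new runner before the first
// had stopped the old one, leaving a zombie runner behind.
func (m *Model) RestartFolder(folder string) {
	mut := m.restartMutFor(folder)
	mut.Lock()
	defer mut.Unlock()

	m.stopFolder(folder)  // tear down the old runner
	m.startFolder(folder) // start a runner for the new config
}

func (m *Model) stopFolder(folder string)  { /* elided */ }
func (m *Model) startFolder(folder string) { /* elided */ }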

The other part of it is that the POST /rest/system/config operation now
waits for the config commit to complete. The point of this is that until
the commit has completed, we should not accept another config commit. If
we did, we could end up with two separate RestartFolders queued in the
background. While both are correct and will run without interfering
with each other, we can't guarantee the order in which they will run.
It could thus happen that the newer config gets committed first, and the
older config committed after it, leaving us with the wrong config
running.
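
On the handler side, a rough sketch of the wait-for-commit pattern
(assuming, as the test below does with w.Wait(), that replacing the
config returns a waiter; Configuration, Waiter and configReplacer are
illustrative stand-ins for the real lib/config types):

package api

import (
	"encoding/json"
	"net/http"
)

// Waiter mirrors the Wait() pattern the test below relies on.
type Waiter interface{ Wait() }

// Configuration stands in for the real config.Configuration.
type Configuration struct{}

type configReplacer interface {
	Replace(to Configuration) (Waiter, error)
}

// postSystemConfig sketches the fix: the handler blocks on Wait() so
// the HTTP request does not return until every config subscriber has
// committed the change. Without this, two quick POSTs could queue two
// RestartFolder calls whose relative order is not guaranteed.
func postSystemConfig(cfg configReplacer, w http.ResponseWriter, r *http.Request) {
	var to Configuration
	if err := json.NewDecoder(r.Body).Decode(&to); err != nil {
		http.Error(w, err.Error(), http.StatusBadRequest)
		return
	}
	waiter, err := cfg.Replace(to)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	waiter.Wait() // only now is it safe to accept the next config commit
}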

* test

* wip

* hax

* hax

* unflake test

* per folder mutexes

* paranoia

* race
Jakob Borg
2018-10-05 10:26:25 +02:00
committed by Audrius Butkevicius
parent c9d6366d75
commit f528923d1e
5 changed files with 102 additions and 11 deletions

@@ -20,6 +20,7 @@ import (
	"strconv"
	"strings"
	"sync"
	"sync/atomic"
	"testing"
	"time"
@@ -3850,6 +3851,59 @@ func addFakeConn(m *Model, dev protocol.DeviceID) *fakeConnection {
	return fc
}

func TestFolderRestartZombies(t *testing.T) {
	// This is for issue 5233, where multiple concurrent folder restarts
	// would leave more than one folder runner alive.

	wrapper := createTmpWrapper(defaultCfg.Copy())
	folderCfg, _ := wrapper.Folder("default")
	folderCfg.FilesystemType = fs.FilesystemTypeFake
	wrapper.SetFolder(folderCfg)

	db := db.OpenMemory()
	m := NewModel(wrapper, protocol.LocalDeviceID, "syncthing", "dev", db, nil)
	m.AddFolder(folderCfg)
	m.StartFolder("default")
	m.ServeBackground()
	defer m.Stop()

	// Make sure the folder is up and running, because we want to count it.
	m.ScanFolder("default")

	// Check how many running folders we have before the test.
	if r := atomic.LoadInt32(&m.foldersRunning); r != 1 {
		t.Error("Expected one running folder, not", r)
	}

	// Run a few parallel configuration changers for one second. Each waits
	// for the commit to complete, but there are many of them.
	var wg sync.WaitGroup
	for i := 0; i < 25; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			t0 := time.Now()
			for time.Since(t0) < time.Second {
				cfg := folderCfg.Copy()
				cfg.MaxConflicts = rand.Int() // safe change that should cause a folder restart
				w, err := wrapper.SetFolder(cfg)
				if err != nil {
					panic(err)
				}
				w.Wait()
			}
		}()
	}

	// Wait for the above to complete and check how many folders we have
	// running now. It should not have increased.
	wg.Wait()
	if r := atomic.LoadInt32(&m.foldersRunning); r != 1 {
		t.Error("Expected one running folder, not", r)
	}
}

type fakeAddr struct{}

func (fakeAddr) Network() string {