lib/db, lib/model: Add sequence->deviceKey to db for sending indexes (#4906)

Instead of walking and unmarshalling the entire db and sorting the resulting
file infos by sequence, store device keys by sequence number in the
database. Thus only the required file infos need be unmarshalled and are already
sorted by sequence number.
This commit is contained in:
Simon Frei
2018-05-01 23:39:15 +02:00
committed by Jakob Borg
parent 2c18640386
commit a548014755
8 changed files with 254 additions and 469 deletions

View File

@@ -1639,14 +1639,15 @@ func (m *Model) receivedFile(folder string, file protocol.FileInfo) {
m.folderStatRef(folder).ReceivedFile(file.Name, file.IsDeleted())
}
func sendIndexes(conn protocol.Connection, folder string, fs *db.FileSet, ignores *ignore.Matcher, startSequence int64, dbLocation string, dropSymlinks bool) {
func sendIndexes(conn protocol.Connection, folder string, fs *db.FileSet, ignores *ignore.Matcher, prevSequence int64, dbLocation string, dropSymlinks bool) {
deviceID := conn.ID()
var err error
l.Debugf("Starting sendIndexes for %s to %s at %s (slv=%d)", folder, deviceID, conn, startSequence)
l.Debugf("Starting sendIndexes for %s to %s at %s (slv=%d)", folder, deviceID, conn, prevSequence)
defer l.Debugf("Exiting sendIndexes for %s to %s at %s: %v", folder, deviceID, conn, err)
minSequence, err := sendIndexTo(startSequence, conn, folder, fs, ignores, dbLocation, dropSymlinks)
// We need to send one index, regardless of whether there is something to send or not
prevSequence, err = sendIndexTo(prevSequence, conn, folder, fs, ignores, dbLocation, dropSymlinks)
// Subscribe to LocalIndexUpdated (we have new information to send) and
// DeviceDisconnected (it might be us who disconnected, so we should
@@ -1664,12 +1665,12 @@ func sendIndexes(conn protocol.Connection, folder string, fs *db.FileSet, ignore
// currently in the database, wait for the local index to update. The
// local index may update for other folders than the one we are
// sending for.
if fs.Sequence(protocol.LocalDeviceID) <= minSequence {
if fs.Sequence(protocol.LocalDeviceID) <= prevSequence {
sub.Poll(time.Minute)
continue
}
minSequence, err = sendIndexTo(minSequence, conn, folder, fs, ignores, dbLocation, dropSymlinks)
prevSequence, err = sendIndexTo(prevSequence, conn, folder, fs, ignores, dbLocation, dropSymlinks)
// Wait a short amount of time before entering the next loop. If there
// are continuous changes happening to the local index, this gives us
@@ -1678,43 +1679,20 @@ func sendIndexes(conn protocol.Connection, folder string, fs *db.FileSet, ignore
}
}
func sendIndexTo(minSequence int64, conn protocol.Connection, folder string, fs *db.FileSet, ignores *ignore.Matcher, dbLocation string, dropSymlinks bool) (int64, error) {
// sendIndexTo sends file infos with a sequence number higher than prevSequence and
// returns the highest sent sequence number.
func sendIndexTo(prevSequence int64, conn protocol.Connection, folder string, fs *db.FileSet, ignores *ignore.Matcher, dbLocation string, dropSymlinks bool) (int64, error) {
deviceID := conn.ID()
batch := make([]protocol.FileInfo, 0, maxBatchSizeFiles)
batchSizeBytes := 0
initial := minSequence == 0
maxSequence := minSequence
initial := prevSequence == 0
var err error
var f protocol.FileInfo
debugMsg := func(t string) string {
return fmt.Sprintf("Sending indexes for %s to %s at %s: %d files (<%d bytes) (%s)", folder, deviceID, conn, len(batch), batchSizeBytes, t)
}
sorter := NewIndexSorter(dbLocation)
defer sorter.Close()
fs.WithHave(protocol.LocalDeviceID, func(fi db.FileIntf) bool {
f := fi.(protocol.FileInfo)
if f.Sequence <= minSequence {
return true
}
if f.Sequence > maxSequence {
maxSequence = f.Sequence
}
if dropSymlinks && f.IsSymlink() {
// Do not send index entries with symlinks to clients that can't
// handle it. Fixes issue #3802. Once both sides are upgraded, a
// rescan (i.e., change) of the symlink is required for it to
// sync again, due to delta indexes.
return true
}
sorter.Append(f)
return true
})
sorter.Sorted(func(f protocol.FileInfo) bool {
fs.WithHaveSequence(prevSequence+1, func(fi db.FileIntf) bool {
if len(batch) == maxBatchSizeFiles || batchSizeBytes > maxBatchSizeBytes {
if initial {
if err = conn.Index(folder, batch); err != nil {
@@ -1733,24 +1711,43 @@ func sendIndexTo(minSequence int64, conn protocol.Connection, folder string, fs
batchSizeBytes = 0
}
f = fi.(protocol.FileInfo)
if dropSymlinks && f.IsSymlink() {
// Do not send index entries with symlinks to clients that can't
// handle it. Fixes issue #3802. Once both sides are upgraded, a
// rescan (i.e., change) of the symlink is required for it to
// sync again, due to delta indexes.
return true
}
batch = append(batch, f)
batchSizeBytes += f.ProtoSize()
return true
})
if initial && err == nil {
if err != nil {
return prevSequence, err
}
if initial {
err = conn.Index(folder, batch)
if err == nil {
l.Debugln(debugMsg("small initial index"))
}
} else if len(batch) > 0 && err == nil {
} else if len(batch) > 0 {
err = conn.IndexUpdate(folder, batch)
if err == nil {
l.Debugln(debugMsg("last batch"))
}
}
return maxSequence, err
// True if there was nothing to be sent
if f.Sequence == 0 {
return prevSequence, err
}
return f.Sequence, err
}
func (m *Model) updateLocalsFromScanning(folder string, fs []protocol.FileInfo) {

View File

@@ -1,199 +0,0 @@
// Copyright (C) 2016 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package model
import (
"encoding/binary"
"io/ioutil"
"os"
"sort"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
const (
// maxBytesInMemory is the byte budget (512 KiB) that a new
// inMemoryIndexSorter gets as its maxBytes limit.
maxBytesInMemory = 512 << 10
)
// The IndexSorter sorts FileInfos based on their Sequence. You use it
// by first Append()ing all entries to be sorted, then calling Sorted()
// which will iterate over all the items in correctly sorted order.
// Call Close() once you are done with the sorter.
type IndexSorter interface {
// Append adds f to the set of entries to be sorted.
Append(f protocol.FileInfo)
// Sorted calls fn once per appended entry, in Sequence order, and
// stops iterating as soon as fn returns false.
Sorted(fn func(f protocol.FileInfo) bool)
// Close releases whatever the implementation holds. It does nothing
// for the in memory sorter and removes the temporary database for the
// on disk sorter.
Close()
}
// An internalIndexSorter is an IndexSorter that can additionally be wrapped
// by the autoSwitchingIndexSorter, which needs to know when to move on to
// another sorter and how to carry the existing entries over.
type internalIndexSorter interface {
IndexSorter
// full reports whether the sorter should be replaced before another
// entry is appended.
full() bool
// copyTo appends the entries held by this sorter to the given sorter.
copyTo(to IndexSorter)
}
// NewIndexSorter creates an IndexSorter that keeps its entries in memory to
// begin with, and moves them to a temporary on disk database created under
// location once the in memory sorter reports that it is full.
func NewIndexSorter(location string) IndexSorter {
	sorter := new(autoSwitchingIndexSorter)
	sorter.internalIndexSorter = newInMemoryIndexSorter()
	sorter.location = location
	return sorter
}
// An autoSwitchingIndexSorter starts out as an inMemoryIndexSorter but
// becomes an onDiskIndexSorter when the in memory sorter is full().
type autoSwitchingIndexSorter struct {
// The sorter currently in use. Methods not defined on the
// autoSwitchingIndexSorter itself are served by it through embedding.
internalIndexSorter
// Directory handed to newOnDiskIndexSorter when spilling to disk.
location string
}
// Append hands f to the sorter currently in use. If that sorter reports
// itself full, its entries are first copied into a new on disk sorter, which
// then takes over for this and all later calls.
func (s *autoSwitchingIndexSorter) Append(f protocol.FileInfo) {
	// The fullness check deliberately comes before the append and not after
	// it. A sorter that went over the limit with its last entry (a single
	// large file, say) and never receives another one does not need to
	// spill at all.
	if current := s.internalIndexSorter; current.full() {
		l.Debugf("sorter %p spills to disk", s)
		onDisk := newOnDiskIndexSorter(s.location)
		current.copyTo(onDisk)
		s.internalIndexSorter = onDisk
	}

	s.internalIndexSorter.Append(f)
}
// An inMemoryIndexSorter is simply a slice of FileInfos. The full() method
// returns true when the accumulated ProtoSize of the appended files has
// reached maxBytes.
type inMemoryIndexSorter struct {
// Appended files; put into Sequence order in place by Sorted.
files []protocol.FileInfo
// Sum of ProtoSize() over all appended files.
bytes int
// Limit for bytes at which full() starts returning true.
maxBytes int
}
// newInMemoryIndexSorter returns an empty in memory sorter whose size limit
// is maxBytesInMemory.
func newInMemoryIndexSorter() *inMemoryIndexSorter {
	sorter := new(inMemoryIndexSorter)
	sorter.maxBytes = maxBytesInMemory
	return sorter
}
// Append stores f and adds its ProtoSize to the running byte count that
// full() compares against the limit.
func (s *inMemoryIndexSorter) Append(f protocol.FileInfo) {
	size := f.ProtoSize()
	s.files = append(s.files, f)
	s.bytes += size
}
// Sorted sorts the stored files by Sequence, in place, and then calls fn for
// each of them in that order until fn returns false or the files run out.
func (s *inMemoryIndexSorter) Sorted(fn func(protocol.FileInfo) bool) {
	files := s.files
	sort.Sort(bySequence(files))

	for i := 0; i < len(files); i++ {
		if keepGoing := fn(files[i]); !keepGoing {
			return
		}
	}
}
// Close does nothing; it exists so that the inMemoryIndexSorter satisfies
// the IndexSorter interface.
func (s *inMemoryIndexSorter) Close() {
}
// full reports whether the accumulated size of the appended files has
// reached the configured limit.
func (s *inMemoryIndexSorter) full() bool {
	return s.maxBytes <= s.bytes
}
// copyTo appends every stored file to dst, in the order the files are
// currently held.
func (s *inMemoryIndexSorter) copyTo(dst IndexSorter) {
	files := s.files
	for i := 0; i < len(files); i++ {
		dst.Append(files[i])
	}
}
// bySequence implements sort.Interface for a slice of FileInfos, ordering
// them by ascending Sequence.
type bySequence []protocol.FileInfo

// Len returns the number of FileInfos in the slice.
func (s bySequence) Len() int { return len(s) }

// Swap exchanges the FileInfos at positions i and j.
func (s bySequence) Swap(i, j int) { s[i], s[j] = s[j], s[i] }

// Less reports whether the FileInfo at position i has a lower Sequence than
// the one at position j.
func (s bySequence) Less(i, j int) bool { return s[i].Sequence < s[j].Sequence }
// An onDiskIndexSorter is backed by a LevelDB database in the temporary
// directory. It relies on the fact that iterating over the database is done
// in key order and uses the Sequence as key. When done with an
// onDiskIndexSorter you must call Close() to remove the temporary database.
type onDiskIndexSorter struct {
// Handle to the temporary database holding the marshalled FileInfos.
db *leveldb.DB
// Path of the temporary database directory, removed by Close().
dir string
}
// newOnDiskIndexSorter creates a fresh temporary directory below location,
// opens a LevelDB database in it and returns a sorter backed by that
// database. It panics if either the directory or the database cannot be
// created; in the latter case the just created directory is removed again
// first so that it is not left behind.
func newOnDiskIndexSorter(location string) *onDiskIndexSorter {
	// Set options to minimize resource usage.
	opts := &opt.Options{
		OpenFilesCacheCapacity: 10,
		WriteBuffer:            512 << 10,
	}

	// Use a temporary database directory.
	tmp, err := ioutil.TempDir(location, "tmp-index-sorter.")
	if err != nil {
		panic("creating temporary directory: " + err.Error())
	}

	db, err := leveldb.OpenFile(tmp, opts)
	if err != nil {
		// No sorter is returned, so nobody can call Close() to clean up
		// the directory later. Removal is best effort; we are about to
		// panic with the more relevant error anyway.
		_ = os.RemoveAll(tmp)
		panic("creating temporary database: " + err.Error())
	}

	s := &onDiskIndexSorter{
		db:  db,
		dir: tmp,
	}
	l.Debugf("onDiskIndexSorter %p created at %s", s, tmp)
	return s
}
// Append marshals f and stores it in the temporary database under a key
// that is the big endian encoding of its Sequence. It panics if marshalling
// or writing fails.
func (s *onDiskIndexSorter) Append(f protocol.FileInfo) {
	var key [8]byte
	binary.BigEndian.PutUint64(key[:], uint64(f.Sequence))

	value, err := f.Marshal()
	if err != nil {
		panic("bug: marshalling FileInfo should never fail: " + err.Error())
	}

	if err := s.db.Put(key[:], value, nil); err != nil {
		panic("writing to temporary database: " + err.Error())
	}
}
// Sorted iterates over the temporary database in key order, unmarshals each
// stored FileInfo and passes it to fn until fn returns false or the entries
// run out. It panics if an entry cannot be unmarshalled or if the iterator
// stopped because of an error rather than because it reached the end.
func (s *onDiskIndexSorter) Sorted(fn func(protocol.FileInfo) bool) {
	it := s.db.NewIterator(nil, nil)
	defer it.Release()

	for it.Next() {
		var f protocol.FileInfo
		if err := f.Unmarshal(it.Value()); err != nil {
			panic("unmarshal failed: " + err.Error())
		}
		if !fn(f) {
			// The caller asked us to stop; nothing more to verify.
			return
		}
	}

	// Next() returning false means either "no more entries" or "something
	// went wrong". Without this check a failed iteration would look like a
	// complete, but shorter, list of files.
	if err := it.Error(); err != nil {
		panic("iterating temporary database: " + err.Error())
	}
}
// Close closes the temporary database and removes its directory. Failures
// are logged at debug level rather than returned, as the IndexSorter
// interface gives Close no way to report them; the directory removal is
// attempted even if closing the database failed.
func (s *onDiskIndexSorter) Close() {
	l.Debugf("onDiskIndexSorter %p closes", s)
	if err := s.db.Close(); err != nil {
		l.Debugf("onDiskIndexSorter %p: closing temporary database: %v", s, err)
	}
	if err := os.RemoveAll(s.dir); err != nil {
		l.Debugf("onDiskIndexSorter %p: removing %s: %v", s, s.dir, err)
	}
}
// full always returns false, so the autoSwitchingIndexSorter never tries to
// replace an on disk sorter with yet another one.
func (s *onDiskIndexSorter) full() bool {
return false
}
// copyTo is not implemented for the on disk sorter and panics when called.
// As full() never returns true for this type, the autoSwitchingIndexSorter
// does not call it.
func (s *onDiskIndexSorter) copyTo(dst IndexSorter) {
// Just wrap Sorted() if we need to support this in the future.
panic("unsupported")
}

View File

@@ -1,157 +0,0 @@
// Copyright (C) 2016 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package model
import (
"fmt"
"os"
"testing"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syncthing/syncthing/lib/rand"
)
// TestInMemoryIndexSorter checks that an in memory sorter takes a handful of
// files in arbitrary order, hands them back sorted, and stops iterating when
// asked to.
func TestInMemoryIndexSorter(t *testing.T) {
	const numFiles = 50

	sorter := newInMemoryIndexSorter()
	addFiles(numFiles, sorter)

	verifySorted(t, sorter, numFiles)
	verifyBreak(t, sorter, numFiles)

	sorter.Close()
}
// TestOnDiskIndexSorter checks that an on disk sorter takes a handful of
// files in arbitrary order, hands them back sorted, stops iterating when
// asked to, and that its temporary database directory exists until Close()
// and is gone afterwards.
func TestOnDiskIndexSorter(t *testing.T) {
	const numFiles = 50

	sorter := newOnDiskIndexSorter("testdata")
	addFiles(numFiles, sorter)

	verifySorted(t, sorter, numFiles)
	verifyBreak(t, sorter, numFiles)

	// While the sorter is open its database must be a directory on disk.
	switch info, err := os.Stat(sorter.dir); {
	case err != nil:
		t.Fatal("temp database should exist on disk:", err)
	case !info.IsDir():
		t.Fatal("temp database should be a directory")
	}

	// Closing the sorter must take the directory away again.
	sorter.Close()

	if _, err := os.Stat(sorter.dir); !os.IsNotExist(err) {
		t.Fatal("temp database should have been removed")
	}
}
// TestIndexSorter checks that a default IndexSorter starts out backed by
// memory, switches to an on disk database as files keep being added, and
// returns correctly sorted results both before and after the switch.
func TestIndexSorter(t *testing.T) {
	s := NewIndexSorter("testdata")
	defer s.Close()

	// A single file must not be enough to leave memory.
	nFiles := 1
	addFiles(1, s)
	verifySorted(t, s, nFiles)

	as := s.(*autoSwitchingIndexSorter)
	if _, inMemory := as.internalIndexSorter.(*inMemoryIndexSorter); !inMemory {
		t.Fatalf("the sorter should be in memory after only one file")
	}

	onDisk := func() bool {
		_, ok := as.internalIndexSorter.(*onDiskIndexSorter)
		return ok
	}

	// Keep adding files one at a time until the switch happens. It must
	// happen well before maxBytesInMemory files have been added, so that
	// number serves as the upper bound for the loop.
	for i := 0; i < maxBytesInMemory && !onDisk(); i++ {
		addFiles(1, s)
		nFiles++
	}

	if !onDisk() {
		t.Fatalf("the sorter should be on disk after %d files", nFiles)
	}

	verifySorted(t, s, nFiles)

	// One more round after the switch, for test coverage: some methods are
	// only called on the on disk sorter once it is the active one.
	addFiles(1, s)
	verifySorted(t, s, nFiles+1)
}
// addFiles appends n FileInfos to s. Each gets a random Sequence, a name
// derived from that Sequence, random metadata, one version counter and one
// block.
func addFiles(n int, s IndexSorter) {
	for remaining := n; remaining > 0; remaining-- {
		seq := rand.Int63()

		var f protocol.FileInfo
		f.Name = fmt.Sprintf("file-%d", seq)
		f.Size = rand.Int63()
		f.Permissions = uint32(rand.Intn(0777))
		f.ModifiedS = rand.Int63()
		f.ModifiedNs = int32(rand.Int63())
		f.Sequence = seq
		f.Version = protocol.Vector{Counters: []protocol.Counter{{ID: 42, Value: uint64(rand.Int63())}}}
		f.Blocks = []protocol.BlockInfo{{
			Size: int32(rand.Intn(128 << 10)),
			Hash: []byte(rand.String(32)),
		}}

		s.Append(f)
	}
}
// verifySorted iterates over everything in s and fails the test if a file
// comes back with a Sequence that is not higher than the previous one, or if
// the number of files returned differs from expected.
func verifySorted(t *testing.T, s IndexSorter, expected int) {
	var (
		count int
		last  = int64(-1)
	)

	s.Sorted(func(f protocol.FileInfo) bool {
		if last >= f.Sequence {
			t.Fatalf("Unsorted Sequence, %d <= %d", f.Sequence, last)
		}
		last = f.Sequence
		count++
		return true
	})

	if count != expected {
		t.Fatalf("expected %d files returned, got %d", expected, count)
	}
}
// verifyBreak checks that the Sorter stops iteration once we return false.
// The callback asks to stop after expected/2 files, and the test fails if
// the sorter delivers any other number. Along the way each file is checked
// for correct ordering and for having exactly one block and one version
// counter.
func verifyBreak(t *testing.T, s IndexSorter, expected int) {
	// We stop half way through what the sorter holds.
	limit := expected / 2

	prevSequence := int64(-1)
	seen := 0
	s.Sorted(func(f protocol.FileInfo) bool {
		if f.Sequence <= prevSequence {
			t.Fatalf("Unsorted Sequence, %d <= %d", f.Sequence, prevSequence)
		}
		if len(f.Blocks) != 1 {
			t.Fatalf("incorrect number of blocks %d != 1", len(f.Blocks))
		}
		if len(f.Version.Counters) != 1 {
			t.Fatalf("incorrect number of version counters %d != 1", len(f.Version.Counters))
		}

		prevSequence = f.Sequence
		seen++
		return seen < limit
	})

	if seen != limit {
		// Report the number we actually compared against, not the total
		// number of files in the sorter.
		t.Fatalf("expected %d files iterated over, got %d", limit, seen)
	}
}