lib/db, lib/model: Add sequence->deviceKey to db for sending indexes (#4906)

Instead of walking and unmarshalling the entire db and sorting the resulting
file infos by sequence, store device keys by sequence number in the
database. Thus only the required file infos need to be unmarshalled, and they
are already sorted by sequence number.
This commit is contained in:
Simon Frei
2018-05-01 23:39:15 +02:00
committed by Jakob Borg
parent 2c18640386
commit a548014755
8 changed files with 254 additions and 469 deletions

View File

@@ -11,11 +11,9 @@ import (
"fmt"
"github.com/syncthing/syncthing/lib/protocol"
"github.com/syndtr/goleveldb/leveldb"
"github.com/syndtr/goleveldb/leveldb/opt"
)
const dbVersion = 1
const dbVersion = 2
const (
KeyTypeDevice = iota
@@ -29,6 +27,7 @@ const (
KeyTypeIndexID
KeyTypeFolderMeta
KeyTypeMiscData
KeyTypeSequence
)
func (l VersionList) String() string {
@@ -60,28 +59,5 @@ func (l fileList) Less(a, b int) bool {
return l[a].Name < l[b].Name
}
type dbReader interface {
Get([]byte, *opt.ReadOptions) ([]byte, error)
}
// Flush batches to disk when they contain this many records.
const batchFlushSize = 64
// getFile looks up key in db and decodes the stored bytes as a
// protocol.FileInfo. It returns the zero FileInfo and false when the key is
// not found, when the read fails, or when the value cannot be unmarshalled;
// the latter two cases are logged at debug level.
func getFile(db dbReader, key []byte) (protocol.FileInfo, bool) {
	raw, err := db.Get(key, nil)
	switch {
	case err == leveldb.ErrNotFound:
		// A missing entry is an expected outcome, not worth logging.
		return protocol.FileInfo{}, false
	case err != nil:
		l.Debugln("surprise error:", err)
		return protocol.FileInfo{}, false
	}

	var fi protocol.FileInfo
	if err := fi.Unmarshal(raw); err != nil {
		l.Debugln("unmarshal error:", err)
		return protocol.FileInfo{}, false
	}
	return fi, true
}

View File

@@ -35,10 +35,13 @@ type Instance struct {
}
const (
keyPrefixLen = 1
keyFolderLen = 4 // indexed
keyDeviceLen = 4 // indexed
keyHashLen = 32
keyPrefixLen = 1
keyFolderLen = 4 // indexed
keyDeviceLen = 4 // indexed
keySequenceLen = 8
keyHashLen = 32
maxInt64 int64 = 1<<63 - 1
)
func Open(file string) (*Instance, error) {
@@ -88,13 +91,21 @@ func (db *Instance) UpdateSchema() {
miscDB := NewNamespacedKV(db, string(KeyTypeMiscData))
prevVersion, _ := miscDB.Int64("dbVersion")
if prevVersion >= dbVersion {
return
}
l.Infof("Updating database schema version from %v to %v...", prevVersion, dbVersion)
if prevVersion == 0 {
db.updateSchema0to1()
}
if prevVersion != dbVersion {
miscDB.PutInt64("dbVersion", dbVersion)
if prevVersion <= 1 {
db.updateSchema1to2()
}
l.Infof("Finished updating database schema version from %v to %v", prevVersion, dbVersion)
miscDB.PutInt64("dbVersion", dbVersion)
}
// Committed returns the number of items committed to the database since startup
@@ -112,9 +123,10 @@ func (db *Instance) updateFiles(folder, device []byte, fs []protocol.FileInfo, m
defer t.close()
var fk []byte
var gk []byte
for _, f := range fs {
name := []byte(f.Name)
fk = db.deviceKeyInto(fk[:cap(fk)], folder, device, name)
fk = db.deviceKeyInto(fk, folder, device, name)
// Get and unmarshal the file entry. If it doesn't exist or can't be
// unmarshalled we'll add it as a new entry.
@@ -135,8 +147,10 @@ func (db *Instance) updateFiles(folder, device []byte, fs []protocol.FileInfo, m
}
meta.addFile(devID, f)
t.insertFile(folder, device, f)
t.updateGlobal(folder, device, f, meta)
t.insertFile(fk, folder, device, f)
gk = db.globalKeyInto(gk, folder, name)
t.updateGlobal(gk, folder, device, f, meta)
// Write out and reuse the batch every few records, to avoid the batch
// growing too large and thus allocating unnecessarily much memory.
@@ -144,6 +158,33 @@ func (db *Instance) updateFiles(folder, device []byte, fs []protocol.FileInfo, m
}
}
// addSequences stores one sequence entry per file in fs. The key is the
// sequence key built from folder and the file's Sequence; the value is the
// device key built from folder, the local device ID and the file's Name.
func (db *Instance) addSequences(folder []byte, fs []protocol.FileInfo) {
	txn := db.newReadWriteTransaction()
	defer txn.close()

	// Both key buffers are reused from one iteration to the next.
	var (
		seqKey []byte
		devKey []byte
	)
	for i := range fs {
		file := &fs[i]
		seqKey = db.sequenceKeyInto(seqKey, folder, file.Sequence)
		devKey = db.deviceKeyInto(devKey, folder, protocol.LocalDeviceID[:], []byte(file.Name))
		txn.Put(seqKey, devKey)
		l.Debugf("adding sequence; folder=%q sequence=%v %v", folder, file.Sequence, file.Name)
		txn.checkFlush()
	}
}
// removeSequences deletes the sequence entry of every file in fs. The key of
// each entry is built from folder and the file's Sequence field.
func (db *Instance) removeSequences(folder []byte, fs []protocol.FileInfo) {
	t := db.newReadWriteTransaction()
	defer t.close()

	// Scratch buffer for the sequence key. It must be assigned back on every
	// iteration, otherwise sequenceKeyInto is always handed a nil slice and
	// allocates a new key per file instead of reusing the buffer.
	var sk []byte
	for _, f := range fs {
		sk = db.sequenceKeyInto(sk, folder, f.Sequence)
		t.Delete(sk)
		l.Debugf("removing sequence; folder=%q sequence=%v %v", folder, f.Sequence, f.Name)
		t.checkFlush()
	}
}
func (db *Instance) withHave(folder, device, prefix []byte, truncate bool, fn Iterator) {
t := db.newReadOnlyTransaction()
defer t.close()
@@ -171,7 +212,26 @@ func (db *Instance) withHave(folder, device, prefix []byte, truncate bool, fn It
l.Debugln("unmarshal error:", err)
continue
}
if cont := fn(f); !cont {
if !fn(f) {
return
}
}
}
func (db *Instance) withHaveSequence(folder []byte, startSeq int64, fn Iterator) {
t := db.newReadOnlyTransaction()
defer t.close()
dbi := t.NewIterator(&util.Range{Start: db.sequenceKey(folder, startSeq), Limit: db.sequenceKey(folder, maxInt64)}, nil)
defer dbi.Release()
for dbi.Next() {
f, ok := db.getFile(dbi.Value())
if !ok {
l.Debugln("missing file for sequence number", db.sequenceKeySequence(dbi.Key()))
continue
}
if !fn(f) {
return
}
}
@@ -184,6 +244,8 @@ func (db *Instance) withAllFolderTruncated(folder []byte, fn func(device []byte,
dbi := t.NewIterator(util.BytesPrefix(db.deviceKey(folder, nil, nil)[:keyPrefixLen+keyFolderLen]), nil)
defer dbi.Release()
var gk []byte
for dbi.Next() {
device := db.deviceKeyDevice(dbi.Key())
var f FileInfoTruncated
@@ -200,20 +262,37 @@ func (db *Instance) withAllFolderTruncated(folder []byte, fn func(device []byte,
switch f.Name {
case "", ".", "..", "/": // A few obviously invalid filenames
l.Infof("Dropping invalid filename %q from database", f.Name)
t.removeFromGlobal(folder, device, nil, nil)
name := []byte(f.Name)
gk = db.globalKeyInto(gk, folder, name)
t.removeFromGlobal(gk, folder, device, name, nil)
t.Delete(dbi.Key())
t.checkFlush()
continue
}
if cont := fn(device, f); !cont {
if !fn(device, f) {
return
}
}
}
func (db *Instance) getFile(folder, device, file []byte) (protocol.FileInfo, bool) {
return getFile(db, db.deviceKey(folder, device, file))
// getFile reads the value stored under key and decodes it as a
// protocol.FileInfo. The boolean result is false, and the FileInfo is the zero
// value, when the key does not exist, the read fails, or the stored bytes
// cannot be unmarshalled. Read and unmarshal failures are logged at debug
// level; a missing key is not.
func (db *Instance) getFile(key []byte) (protocol.FileInfo, bool) {
	data, err := db.Get(key, nil)
	if err != nil {
		if err != leveldb.ErrNotFound {
			l.Debugln("surprise error:", err)
		}
		return protocol.FileInfo{}, false
	}

	var info protocol.FileInfo
	if uerr := info.Unmarshal(data); uerr != nil {
		l.Debugln("unmarshal error:", uerr)
		return protocol.FileInfo{}, false
	}
	return info, true
}
func (db *Instance) getGlobal(folder, file []byte, truncate bool) (FileIntf, bool) {
@@ -286,7 +365,7 @@ func (db *Instance) withGlobal(folder, prefix []byte, truncate bool, fn Iterator
return
}
fk = db.deviceKeyInto(fk[:cap(fk)], folder, vl.Versions[0].Device, name)
fk = db.deviceKeyInto(fk, folder, vl.Versions[0].Device, name)
bs, err := t.Get(fk, nil)
if err != nil {
l.Debugln("surprise error:", err)
@@ -299,7 +378,7 @@ func (db *Instance) withGlobal(folder, prefix []byte, truncate bool, fn Iterator
continue
}
if cont := fn(f); !cont {
if !fn(f) {
return
}
}
@@ -392,7 +471,7 @@ func (db *Instance) withNeed(folder, device []byte, truncate bool, fn Iterator)
continue
}
fk = db.deviceKeyInto(fk[:cap(fk)], folder, vl.Versions[i].Device, name)
fk = db.deviceKeyInto(fk, folder, vl.Versions[i].Device, name)
bs, err := t.Get(fk, nil)
if err != nil {
l.Debugln("surprise error:", err)
@@ -412,7 +491,7 @@ func (db *Instance) withNeed(folder, device []byte, truncate bool, fn Iterator)
l.Debugf("need folder=%q device=%v name=%q need=%v have=%v invalid=%v haveV=%v globalV=%v globalDev=%v", folder, protocol.DeviceIDFromBytes(device), name, need, have, haveFileVersion.Invalid, haveFileVersion.Version, needVersion, needDevice)
if cont := fn(gf); !cont {
if !fn(gf) {
return
}
@@ -478,10 +557,13 @@ func (db *Instance) dropDeviceFolder(device, folder []byte, meta *metadataTracke
dbi := t.NewIterator(util.BytesPrefix(db.deviceKey(folder, device, nil)), nil)
defer dbi.Release()
var gk []byte
for dbi.Next() {
key := dbi.Key()
name := db.deviceKeyName(key)
t.removeFromGlobal(folder, device, name, meta)
gk = db.globalKeyInto(gk, folder, name)
t.removeFromGlobal(gk, folder, device, name, meta)
t.Delete(key)
t.checkFlush()
}
@@ -512,8 +594,7 @@ func (db *Instance) checkGlobals(folder []byte, meta *metadataTracker) {
name := db.globalKeyName(gk)
var newVL VersionList
for i, version := range vl.Versions {
fk = db.deviceKeyInto(fk[:cap(fk)], folder, version.Device, name)
fk = db.deviceKeyInto(fk, folder, version.Device, name)
_, err := t.Get(fk, nil)
if err == leveldb.ErrNotFound {
continue
@@ -525,7 +606,7 @@ func (db *Instance) checkGlobals(folder []byte, meta *metadataTracker) {
newVL.Versions = append(newVL.Versions, version)
if i == 0 {
if fi, ok := t.getFile(folder, version.Device, name); ok {
if fi, ok := db.getFile(fk); ok {
meta.addFile(globalDeviceID, fi)
}
}
@@ -550,18 +631,20 @@ func (db *Instance) updateSchema0to1() {
changedFolders := make(map[string]struct{})
ignAdded := 0
meta := newMetadataTracker() // dummy metadata tracker
var gk []byte
for dbi.Next() {
folder := db.deviceKeyFolder(dbi.Key())
device := db.deviceKeyDevice(dbi.Key())
name := string(db.deviceKeyName(dbi.Key()))
name := db.deviceKeyName(dbi.Key())
// Remove files with absolute path (see #4799)
if strings.HasPrefix(name, "/") {
if strings.HasPrefix(string(name), "/") {
if _, ok := changedFolders[string(folder)]; !ok {
changedFolders[string(folder)] = struct{}{}
}
t.removeFromGlobal(folder, device, nil, nil)
gk = db.globalKeyInto(gk, folder, name)
t.removeFromGlobal(gk, folder, device, nil, nil)
t.Delete(dbi.Key())
t.checkFlush()
continue
@@ -590,7 +673,8 @@ func (db *Instance) updateSchema0to1() {
// Add invalid files to global list
if f.Invalid {
if t.updateGlobal(folder, device, f, meta) {
gk = db.globalKeyInto(gk, folder, name)
if t.updateGlobal(gk, folder, device, f, meta) {
if _, ok := changedFolders[string(folder)]; !ok {
changedFolders[string(folder)] = struct{}{}
}
@@ -606,6 +690,25 @@ func (db *Instance) updateSchema0to1() {
l.Infof("Updated symlink type for %d index entries and added %d invalid files to global list", symlinkConv, ignAdded)
}
// updateSchema1to2 fills in the sequence entries. For every folder returned
// by ListFolders it walks the local device's files via withHave (truncated)
// and stores, for each file, its sequence key mapped to its device key.
func (db *Instance) updateSchema1to2() {
	txn := db.newReadWriteTransaction()
	defer txn.close()

	// Key buffers shared by all folders and files to limit allocations.
	var seqKey, devKey []byte
	for _, folderName := range db.ListFolders() {
		folder := []byte(folderName)
		storeEntry := func(f FileIntf) bool {
			seqKey = db.sequenceKeyInto(seqKey, folder, f.SequenceNo())
			devKey = db.deviceKeyInto(devKey, folder, protocol.LocalDeviceID[:], []byte(f.FileName()))
			txn.Put(seqKey, devKey)
			txn.checkFlush()
			// Always continue; every file needs an entry.
			return true
		}
		db.withHave(folder, protocol.LocalDeviceID[:], nil, true, storeEntry)
	}
}
// deviceKey returns a byte slice encoding the following information:
// keyTypeDevice (1 byte)
// folder (4 bytes)
@@ -615,16 +718,14 @@ func (db *Instance) deviceKey(folder, device, file []byte) []byte {
return db.deviceKeyInto(nil, folder, device, file)
}
func (db *Instance) deviceKeyInto(k []byte, folder, device, file []byte) []byte {
func (db *Instance) deviceKeyInto(k, folder, device, file []byte) []byte {
reqLen := keyPrefixLen + keyFolderLen + keyDeviceLen + len(file)
if len(k) < reqLen {
k = make([]byte, reqLen)
}
k = resize(k, reqLen)
k[0] = KeyTypeDevice
binary.BigEndian.PutUint32(k[keyPrefixLen:], db.folderIdx.ID(folder))
binary.BigEndian.PutUint32(k[keyPrefixLen+keyFolderLen:], db.deviceIdx.ID(device))
copy(k[keyPrefixLen+keyFolderLen+keyDeviceLen:], file)
return k[:reqLen]
return k
}
// deviceKeyName returns the file name from the key
@@ -655,11 +756,16 @@ func (db *Instance) deviceKeyDevice(key []byte) []byte {
// folder (4 bytes)
// name (variable size)
func (db *Instance) globalKey(folder, file []byte) []byte {
k := make([]byte, keyPrefixLen+keyFolderLen+len(file))
k[0] = KeyTypeGlobal
binary.BigEndian.PutUint32(k[keyPrefixLen:], db.folderIdx.ID(folder))
copy(k[keyPrefixLen+keyFolderLen:], file)
return k
return db.globalKeyInto(nil, folder, file)
}
// globalKeyInto builds the global key for folder and file, writing it into gk
// when gk has enough capacity (see resize) and into a new slice otherwise.
// The returned key consists of:
//	KeyTypeGlobal (1 byte)
//	folder index ID, big endian (4 bytes)
//	file name (variable size)
func (db *Instance) globalKeyInto(gk, folder, file []byte) []byte {
	const nameOffset = keyPrefixLen + keyFolderLen

	// resize already returns a slice of exactly the requested length.
	gk = resize(gk, nameOffset+len(file))
	gk[0] = KeyTypeGlobal
	folderID := db.folderIdx.ID(folder)
	binary.BigEndian.PutUint32(gk[keyPrefixLen:], folderID)
	copy(gk[nameOffset:], file)
	return gk
}
// globalKeyName returns the filename from the key
@@ -672,6 +778,28 @@ func (db *Instance) globalKeyFolder(key []byte) ([]byte, bool) {
return db.folderIdx.Val(binary.BigEndian.Uint32(key[keyPrefixLen:]))
}
// sequenceKey returns a newly allocated byte slice encoding the following
// information:
//	KeyTypeSequence (1 byte)
//	folder (4 bytes)
//	sequence number (8 bytes)
// It is sequenceKeyInto with a nil destination buffer, so nothing is reused.
func (db *Instance) sequenceKey(folder []byte, seq int64) []byte {
return db.sequenceKeyInto(nil, folder, seq)
}
// sequenceKeyInto builds the sequence key for folder and seq, writing it into
// k when k has enough capacity (see resize) and into a new slice otherwise.
// The returned key is the KeyTypeSequence byte, followed by the folder index
// ID as a big-endian uint32, followed by seq as a big-endian uint64.
func (db *Instance) sequenceKeyInto(k []byte, folder []byte, seq int64) []byte {
	const (
		folderOffset = keyPrefixLen
		seqOffset    = keyPrefixLen + keyFolderLen
		keyLen       = seqOffset + keySequenceLen
	)

	// resize already returns a slice of exactly keyLen bytes.
	k = resize(k, keyLen)
	k[0] = KeyTypeSequence
	binary.BigEndian.PutUint32(k[folderOffset:], db.folderIdx.ID(folder))
	binary.BigEndian.PutUint64(k[seqOffset:], uint64(seq))
	return k
}
// sequenceKeySequence extracts the sequence number from a sequence key. It
// is read as a big-endian uint64 starting right after the key type and folder
// fields.
func (db *Instance) sequenceKeySequence(key []byte) int64 {
	seqBytes := key[keyPrefixLen+keyFolderLen:]
	return int64(binary.BigEndian.Uint64(seqBytes))
}
func (db *Instance) getIndexID(device, folder []byte) protocol.IndexID {
key := db.indexIDKey(device, folder)
cur, err := db.Get(key, nil)
@@ -887,3 +1015,11 @@ func (i *smallIndex) Val(id uint32) ([]byte, bool) {
return []byte(val), true
}
// resize returns a byte slice of length reqLen. If k has sufficient capacity
// the result shares k's backing array and its existing contents are left as
// they are; otherwise a new zeroed slice is allocated and nothing is copied
// from k.
func resize(k []byte, reqLen int) []byte {
	if reqLen <= cap(k) {
		return k[:reqLen]
	}
	return make([]byte, reqLen)
}

View File

@@ -36,7 +36,7 @@ func (t readOnlyTransaction) close() {
}
func (t readOnlyTransaction) getFile(folder, device, file []byte) (protocol.FileInfo, bool) {
return getFile(t, t.db.deviceKey(folder, device, file))
return t.db.getFile(t.db.deviceKey(folder, device, file))
}
// A readWriteTransaction is a readOnlyTransaction plus a batch for writes.
@@ -74,21 +74,18 @@ func (t readWriteTransaction) flush() {
atomic.AddInt64(&t.db.committed, int64(t.Batch.Len()))
}
func (t readWriteTransaction) insertFile(folder, device []byte, file protocol.FileInfo) {
func (t readWriteTransaction) insertFile(fk, folder, device []byte, file protocol.FileInfo) {
l.Debugf("insert; folder=%q device=%v %v", folder, protocol.DeviceIDFromBytes(device), file)
name := []byte(file.Name)
nk := t.db.deviceKey(folder, device, name)
t.Put(nk, mustMarshal(&file))
t.Put(fk, mustMarshal(&file))
}
// updateGlobal adds this device+version to the version list for the given
// file. If the device is already present in the list, the version is updated.
// If the file does not have an entry in the global list, it is created.
func (t readWriteTransaction) updateGlobal(folder, device []byte, file protocol.FileInfo, meta *metadataTracker) bool {
func (t readWriteTransaction) updateGlobal(gk, folder, device []byte, file protocol.FileInfo, meta *metadataTracker) bool {
l.Debugf("update global; folder=%q device=%v file=%q version=%v invalid=%v", folder, protocol.DeviceIDFromBytes(device), file.Name, file.Version, file.Invalid)
name := []byte(file.Name)
gk := t.db.globalKey(folder, name)
svl, _ := t.Get(gk, nil) // skip error, we check len(svl) != 0 later
var fl VersionList
@@ -150,8 +147,7 @@ insert:
// to determine the winner.)
//
// A surprise missing file entry here is counted as a win for us.
of, ok := t.getFile(folder, fl.Versions[i].Device, name)
if !ok || file.WinsConflict(of) {
if of, ok := t.getFile(folder, fl.Versions[i].Device, name); !ok || file.WinsConflict(of) {
fl.Versions = insertVersion(fl.Versions, i, nv)
insertedAt = i
break insert
@@ -193,10 +189,9 @@ insert:
// removeFromGlobal removes the device from the global version list for the
// given file. If the version list is empty after this, the file entry is
// removed entirely.
func (t readWriteTransaction) removeFromGlobal(folder, device, file []byte, meta *metadataTracker) {
func (t readWriteTransaction) removeFromGlobal(gk, folder, device, file []byte, meta *metadataTracker) {
l.Debugf("remove from global; folder=%q device=%v file=%q", folder, protocol.DeviceIDFromBytes(device), file)
gk := t.db.globalKey(folder, file)
svl, err := t.Get(gk, nil)
if err != nil {
// We might be called to "remove" a global version that doesn't exist

View File

@@ -141,8 +141,11 @@ func (s *FileSet) Update(device protocol.DeviceID, fs []protocol.FileInfo) {
// filter slice according to https://github.com/golang/go/wiki/SliceTricks#filtering-without-allocating
oldFs := fs
fs = fs[:0]
var dk []byte
folder := []byte(s.folder)
for _, nf := range oldFs {
ef, ok := s.db.getFile([]byte(s.folder), device[:], []byte(nf.Name))
dk = s.db.deviceKeyInto(dk, folder, device[:], []byte(osutil.NormalizedFilename(nf.Name)))
ef, ok := s.db.getFile(dk)
if ok && ef.Version.Equal(nf.Version) && ef.Invalid == nf.Invalid {
continue
}
@@ -157,6 +160,8 @@ func (s *FileSet) Update(device protocol.DeviceID, fs []protocol.FileInfo) {
}
s.blockmap.Discard(discards)
s.blockmap.Update(updates)
s.db.removeSequences(folder, discards)
s.db.addSequences(folder, updates)
}
s.db.updateFiles([]byte(s.folder), device[:], fs, s.meta)
@@ -183,6 +188,11 @@ func (s *FileSet) WithHaveTruncated(device protocol.DeviceID, fn Iterator) {
s.db.withHave([]byte(s.folder), device[:], nil, true, nativeFileIterator(fn))
}
// WithHaveSequence logs the call at debug level and then delegates to the
// database's withHaveSequence for this set's folder, which iterates the
// folder's sequence entries starting at startSeq and calls the iterator for
// each file until it returns false. fn is wrapped with nativeFileIterator
// before being passed on (presumably to present native file names — verify
// against nativeFileIterator).
func (s *FileSet) WithHaveSequence(startSeq int64, fn Iterator) {
l.Debugf("%s WithHaveSequence(%v)", s.folder, startSeq)
s.db.withHaveSequence([]byte(s.folder), startSeq, nativeFileIterator(fn))
}
func (s *FileSet) WithPrefixedHaveTruncated(device protocol.DeviceID, prefix string, fn Iterator) {
l.Debugf("%s WithPrefixedHaveTruncated(%v)", s.folder, device)
s.db.withHave([]byte(s.folder), device[:], []byte(osutil.NormalizedFilename(prefix)), true, nativeFileIterator(fn))
@@ -203,7 +213,7 @@ func (s *FileSet) WithPrefixedGlobalTruncated(prefix string, fn Iterator) {
}
func (s *FileSet) Get(device protocol.DeviceID, file string) (protocol.FileInfo, bool) {
f, ok := s.db.getFile([]byte(s.folder), device[:], []byte(osutil.NormalizedFilename(file)))
f, ok := s.db.getFile(s.db.deviceKey([]byte(s.folder), device[:], []byte(osutil.NormalizedFilename(file))))
f.Name = osutil.NativeFilename(f.Name)
return f, ok
}

View File

@@ -865,6 +865,33 @@ func TestIssue4701(t *testing.T) {
}
}
// TestWithHaveSequence checks that WithHaveSequence returns the local files
// in sequence order, starting at the requested sequence number. The test
// relies on the n:th file handed to replace being given sequence number n.
func TestWithHaveSequence(t *testing.T) {
	ldb := db.OpenMemory()

	folder := "test)"
	s := db.NewFileSet(folder, fs.NewFilesystem(fs.FilesystemTypeBasic, "."), ldb)

	// The files must not be in alphabetical order
	localHave := fileList{
		protocol.FileInfo{Name: "e", Version: protocol.Vector{Counters: []protocol.Counter{{ID: myID, Value: 1003}}}, Invalid: true},
		protocol.FileInfo{Name: "b", Version: protocol.Vector{Counters: []protocol.Counter{{ID: myID, Value: 1001}}}, Blocks: genBlocks(2)},
		protocol.FileInfo{Name: "d", Version: protocol.Vector{Counters: []protocol.Counter{{ID: myID, Value: 1003}}}, Blocks: genBlocks(7)},
		protocol.FileInfo{Name: "a", Version: protocol.Vector{Counters: []protocol.Counter{{ID: myID, Value: 1000}}}, Blocks: genBlocks(1)},
		protocol.FileInfo{Name: "c", Version: protocol.Vector{Counters: []protocol.Counter{{ID: myID, Value: 1002}}}, Blocks: genBlocks(5), Invalid: true},
	}

	replace(s, protocol.LocalDeviceID, localHave)

	const startSeq = 2
	i := startSeq
	s.WithHaveSequence(int64(i), func(fi db.FileIntf) bool {
		// Fail cleanly instead of panicking on an out-of-range index if more
		// entries than expected are returned.
		if i-1 >= len(localHave) {
			t.Fatalf("Got unexpected extra file %v", fi)
		}
		if f := fi.(protocol.FileInfo); !f.IsEquivalent(localHave[i-1], false, false) {
			t.Fatalf("Got %v\nExpected %v", f, localHave[i-1])
		}
		i++
		return true
	})

	// Without this check the test passes even if the iterator is never
	// called, or stops early.
	if got, want := i-startSeq, len(localHave)-startSeq+1; got != want {
		t.Fatalf("Got %d files, expected %d", got, want)
	}
}
func replace(fs *db.FileSet, device protocol.DeviceID, files []protocol.FileInfo) {
fs.Drop(device)
fs.Update(device, files)