lib/db: Use different defaults for larger databases (fixes #5966) (#5967)

This introduces a better set of defaults for large databases. I've
experimentally determined that it results in much better throughput in a
couple of scenarios with large databases, but I can't give any
guarantees the values are always optimal. They're probably no worse than
the defaults though.
Author: Jakob Borg, 2019-08-20 09:41:41 +02:00
Committed by: Simon Frei
Parent: e910acdc17
Commit: 90b70c7a16
7 changed files with 174 additions and 18 deletions

lib/db/lowlevel.go

@@ -23,11 +23,26 @@ import (
 const (
 	dbMaxOpenFiles = 100
 	dbWriteBuffer  = 16 << 20
-	dbFlushBatch   = 4 << MiB
+
+	// A large database is > 200 MiB. It's a mostly arbitrary value, but
+	// it's also the case that each file is 2 MiB by default and when we
+	// have dbMaxOpenFiles of them we will need to start thrashing fd:s.
+	// Switching to large database settings causes larger files to be used
+	// when compacting, reducing the number.
+	dbLargeThreshold = dbMaxOpenFiles * (2 << MiB)
+
+	KiB = 10
+	MiB = 20
 )
 
+var (
+	dbFlushBatch = debugEnvValue("WriteBuffer", dbWriteBuffer) / 4 // Some leeway for any leveldb in-memory optimizations
+)
+
+type Tuning int
+
+const (
+	// N.b. these constants must match those in lib/config.Tuning!
+	TuningAuto Tuning = iota
+	TuningSmall
+	TuningLarge
+)
+
 // Lowlevel is the lowest level database interface. It has a very simple
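Note that KiB and MiB above are shift amounts rather than byte counts: x << MiB multiplies x by 2^20. A standalone sketch (not part of the commit) that double-checks the threshold arithmetic:

	package main

	import "fmt"

	const (
		KiB = 10 // shift amount: 1 << KiB == 1024
		MiB = 20 // shift amount: 1 << MiB == 1048576

		dbMaxOpenFiles   = 100
		dbLargeThreshold = dbMaxOpenFiles * (2 << MiB)
	)

	func main() {
		fmt.Println(2 << MiB)         // 2097152: one default 2 MiB table file
		fmt.Println(dbLargeThreshold) // 209715200: dbMaxOpenFiles such files, i.e. 200 MiB
	}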
@@ -49,18 +64,58 @@ type Lowlevel struct {
 // Open attempts to open the database at the given location, and runs
 // recovery on it if opening fails. Worst case, if recovery is not possible,
 // the database is erased and created from scratch.
-func Open(location string) (*Lowlevel, error) {
+func Open(location string, tuning Tuning) (*Lowlevel, error) {
+	opts := optsFor(location, tuning)
+	return open(location, opts)
+}
+
+// optsFor returns the database options to use when opening a database with
+// the given location and tuning. Settings can be overridden by debug
+// environment variables.
+func optsFor(location string, tuning Tuning) *opt.Options {
+	large := false
+	switch tuning {
+	case TuningLarge:
+		large = true
+	case TuningAuto:
+		large = dbIsLarge(location)
+	}
+
+	var (
+		// Set defaults used for small databases.
+		defaultBlockCacheCapacity            = 0 // 0 means let leveldb use default
+		defaultBlockSize                     = 0
+		defaultCompactionTableSize           = 0
+		defaultCompactionTableSizeMultiplier = 0
+		defaultWriteBuffer                   = 16 << MiB                      // increased from leveldb default of 4 MiB
+		defaultCompactionL0Trigger           = opt.DefaultCompactionL0Trigger // explicit because we use it as base for other stuff
+	)
+	if large {
+		// Change the parameters for better throughput at the price of some
+		// RAM and larger files. This results in larger batches of writes
+		// and compaction at a lower frequency.
+		l.Infoln("Using large-database tuning")
+
+		defaultBlockCacheCapacity = 64 << MiB
+		defaultBlockSize = 64 << KiB
+		defaultCompactionTableSize = 16 << MiB
+		defaultCompactionTableSizeMultiplier = 20 // 2.0 after division by ten
+		defaultWriteBuffer = 64 << MiB
+		defaultCompactionL0Trigger = 8 // number of l0 files
+	}
+
 	opts := &opt.Options{
-		BlockCacheCapacity:            debugEnvValue("BlockCacheCapacity", 0),
+		BlockCacheCapacity:            debugEnvValue("BlockCacheCapacity", defaultBlockCacheCapacity),
 		BlockCacheEvictRemoved:        debugEnvValue("BlockCacheEvictRemoved", 0) != 0,
 		BlockRestartInterval:          debugEnvValue("BlockRestartInterval", 0),
-		BlockSize:                     debugEnvValue("BlockSize", 0),
+		BlockSize:                     debugEnvValue("BlockSize", defaultBlockSize),
 		CompactionExpandLimitFactor:   debugEnvValue("CompactionExpandLimitFactor", 0),
 		CompactionGPOverlapsFactor:    debugEnvValue("CompactionGPOverlapsFactor", 0),
-		CompactionL0Trigger:           debugEnvValue("CompactionL0Trigger", 0),
+		CompactionL0Trigger:           debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger),
 		CompactionSourceLimitFactor:   debugEnvValue("CompactionSourceLimitFactor", 0),
-		CompactionTableSize:           debugEnvValue("CompactionTableSize", 0),
-		CompactionTableSizeMultiplier: float64(debugEnvValue("CompactionTableSizeMultiplier", 0)) / 10.0,
+		CompactionTableSize:           debugEnvValue("CompactionTableSize", defaultCompactionTableSize),
+		CompactionTableSizeMultiplier: float64(debugEnvValue("CompactionTableSizeMultiplier", defaultCompactionTableSizeMultiplier)) / 10.0,
 		CompactionTotalSize:           debugEnvValue("CompactionTotalSize", 0),
 		CompactionTotalSizeMultiplier: float64(debugEnvValue("CompactionTotalSizeMultiplier", 0)) / 10.0,
 		DisableBufferPool:             debugEnvValue("DisableBufferPool", 0) != 0,
@@ -70,15 +125,16 @@ func Open(location string) (*Lowlevel, error) {
 		NoSync:                 debugEnvValue("NoSync", 0) != 0,
 		NoWriteMerge:           debugEnvValue("NoWriteMerge", 0) != 0,
 		OpenFilesCacheCapacity: debugEnvValue("OpenFilesCacheCapacity", dbMaxOpenFiles),
-		WriteBuffer:            debugEnvValue("WriteBuffer", dbWriteBuffer),
+		WriteBuffer:            debugEnvValue("WriteBuffer", defaultWriteBuffer),
 		// The write slowdown and pause can be overridden, but even if they
 		// are not and the compaction trigger is overridden we need to
 		// adjust so that we don't pause writes for L0 compaction before we
 		// even *start* L0 compaction...
-		WriteL0SlowdownTrigger: debugEnvValue("WriteL0SlowdownTrigger", 2*debugEnvValue("CompactionL0Trigger", opt.DefaultCompactionL0Trigger)),
-		WriteL0PauseTrigger:    debugEnvValue("WriteL0SlowdownTrigger", 3*debugEnvValue("CompactionL0Trigger", opt.DefaultCompactionL0Trigger)),
+		WriteL0SlowdownTrigger: debugEnvValue("WriteL0SlowdownTrigger", 2*debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger)),
+		WriteL0PauseTrigger:    debugEnvValue("WriteL0PauseTrigger", 3*debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger)),
 	}
-	return open(location, opts)
+	return opts
 }
 
 // OpenRO attempts to open the database at the given location, read only.
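Absent debug environment overrides, the large-database profile therefore resolves to concrete goleveldb options. A sketch (assuming no overrides are set; values restated from the diff, with the divide-by-ten multiplier convention and the 2x/3x trigger coupling applied):

	package main

	import (
		"fmt"

		"github.com/syndtr/goleveldb/leveldb/opt"
	)

	// Resolved large-database profile, assuming no debug environment
	// overrides are set.
	var largeOpts = &opt.Options{
		BlockCacheCapacity:            64 << 20, // 64 MiB
		BlockSize:                     64 << 10, // 64 KiB
		CompactionTableSize:           16 << 20, // 16 MiB table files instead of 2 MiB
		CompactionTableSizeMultiplier: 2.0,      // stored as 20, divided by ten
		WriteBuffer:                   64 << 20, // 64 MiB
		CompactionL0Trigger:           8,        // start L0 compaction at 8 files
		WriteL0SlowdownTrigger:        16,       // 2 * CompactionL0Trigger
		WriteL0PauseTrigger:           24,       // 3 * CompactionL0Trigger
	}

	func main() {
		fmt.Println(largeOpts.CompactionTableSize) // 16777216
	}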
@@ -114,6 +170,7 @@ func open(location string, opts *opt.Options) (*Lowlevel, error) {
 			l.Warnln("Compacting database:", err)
 		}
 	}
+
 	return NewLowlevel(db, location), nil
 }
@@ -207,6 +264,31 @@ func (db *Lowlevel) Close() {
 	db.DB.Close()
 }
 
+// dbIsLarge returns whether the estimated size of the database at location
+// is large enough to warrant optimization for large databases.
+func dbIsLarge(location string) bool {
+	dir, err := os.Open(location)
+	if err != nil {
+		return false
+	}
+	defer dir.Close()
+
+	fis, err := dir.Readdir(-1)
+	if err != nil {
+		return false
+	}
+
+	var size int64
+	for _, fi := range fis {
+		if fi.Name() == "LOG" {
+			// don't count the size of the LOG file
+			continue
+		}
+		size += fi.Size()
+	}
+
+	return size > dbLargeThreshold
+}
+
 // NewLowlevel wraps the given *leveldb.DB into a *Lowlevel
 func NewLowlevel(db *leveldb.DB, location string) *Lowlevel {
 	return &Lowlevel{
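For a feel of what TuningAuto's size check does at runtime, here is a standalone sketch of the same estimate using os.ReadDir, which postdates this 2019 commit; the directory path in main is a hypothetical example:

	package main

	import (
		"fmt"
		"os"
	)

	// approxDBSize mirrors dbIsLarge above: sum the sizes of the entries
	// directly inside the database directory, skipping the LOG file.
	func approxDBSize(location string) int64 {
		entries, err := os.ReadDir(location)
		if err != nil {
			return 0
		}
		var size int64
		for _, e := range entries {
			if e.Name() == "LOG" {
				continue // the log file is not table data
			}
			if fi, err := e.Info(); err == nil {
				size += fi.Size()
			}
		}
		return size
	}

	func main() {
		const threshold = 100 * (2 << 20)         // matches dbLargeThreshold: 200 MiB
		size := approxDBSize("/path/to/index.db") // hypothetical location
		fmt.Printf("estimated size %d bytes, large: %v\n", size, size > threshold)
	}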