This introduces a better set of defaults for large databases. I've experimentally determined that it results in much better throughput in a couple of scenarios with large databases, but I can't give any guarantees the values are always optimal. They're probably no worse than the defaults though.
This commit is contained in:
@@ -23,11 +23,26 @@ import (
|
||||
|
||||
const (
|
||||
dbMaxOpenFiles = 100
|
||||
dbWriteBuffer = 16 << 20
|
||||
dbFlushBatch = 4 << MiB
|
||||
|
||||
// A large database is > 200 MiB. It's a mostly arbitrary value, but
|
||||
// it's also the case that each file is 2 MiB by default and when we
|
||||
// have dbMaxOpenFiles of them we will need to start thrashing fd:s.
|
||||
// Switching to large database settings causes larger files to be used
|
||||
// when compacting, reducing the number.
|
||||
dbLargeThreshold = dbMaxOpenFiles * (2 << MiB)
|
||||
|
||||
KiB = 10
|
||||
MiB = 20
|
||||
)
|
||||
|
||||
var (
|
||||
dbFlushBatch = debugEnvValue("WriteBuffer", dbWriteBuffer) / 4 // Some leeway for any leveldb in-memory optimizations
|
||||
type Tuning int
|
||||
|
||||
const (
|
||||
// N.b. these constants must match those in lib/config.Tuning!
|
||||
TuningAuto Tuning = iota
|
||||
TuningSmall
|
||||
TuningLarge
|
||||
)
|
||||
|
||||
// Lowlevel is the lowest level database interface. It has a very simple
|
||||
@@ -49,18 +64,58 @@ type Lowlevel struct {
|
||||
// Open attempts to open the database at the given location, and runs
|
||||
// recovery on it if opening fails. Worst case, if recovery is not possible,
|
||||
// the database is erased and created from scratch.
|
||||
func Open(location string) (*Lowlevel, error) {
|
||||
func Open(location string, tuning Tuning) (*Lowlevel, error) {
|
||||
opts := optsFor(location, tuning)
|
||||
return open(location, opts)
|
||||
}
|
||||
|
||||
// optsFor returns the database options to use when opening a database with
|
||||
// the given location and tuning. Settings can be overridden by debug
|
||||
// environment variables.
|
||||
func optsFor(location string, tuning Tuning) *opt.Options {
|
||||
large := false
|
||||
switch tuning {
|
||||
case TuningLarge:
|
||||
large = true
|
||||
case TuningAuto:
|
||||
large = dbIsLarge(location)
|
||||
}
|
||||
|
||||
var (
|
||||
// Set defaults used for small databases.
|
||||
defaultBlockCacheCapacity = 0 // 0 means let leveldb use default
|
||||
defaultBlockSize = 0
|
||||
defaultCompactionTableSize = 0
|
||||
defaultCompactionTableSizeMultiplier = 0
|
||||
defaultWriteBuffer = 16 << MiB // increased from leveldb default of 4 MiB
|
||||
defaultCompactionL0Trigger = opt.DefaultCompactionL0Trigger // explicit because we use it as base for other stuff
|
||||
)
|
||||
|
||||
if large {
|
||||
// Change the parameters for better throughput at the price of some
|
||||
// RAM and larger files. This results in larger batches of writes
|
||||
// and compaction at a lower frequency.
|
||||
l.Infoln("Using large-database tuning")
|
||||
|
||||
defaultBlockCacheCapacity = 64 << MiB
|
||||
defaultBlockSize = 64 << KiB
|
||||
defaultCompactionTableSize = 16 << MiB
|
||||
defaultCompactionTableSizeMultiplier = 20 // 2.0 after division by ten
|
||||
defaultWriteBuffer = 64 << MiB
|
||||
defaultCompactionL0Trigger = 8 // number of l0 files
|
||||
}
|
||||
|
||||
opts := &opt.Options{
|
||||
BlockCacheCapacity: debugEnvValue("BlockCacheCapacity", 0),
|
||||
BlockCacheCapacity: debugEnvValue("BlockCacheCapacity", defaultBlockCacheCapacity),
|
||||
BlockCacheEvictRemoved: debugEnvValue("BlockCacheEvictRemoved", 0) != 0,
|
||||
BlockRestartInterval: debugEnvValue("BlockRestartInterval", 0),
|
||||
BlockSize: debugEnvValue("BlockSize", 0),
|
||||
BlockSize: debugEnvValue("BlockSize", defaultBlockSize),
|
||||
CompactionExpandLimitFactor: debugEnvValue("CompactionExpandLimitFactor", 0),
|
||||
CompactionGPOverlapsFactor: debugEnvValue("CompactionGPOverlapsFactor", 0),
|
||||
CompactionL0Trigger: debugEnvValue("CompactionL0Trigger", 0),
|
||||
CompactionL0Trigger: debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger),
|
||||
CompactionSourceLimitFactor: debugEnvValue("CompactionSourceLimitFactor", 0),
|
||||
CompactionTableSize: debugEnvValue("CompactionTableSize", 0),
|
||||
CompactionTableSizeMultiplier: float64(debugEnvValue("CompactionTableSizeMultiplier", 0)) / 10.0,
|
||||
CompactionTableSize: debugEnvValue("CompactionTableSize", defaultCompactionTableSize),
|
||||
CompactionTableSizeMultiplier: float64(debugEnvValue("CompactionTableSizeMultiplier", defaultCompactionTableSizeMultiplier)) / 10.0,
|
||||
CompactionTotalSize: debugEnvValue("CompactionTotalSize", 0),
|
||||
CompactionTotalSizeMultiplier: float64(debugEnvValue("CompactionTotalSizeMultiplier", 0)) / 10.0,
|
||||
DisableBufferPool: debugEnvValue("DisableBufferPool", 0) != 0,
|
||||
@@ -70,15 +125,16 @@ func Open(location string) (*Lowlevel, error) {
|
||||
NoSync: debugEnvValue("NoSync", 0) != 0,
|
||||
NoWriteMerge: debugEnvValue("NoWriteMerge", 0) != 0,
|
||||
OpenFilesCacheCapacity: debugEnvValue("OpenFilesCacheCapacity", dbMaxOpenFiles),
|
||||
WriteBuffer: debugEnvValue("WriteBuffer", dbWriteBuffer),
|
||||
WriteBuffer: debugEnvValue("WriteBuffer", defaultWriteBuffer),
|
||||
// The write slowdown and pause can be overridden, but even if they
|
||||
// are not and the compaction trigger is overridden we need to
|
||||
// adjust so that we don't pause writes for L0 compaction before we
|
||||
// even *start* L0 compaction...
|
||||
WriteL0SlowdownTrigger: debugEnvValue("WriteL0SlowdownTrigger", 2*debugEnvValue("CompactionL0Trigger", opt.DefaultCompactionL0Trigger)),
|
||||
WriteL0PauseTrigger: debugEnvValue("WriteL0SlowdownTrigger", 3*debugEnvValue("CompactionL0Trigger", opt.DefaultCompactionL0Trigger)),
|
||||
WriteL0SlowdownTrigger: debugEnvValue("WriteL0SlowdownTrigger", 2*debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger)),
|
||||
WriteL0PauseTrigger: debugEnvValue("WriteL0SlowdownTrigger", 3*debugEnvValue("CompactionL0Trigger", defaultCompactionL0Trigger)),
|
||||
}
|
||||
return open(location, opts)
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
// OpenRO attempts to open the database at the given location, read only.
|
||||
@@ -114,6 +170,7 @@ func open(location string, opts *opt.Options) (*Lowlevel, error) {
|
||||
l.Warnln("Compacting database:", err)
|
||||
}
|
||||
}
|
||||
|
||||
return NewLowlevel(db, location), nil
|
||||
}
|
||||
|
||||
@@ -207,6 +264,31 @@ func (db *Lowlevel) Close() {
|
||||
db.DB.Close()
|
||||
}
|
||||
|
||||
// dbIsLarge returns whether the estimated size of the database at location
|
||||
// is large enough to warrant optimization for large databases.
|
||||
func dbIsLarge(location string) bool {
|
||||
dir, err := os.Open(location)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
fis, err := dir.Readdir(-1)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var size int64
|
||||
for _, fi := range fis {
|
||||
if fi.Name() == "LOG" {
|
||||
// don't count the size
|
||||
continue
|
||||
}
|
||||
size += fi.Size()
|
||||
}
|
||||
|
||||
return size > dbLargeThreshold
|
||||
}
|
||||
|
||||
// NewLowlevel wraps the given *leveldb.DB into a *lowlevel
|
||||
func NewLowlevel(db *leveldb.DB, location string) *Lowlevel {
|
||||
return &Lowlevel{
|
||||
|
||||
Reference in New Issue
Block a user