erigon-pulse/swarm/storage/localstore/gc.go
Janoš Guljaš 9a58a9b91a swarm/storage/localstore: global batch write lock (#19245)
* swarm/storage/localstore: most basic database

* swarm/storage/localstore: fix typos and comments

* swarm/shed: add uint64 field Dec and DecInBatch methods

* swarm/storage/localstore: decrement size counter on ModeRemoval update

* swarm/storage/localstore: unexport modeAccess and modeRemoval

* swarm/storage/localstore: add WithRetrievalCompositeIndex

* swarm/storage/localstore: add TestModeSyncing

* swarm/storage/localstore: fix test name

* swarm/storage/localstore: add TestModeUpload

* swarm/storage/localstore: add TestModeRequest

* swarm/storage/localstore: add TestModeSynced

* swarm/storage/localstore: add TestModeAccess

* swarm/storage/localstore: add TestModeRemoval

* swarm/storage/localstore: add mock store option for chunk data

* swarm/storage/localstore: add TestDB_pullIndex

* swarm/storage/localstore: add TestDB_gcIndex

* swarm/storage/localstore: change how batches are written

* swarm/storage/localstore: add updateOnAccess function

* swarm/storage/localhost: add DB.gcSize

* swarm/storage/localstore: update comments

* swarm/storage/localstore: add BenchmarkNew

* swarm/storage/localstore: add retrieval tests benchmarks

* swarm/storage/localstore: accessors redesign

* swarm/storage/localstore: add semaphore for updateGC goroutine

* swarm/storage/localstore: implement basic garbage collection

* swarm/storage/localstore: optimize collectGarbage

* swarm/storage/localstore: add more garbage collection tests cases

* swarm/shed, swarm/storage/localstore: rename IndexItem to Item

* swarm/shed: add Index.CountFrom

* swarm/storage/localstore: persist gcSize

* swarm/storage/localstore: remove composite retrieval index

* swarm/shed: IterateWithPrefix and IterateWithPrefixFrom Index functions

* swarm/storage/localstore: writeGCSize function with leveldb batch

* swarm/storage/localstore: unexport modeSetRemove

* swarm/storage/localstore: update writeGCSizeWorker comment

* swarm/storage/localstore: add triggerGarbageCollection function

* swarm/storage/localstore: call writeGCSize on DB Close

* swarm/storage/localstore: additional comment in writeGCSizeWorker

* swarm/storage/localstore: add MetricsPrefix option

* swarm/storage/localstore: fix a typo

* swamr/shed: only one Index Iterate function

* swarm/storage/localstore: use shed Iterate function

* swarm/shed: pass a new byte slice copy to index decode functions

* swarm/storage/localstore: implement feed subscriptions

* swarm/storage/localstore: add more subscriptions tests

* swarm/storage/localsore: add parallel upload test

* swarm/storage/localstore: use storage.MaxPO in subscription tests

* swarm/storage/localstore: subscription of addresses instead chunks

* swarm/storage/localstore: lock item address in collectGarbage iterator

* swarm/storage/localstore: fix TestSubscribePull to include MaxPO

* swarm/storage/localstore: improve subscriptions

* swarm/storage/localstore: add TestDB_SubscribePull_sinceAndUntil test

* swarm/storage/localstore: adjust pull sync tests

* swarm/storage/localstore: remove writeGCSizeDelay and use literal

* swarm/storage/localstore: adjust subscriptions tests delays and comments

* swarm/storage/localstore: add godoc package overview

* swarm/storage/localstore: fix a typo

* swarm/storage/localstore: update package overview

* swarm/storage/localstore: remove repeated index change

* swarm/storage/localstore: rename ChunkInfo to ChunkDescriptor

* swarm/storage/localstore: add comment in collectGarbageWorker

* swarm/storage/localstore: replace atomics with mutexes for gcSize and tests

* swarm/storage/localstore: protect addrs map in pull subs tests

* swarm/storage/localstore: protect slices in push subs test

* swarm/storage/localstore: protect chunks in TestModePutUpload_parallel

* swarm/storage/localstore: fix a race in TestDB_updateGCSem defers

* swarm/storage/localstore: remove parallel flag from tests

* swarm/storage/localstore: fix a race in testDB_collectGarbageWorker

* swarm/storage/localstore: remove unused code

* swarm/storage/localstore: add more context to pull sub log messages

* swarm/storage/localstore: BenchmarkPutUpload and global lock option

* swarm/storage/localstore: pre-generate chunks in BenchmarkPutUpload

* swarm/storage/localstore: correct useGlobalLock in collectGarbage

* swarm/storage/localstore: fix typos and update comments

* swarm/storage/localstore: update writeGCSize comment

* swarm/storage/localstore: global batch write lock

* swarm/storage/localstore: remove global lock option

* swarm/storage/localstore: simplify DB.Close
2019-03-09 00:06:39 +01:00

176 lines
5.1 KiB
Go

// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package localstore
import (
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/swarm/shed"
"github.com/syndtr/goleveldb/leveldb"
)
var (
// gcTargetRatio defines the target number of items
// in garbage collection index that will not be removed
// on garbage collection. The target number of items
// is calculated by gcTarget function. This value must be
// in range (0,1]. For example, with 0.9 value,
// garbage collection will leave 90% of defined capacity
// in database after its run. This prevents frequent
// garbage collection runs.
gcTargetRatio = 0.9
// gcBatchSize limits the number of chunks in a single
// leveldb batch on garbage collection.
gcBatchSize uint64 = 1000
)
// collectGarbageWorker is a long running function that waits for
// collectGarbageTrigger channel to signal a garbage collection
// run. GC run iterates on gcIndex and removes older items
// form retrieval and other indexes.
func (db *DB) collectGarbageWorker() {
defer close(db.collectGarbageWorkerDone)
for {
select {
case <-db.collectGarbageTrigger:
// run a single collect garbage run and
// if done is false, gcBatchSize is reached and
// another collect garbage run is needed
collectedCount, done, err := db.collectGarbage()
if err != nil {
log.Error("localstore collect garbage", "err", err)
}
// check if another gc run is needed
if !done {
db.triggerGarbageCollection()
}
if collectedCount > 0 && testHookCollectGarbage != nil {
testHookCollectGarbage(collectedCount)
}
case <-db.close:
return
}
}
}
// collectGarbage removes chunks from retrieval and other
// indexes if maximal number of chunks in database is reached.
// This function returns the number of removed chunks. If done
// is false, another call to this function is needed to collect
// the rest of the garbage as the batch size limit is reached.
// This function is called in collectGarbageWorker.
func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) {
batch := new(leveldb.Batch)
target := db.gcTarget()
// protect database from changing idexes and gcSize
db.batchMu.Lock()
defer db.batchMu.Unlock()
gcSize, err := db.gcSize.Get()
if err != nil {
return 0, true, err
}
done = true
err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) {
if gcSize-collectedCount <= target {
return true, nil
}
// delete from retrieve, pull, gc
db.retrievalDataIndex.DeleteInBatch(batch, item)
db.retrievalAccessIndex.DeleteInBatch(batch, item)
db.pullIndex.DeleteInBatch(batch, item)
db.gcIndex.DeleteInBatch(batch, item)
collectedCount++
if collectedCount >= gcBatchSize {
// bach size limit reached,
// another gc run is needed
done = false
return true, nil
}
return false, nil
}, nil)
if err != nil {
return 0, false, err
}
db.gcSize.PutInBatch(batch, gcSize-collectedCount)
err = db.shed.WriteBatch(batch)
if err != nil {
return 0, false, err
}
return collectedCount, done, nil
}
// gcTrigger retruns the absolute value for garbage collection
// target value, calculated from db.capacity and gcTargetRatio.
func (db *DB) gcTarget() (target uint64) {
return uint64(float64(db.capacity) * gcTargetRatio)
}
// triggerGarbageCollection signals collectGarbageWorker
// to call collectGarbage.
func (db *DB) triggerGarbageCollection() {
select {
case db.collectGarbageTrigger <- struct{}{}:
case <-db.close:
default:
}
}
// incGCSizeInBatch changes gcSize field value
// by change which can be negative. This function
// must be called under batchMu lock.
func (db *DB) incGCSizeInBatch(batch *leveldb.Batch, change int64) (err error) {
if change == 0 {
return nil
}
gcSize, err := db.gcSize.Get()
if err != nil {
return err
}
var new uint64
if change > 0 {
new = gcSize + uint64(change)
} else {
// 'change' is an int64 and is negative
// a conversion is needed with correct sign
c := uint64(-change)
if c > gcSize {
// protect uint64 undeflow
return nil
}
new = gcSize - c
}
db.gcSize.PutInBatch(batch, new)
// trigger garbage collection if we reached the capacity
if new >= db.capacity {
db.triggerGarbageCollection()
}
return nil
}
// testHookCollectGarbage is a hook that can provide
// information when a garbage collection run is done
// and how many items it removed.
var testHookCollectGarbage func(collectedCount uint64)