2019-05-13 12:28:01 +00:00
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
2020-08-29 07:24:50 +00:00
/ *
2019-05-13 12:28:01 +00:00
import (
	"encoding/binary"
	"fmt"
	"math"
	"sync"
	"sync/atomic"
	"time"

	bloomfilter "github.com/holiman/bloomfilter/v2"
	"github.com/ledgerwatch/turbo-geth/common"
	"github.com/ledgerwatch/turbo-geth/common/dbutils"
	"github.com/ledgerwatch/turbo-geth/ethdb"
	"github.com/ledgerwatch/turbo-geth/log"
	"github.com/ledgerwatch/turbo-geth/metrics"
)
// Metrics describing how the fast sync bloom filter is being used.
var (
	// bloomAddMeter is marked once for every hash inserted through Add.
	bloomAddMeter = metrics.NewRegisteredMeter("trie/bloom/add", nil)
	// bloomLoadMeter is marked once for every database key loaded by init.
	bloomLoadMeter = metrics.NewRegisteredMeter("trie/bloom/load", nil)
	// bloomTestMeter is marked once for every Contains query.
	bloomTestMeter = metrics.NewRegisteredMeter("trie/bloom/test", nil)
	// bloomMissMeter is marked whenever an initialized bloom reports a hash as absent.
	bloomMissMeter = metrics.NewRegisteredMeter("trie/bloom/miss", nil)
	// bloomFaultMeter is registered here but is not marked anywhere in this file.
	bloomFaultMeter = metrics.NewRegisteredMeter("trie/bloom/fault", nil)
	// bloomErrorGauge holds the bloom's false positive probability scaled by 100000.
	bloomErrorGauge = metrics.NewRegisteredGauge("trie/bloom/error", nil)
)
// SyncBloom is a bloom filter used during fast sync to quickly decide if a trie
2020-08-21 12:10:40 +00:00
// node or contract code already exists on disk or not. It self populates from the
// provided disk database on creation in a background thread and will only start
// returning live results once that's finished.
2019-05-13 12:28:01 +00:00
type SyncBloom struct {
bloom * bloomfilter . Filter
inited uint32
closer sync . Once
closed uint32
pend sync . WaitGroup
}
// NewSyncBloom creates a new bloom filter of the given size (in megabytes) and
// initializes it from the database. The bloom is hard coded to use 3 filters.
2019-05-27 13:51:49 +00:00
func NewSyncBloom ( memory uint64 , database ethdb . Database ) * SyncBloom {
2019-05-13 12:28:01 +00:00
// Create the bloom filter to track known trie nodes
2021-01-12 16:39:31 +00:00
bloom , err := bloomfilter . New ( memory * 1024 * 1024 * 8 , 4 )
2019-05-13 12:28:01 +00:00
if err != nil {
2019-07-05 10:13:21 +00:00
panic ( fmt . Sprintf ( "failed to create bloom: %v" , err ) )
2019-05-13 12:28:01 +00:00
}
log . Info ( "Allocated fast sync bloom" , "size" , common . StorageSize ( memory * 1024 * 1024 ) )
// Assemble the fast sync bloom and init it from previous sessions
b := & SyncBloom {
bloom : bloom ,
}
b . pend . Add ( 2 )
go func ( ) {
defer b . pend . Done ( )
b . init ( database )
} ( )
go func ( ) {
defer b . pend . Done ( )
b . meter ( )
} ( )
return b
}
// init iterates over the database, pushing every trie hash into the bloom filter.
2019-05-27 13:51:49 +00:00
func ( b * SyncBloom ) init ( database ethdb . Database ) {
2019-05-13 12:28:01 +00:00
// Iterate over the database, but restart every now and again to avoid holding
// a persistent snapshot since fast sync can push a ton of data concurrently,
// bloating the disk.
//
// Note, this is fine, because everything inserted into leveldb by fast sync is
// also pushed into the bloom directly, so we're not missing anything when the
// iterator is swapped out for a new one.
var (
start = time . Now ( )
2019-05-27 13:51:49 +00:00
//swap = time.Now()
2019-05-13 12:28:01 +00:00
)
2019-05-27 13:51:49 +00:00
if atomic . LoadUint32 ( & b . closed ) == 0 {
2020-06-05 09:25:33 +00:00
_ = database . Walk ( dbutils . CurrentStateBucket , [ ] byte { } , 0 , func ( key , val [ ] byte ) ( bool , error ) {
2019-05-27 13:51:49 +00:00
// If the database entry is a trie node, add it to the bloom
if len ( key ) == common . HashLength {
2020-06-05 09:25:33 +00:00
b . bloom . Add ( syncBloomHasher ( common . CopyBytes ( key ) ) )
2019-05-27 13:51:49 +00:00
bloomLoadMeter . Mark ( 1 )
}
return true , nil
// FIXME: restore or remove in Turbo-Geth
2020-08-29 07:24:50 +00:00
// If enough time elapsed since the last iterator swap, restart
//if time.Since(swap) > 8*time.Second {
// key := common.CopyBytes(it.Key())
//
// it.Release()
// it = database.NewIteratorWithStart(key)
//
// log.Info("Initializing fast sync bloom", "items", b.bloom.N(), "errorrate", b.errorRate(), "elapsed", common.PrettyDuration(time.Since(start)))
// swap = time.Now()
//}
2019-05-27 13:51:49 +00:00
} )
2019-05-13 12:28:01 +00:00
}
// Mark the bloom filter inited and return
2021-01-12 16:39:31 +00:00
log . Info ( "Initialized state bloom" , "items" , b . bloom . N ( ) , "errorrate" , b . bloom . FalsePosititveProbability ( ) , "elapsed" , common . PrettyDuration ( time . Since ( start ) ) )
2019-05-13 12:28:01 +00:00
atomic . StoreUint32 ( & b . inited , 1 )
}
// meter periodically recalculates the false positive error rate of the bloom
// filter and reports it in a metric.
func ( b * SyncBloom ) meter ( ) {
for {
// Report the current error ration. No floats, lame, scale it up.
2021-01-12 16:39:31 +00:00
bloomErrorGauge . Update ( int64 ( b . bloom . FalsePosititveProbability ( ) * 100000 ) )
2019-05-13 12:28:01 +00:00
// Wait one second, but check termination more frequently
for i := 0 ; i < 10 ; i ++ {
if atomic . LoadUint32 ( & b . closed ) == 1 {
return
}
time . Sleep ( 100 * time . Millisecond )
}
}
}
// Close terminates any background initializer still running and releases all the
// memory allocated for the bloom.
func ( b * SyncBloom ) Close ( ) error {
b . closer . Do ( func ( ) {
// Ensure the initializer is stopped
atomic . StoreUint32 ( & b . closed , 1 )
b . pend . Wait ( )
// Wipe the bloom, but mark it "uninited" just in case someone attempts an access
2021-01-12 16:39:31 +00:00
log . Info ( "Deallocated state bloom" , "items" , b . bloom . N ( ) , "errorrate" , b . bloom . FalsePosititveProbability ( ) )
2019-05-13 12:28:01 +00:00
atomic . StoreUint32 ( & b . inited , 0 )
b . bloom = nil
} )
return nil
}
// Add inserts a new trie node hash into the bloom filter.
func ( b * SyncBloom ) Add ( hash [ ] byte ) {
if atomic . LoadUint32 ( & b . closed ) == 1 {
return
}
2021-01-12 16:39:31 +00:00
b . bloom . AddHash ( binary . BigEndian . Uint64 ( hash ) )
2019-05-13 12:28:01 +00:00
bloomAddMeter . Mark ( 1 )
}
// Contains tests if the bloom filter contains the given hash:
// - false: the bloom definitely does not contain hash
// - true: the bloom maybe contains hash
//
// While the bloom is being initialized, any query will return true.
func ( b * SyncBloom ) Contains ( hash [ ] byte ) bool {
bloomTestMeter . Mark ( 1 )
if atomic . LoadUint32 ( & b . inited ) == 0 {
// We didn't load all the trie nodes from the previous run of Geth yet. As
// such, we can't say for sure if a hash is not present for anything. Until
// the init is done, we're faking "possible presence" for everything.
return true
}
// Bloom initialized, check the real one and report any successful misses
2021-01-12 16:39:31 +00:00
maybe := b . bloom . ContainsHash ( binary . BigEndian . Uint64 ( hash ) )
2019-05-13 12:28:01 +00:00
if ! maybe {
bloomMissMeter . Mark ( 1 )
}
return maybe
}
// errorRate calculates the probability of a random containment test returning a
// false positive. With k, n and m being the filter's K(), N() and M() values it
// evaluates to:
//
//	(1 - e^(-k*(n+0.5)/(m-1)))^k
//
// NOTE(review): nothing in this file calls errorRate; the live code reports
// b.bloom.FalsePosititveProbability() instead. The hand rolled formula dates
// back to a previously used bloom library that computed it incorrectly, so
// confirm whether this method is still needed.
func (b *SyncBloom) errorRate() float64 {
	var (
		k = float64(b.bloom.K())
		n = float64(b.bloom.N())
		m = float64(b.bloom.M())
	)
	exponent := -k * (n + 0.5) / (m - 1)
	return math.Pow(1.0-math.Exp(exponent), k)
}
2020-08-29 07:24:50 +00:00
* /