erigon-pulse/ethdb/object_db.go
Alex Sharov e02d6acc7d
bitmap indices for logs (#1124)
* save progress

* try now

* don't create bloom inside rlpDecode

* don't create bloom inside ApplyTransaction

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* rename method

* print timings

* print timings

* print timings

* sort before flush

* fix err lint

* clean

* move tests to transactions

* compressed version

* up bound

* up bound

* more tests

* more tests

* more tests

* more tests

* better removal

* clean

* better performance of get/put methods

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* optimize rpcdaemon

* fix test

* fix rpcdaemon

* fix test

* simplify

* simplify

* fix nil pointer

* clean

* revert some changes

* add some logs

* clean

* try without optimize

* clean

* clean

* clean

* clean

* try

* move log_index to own stage

* move log_index to own stage

* integration add log_index stage

* integration add log_index stage

* clean

* clean

* print timing

* remove duplicates at unwind

* extract truncateBitmaps func

* try detect

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* add blackList of topics

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* clean

* sharding 1

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 2

* sharded 3

* sharded 3

* sharded 3

* speedup things by putCurrent and putReserve

* clean

* optimize trim

* clean

* remove blacklist

* add more info to err

* ?

* clean

* clean

* clean

* clean

* clean

* working version

* switch to cgo version of roaring bitmaps

* clean

* clean

* clean

* clean

* more docs

* clean

* clean

* fix logs bloom field

* Fix debug_getModifiedAccountsByNumber

* Try to fix crash

* fix problem with "absent block"

* fix problem with "absent block"

* remove optimize method call

* remove roaring iterator

* fix problem with rebuild indicess

* remove debug prints

* tests for eth_getLogs involving topics

* add tests for new stage, speparate topics into 2 buckets

* version up

* remove debug logs

* remove debug logs

* remove bloom filter implementation

* Optimisation

* Optimisatin not required, make rpctest lenient to geth errors

* Lenient to geth failures

Co-authored-by: Alexey Akhunov <akhounov@gmail.com>
2020-09-28 18:18:36 +01:00

476 lines
12 KiB
Go

// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package ethdb defines the interfaces for an Ethereum data store.
package ethdb
import (
"bytes"
"context"
"errors"
"fmt"
"github.com/ledgerwatch/turbo-geth/common"
"github.com/ledgerwatch/turbo-geth/common/dbutils"
"github.com/ledgerwatch/turbo-geth/common/debug"
"github.com/ledgerwatch/turbo-geth/core/types/accounts"
"github.com/ledgerwatch/turbo-geth/log"
"github.com/ledgerwatch/turbo-geth/metrics"
"strings"
"time"
)
// DB access latency timers, registered in the global metrics registry.
// NOTE(review): no call sites are visible in this file — presumably updated
// elsewhere in the package, or left over from a refactor; confirm before removal.
var (
	dbGetTimer = metrics.NewRegisteredTimer("db/get", nil)
	dbPutTimer = metrics.NewRegisteredTimer("db/put", nil)
)
// ObjectDatabase - is an object-style interface of DB accessing.
// It wraps a raw KV backend and opens a short-lived transaction per call.
type ObjectDatabase struct {
	kv  KV         // underlying key-value store (e.g. LMDB)
	log log.Logger // logger scoped to this database instance
	id  uint64     // instance identifier, assigned by id() at construction
}
// NewObjectDatabase wraps the given KV backend in an ObjectDatabase.
func NewObjectDatabase(kv KV) *ObjectDatabase {
	return &ObjectDatabase{
		kv:  kv,
		log: log.New("database", "object"),
		id:  id(),
	}
}
// MustOpen is like Open but panics if the database cannot be opened.
func MustOpen(path string) *ObjectDatabase {
	db, err := Open(path)
	if err == nil {
		return db
	}
	panic(err)
}
// Open - main method to open database. Choosing driver based on path suffix.
// If env TEST_DB provided - choose driver based on it. Some test using this method to open non-in-memory db
func Open(path string) (*ObjectDatabase, error) {
	testDB := debug.TestDB()
	wantLmdb := testDB == "lmdb" || strings.HasSuffix(path, "_lmdb")

	// NOTE(review): both branches currently resolve to the LMDB driver; the
	// branching is kept so the TEST_DB hook stays in place for other backends.
	var kv KV
	var err error
	if wantLmdb {
		kv, err = NewLMDB().Path(path).Open()
	} else {
		kv, err = NewLMDB().Path(path).Open()
	}
	if err != nil {
		return nil, err
	}
	return NewObjectDatabase(kv), nil
}
// Put inserts or updates a single entry inside a short-lived write transaction.
func (db *ObjectDatabase) Put(bucket string, key []byte, value []byte) error {
	return db.kv.Update(context.Background(), func(tx Tx) error {
		return tx.Cursor(bucket).Put(key, value)
	})
}
// Append appends a single entry to the end of the bucket (keys must arrive in order).
func (db *ObjectDatabase) Append(bucket string, key []byte, value []byte) error {
	return db.kv.Update(context.Background(), func(tx Tx) error {
		return tx.Cursor(bucket).Append(key, value)
	})
}
// MultiPut - requirements: input must be sorted and without duplicates.
// The returned uint64 is always 0; it exists for interface compatibility.
func (db *ObjectDatabase) MultiPut(tuples ...[]byte) (uint64, error) {
	if err := db.kv.Update(context.Background(), func(tx Tx) error {
		return MultiPut(tx, tuples...)
	}); err != nil {
		return 0, err
	}
	return 0, nil
}
// Has reports whether the bucket contains the given key.
func (db *ObjectDatabase) Has(bucket string, key []byte) (bool, error) {
	var found bool
	err := db.kv.View(context.Background(), func(tx Tx) error {
		val, errGet := tx.Get(bucket, key)
		if errGet != nil {
			return errGet
		}
		found = val != nil
		return nil
	})
	return found, err
}
// DiskSize returns the on-disk size of the backend, or 0 if the backend
// does not implement HasStats.
func (db *ObjectDatabase) DiskSize(ctx context.Context) (uint64, error) {
	if hs, ok := db.kv.(HasStats); ok {
		return hs.DiskSize(ctx)
	}
	return 0, nil
}
// Get returns the value for a given key if it's present.
// The value is copied out, since tx-owned memory is only valid inside View.
// Returns ErrKeyNotFound when the key is absent.
func (db *ObjectDatabase) Get(bucket string, key []byte) ([]byte, error) {
	var dat []byte
	err := db.kv.View(context.Background(), func(tx Tx) error {
		val, errGet := tx.Get(bucket, key)
		if errGet != nil {
			return errGet
		}
		if val != nil {
			dat = make([]byte, len(val))
			copy(dat, val)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	if dat == nil {
		return nil, ErrKeyNotFound
	}
	return dat, nil
}
// Last returns the highest key/value pair of the bucket (nil, nil on empty bucket).
// Both key and value are copied out of the transaction.
func (db *ObjectDatabase) Last(bucket string) ([]byte, []byte, error) {
	var key, value []byte
	err := db.kv.View(context.Background(), func(tx Tx) error {
		k, v, errLast := tx.Cursor(bucket).Last()
		if errLast != nil {
			return errLast
		}
		if k != nil {
			key = common.CopyBytes(k)
			value = common.CopyBytes(v)
		}
		return nil
	})
	if err != nil {
		return nil, nil, err
	}
	return key, value, nil
}
// GetIndexChunk returns proper index chunk or return error if index is not created.
// key must contain inverted block number in the end
func (db *ObjectDatabase) GetIndexChunk(bucket string, key []byte, timestamp uint64) ([]byte, error) {
	var dat []byte
	err := db.kv.View(context.Background(), func(tx Tx) error {
		c := tx.Cursor(bucket)
		k, v, err := c.Seek(dbutils.IndexChunkKey(key, timestamp))
		if err != nil {
			return err
		}
		// Seek landed past the last chunk for this key -> index not present.
		if !bytes.HasPrefix(k, dbutils.CompositeKeyWithoutIncarnation(key)) {
			return ErrKeyNotFound
		}
		// Copy out: v is only valid inside the transaction.
		dat = make([]byte, len(v))
		copy(dat, v)
		return nil
	})
	if err != nil {
		// Bug fix: previously `dat == nil` was checked before `err`, so a real
		// DB error from View was masked as ErrKeyNotFound.
		return nil, err
	}
	if dat == nil {
		return nil, ErrKeyNotFound
	}
	return dat, nil
}
// GetChangeSetByBlock returns the serialized changeset (account or storage,
// depending on the storage flag) for the given block timestamp.
// A missing changeset is not an error: nil is returned with nil error.
func GetChangeSetByBlock(db Getter, storage bool, timestamp uint64) ([]byte, error) {
	key := dbutils.EncodeTimestamp(timestamp)
	v, err := db.Get(dbutils.ChangeSetByIndexBucket(storage), key)
	// Bug fix: errors.Is takes (err, target) — the arguments were reversed,
	// so wrapped ErrKeyNotFound errors were incorrectly propagated as failures.
	if err != nil && !errors.Is(err, ErrKeyNotFound) {
		return nil, err
	}
	return v, nil
}
// Walk iterates the bucket from startkey, restricted to keys sharing the
// first fixedbits bits with startkey, invoking walker for each pair.
func (db *ObjectDatabase) Walk(bucket string, startkey []byte, fixedbits int, walker func(k, v []byte) (bool, error)) error {
	return db.kv.View(context.Background(), func(tx Tx) error {
		return Walk(tx.Cursor(bucket), startkey, fixedbits, walker)
	})
}
// MultiWalk runs a multi-range walk over the bucket; see MultiWalk helper
// for the startkeys/fixedbits semantics.
func (db *ObjectDatabase) MultiWalk(bucket string, startkeys [][]byte, fixedbits []int, walker func(int, []byte, []byte) error) error {
	err := db.kv.View(context.Background(), func(tx Tx) error {
		return MultiWalk(tx.Cursor(bucket), startkeys, fixedbits, walker)
	})
	return err
}
// Delete deletes the key from the queue and database
func (db *ObjectDatabase) Delete(bucket string, key []byte) error {
	// Execute the actual operation inside a short-lived write transaction.
	return db.kv.Update(context.Background(), func(tx Tx) error {
		return tx.Cursor(bucket).Delete(key)
	})
}
// BucketExists reports whether a bucket with the given name exists.
// Fails if the backend transaction does not support bucket migration.
func (db *ObjectDatabase) BucketExists(name string) (bool, error) {
	var exists bool
	err := db.kv.View(context.Background(), func(tx Tx) error {
		migrator, ok := tx.(BucketMigrator)
		if !ok {
			return fmt.Errorf("%T doesn't implement ethdb.TxMigrator interface", db.kv)
		}
		exists = migrator.ExistsBucket(name)
		return nil
	})
	if err != nil {
		return false, err
	}
	return exists, nil
}
// ClearBuckets empties each named bucket, one write transaction per bucket.
// Stops at the first failure.
func (db *ObjectDatabase) ClearBuckets(buckets ...string) error {
	for _, name := range buckets {
		name := name // capture per-iteration for the closure
		if err := db.kv.Update(context.Background(), func(tx Tx) error {
			migrator, ok := tx.(BucketMigrator)
			if !ok {
				return fmt.Errorf("%T doesn't implement ethdb.TxMigrator interface", db.kv)
			}
			return migrator.ClearBucket(name)
		}); err != nil {
			return err
		}
	}
	return nil
}
// DropBuckets deletes each named bucket entirely, one write transaction per
// bucket, logging each drop. Stops at the first failure.
func (db *ObjectDatabase) DropBuckets(buckets ...string) error {
	for _, name := range buckets {
		name := name // capture per-iteration for the closure
		log.Info("Dropping bucket", "name", name)
		if err := db.kv.Update(context.Background(), func(tx Tx) error {
			migrator, ok := tx.(BucketMigrator)
			if !ok {
				return fmt.Errorf("%T doesn't implement ethdb.TxMigrator interface", db.kv)
			}
			return migrator.DropBucket(name)
		}); err != nil {
			return err
		}
	}
	return nil
}
// Close closes the underlying key-value store. The database must not be
// used afterwards.
func (db *ObjectDatabase) Close() {
	db.kv.Close()
}
// Keys returns every (bucket name, key) pair in the database, flattened as
// [name0, key0, name1, key1, ...]. It iterates ALL buckets — expensive;
// intended for debugging/inspection rather than hot paths.
func (db *ObjectDatabase) Keys() ([][]byte, error) {
	var keys [][]byte
	err := db.kv.View(context.Background(), func(tx Tx) error {
		for _, name := range dbutils.Buckets {
			var nameCopy = make([]byte, len(name))
			copy(nameCopy, name)
			// Bug fix: the previous version `return`ed inside this loop, so
			// only the FIRST bucket was ever visited.
			if err := ForEach(tx.Cursor(name), func(k, _ []byte) (bool, error) {
				var kCopy = make([]byte, len(k))
				copy(kCopy, k)
				keys = append(append(keys, nameCopy), kCopy)
				return true, nil
			}); err != nil {
				return err
			}
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	return keys, nil
}
// KV exposes the underlying key-value store backend.
func (db *ObjectDatabase) KV() KV {
	return db.kv
}
// MemCopy copies the entire database into a fresh in-memory LMDB instance:
// every bucket is iterated and re-inserted. Panics on any error — intended
// for tests and tooling, not production paths.
func (db *ObjectDatabase) MemCopy() *ObjectDatabase {
	var mem *ObjectDatabase
	// Open the db and recover any potential corruptions
	switch db.kv.(type) {
	case *LmdbKV:
		mem = NewObjectDatabase(NewLMDB().InMem().MustOpen())
	default:
		// Robustness fix: an unknown backend previously left mem == nil and
		// crashed below with an opaque nil-pointer dereference; fail fast
		// with a clear message instead.
		panic(fmt.Sprintf("MemCopy is not supported for backend %T", db.kv))
	}
	if err := db.kv.View(context.Background(), func(readTx Tx) error {
		for _, name := range dbutils.Buckets {
			name := name // capture per-iteration for the closure
			if err := mem.kv.Update(context.Background(), func(writeTx Tx) error {
				newBucketToWrite := writeTx.Cursor(name)
				return ForEach(readTx.Cursor(name), func(k, v []byte) (bool, error) {
					// Copy k/v: they are only valid inside the read transaction.
					if err := newBucketToWrite.Put(common.CopyBytes(k), common.CopyBytes(v)); err != nil {
						return false, err
					}
					return true, nil
				})
			}); err != nil {
				return err
			}
		}
		return nil
	}); err != nil {
		panic(err)
	}
	return mem
}
// NewBatch returns an in-memory mutation buffer backed by this database;
// writes are accumulated until the batch is committed.
func (db *ObjectDatabase) NewBatch() DbWithPendingMutations {
	return &mutation{
		db:   db,
		puts: newPuts(),
	}
}
// Begin starts a read-write transaction wrapped in a TxDb. The caller is
// responsible for committing or rolling it back.
func (db *ObjectDatabase) Begin(ctx context.Context) (DbWithPendingMutations, error) {
	batch := &TxDb{db: db}
	if err := batch.begin(ctx, nil); err != nil {
		// Bug fix: this previously panicked, making the error return value
		// dead code; propagate the error to the caller instead.
		return nil, err
	}
	return batch, nil
}
// IdealBatchSize defines the size of the data batches should ideally add in one write.
// Not meaningful for ObjectDatabase: only mutation batches have a preferred
// size (bounded by RAM), so calling this is a programming error.
func (db *ObjectDatabase) IdealBatchSize() int {
	panic("only mutation hast preferred batch size, because it limited by RAM")
}
// [TURBO-GETH] Freezer support (not implemented yet)
// Ancients returns an error as we don't have a backing chain freezer.
func (db *ObjectDatabase) Ancients() (uint64, error) {
	return 0, errNotSupported
}
// TruncateAncients returns an error as we don't have a backing chain freezer.
// The items argument is ignored.
func (db *ObjectDatabase) TruncateAncients(items uint64) error {
	return errNotSupported
}
// Reserve pre-allocates i bytes of value space for the key.
// Only TxDb supports this operation; calling it here is a programming error.
func (db *ObjectDatabase) Reserve(bucket string, key []byte, i int) ([]byte, error) {
	panic("supported only by TxDb")
}
// Type which expecting sequence of triplets: dbi, key, value, ....
// It sorts entries by dbi name, then inside dbi clusters sort by keys
type MultiPutTuples [][]byte
func (t MultiPutTuples) Len() int { return len(t) / 3 }
func (t MultiPutTuples) Less(i, j int) bool {
i3, j3 := i*3, j*3
cmp := bytes.Compare(t[i3], t[j3])
if cmp == -1 {
return true
}
if cmp == 0 {
return bytes.Compare(t[i3+1], t[j3+1]) == -1
}
return false
}
func (t MultiPutTuples) Swap(i, j int) {
i3, j3 := i*3, j*3
t[i3], t[j3] = t[j3], t[i3]
t[i3+1], t[j3+1] = t[j3+1], t[i3+1]
t[i3+2], t[j3+2] = t[j3+2], t[i3+2]
}
// Get reads a single value from the given KV store, copying it out of the
// transaction. Returns ErrKeyNotFound when the key is absent.
func Get(db KV, bucket string, key []byte) ([]byte, error) {
	var dat []byte
	err := db.View(context.Background(), func(tx Tx) error {
		v, err := tx.Get(bucket, key)
		if err != nil {
			return err
		}
		if v != nil {
			// Copy out: v is only valid inside the transaction.
			dat = make([]byte, len(v))
			copy(dat, v)
		}
		return nil
	})
	if err != nil {
		// Bug fix: previously `dat == nil` was checked before `err`, so a real
		// DB error was masked as ErrKeyNotFound.
		return nil, err
	}
	if dat == nil {
		return nil, ErrKeyNotFound
	}
	return dat, nil
}
// HackAddRootToAccountBytes decodes a storage-encoded account, sets its
// storage root to the given hash, and returns the re-encoded bytes.
func HackAddRootToAccountBytes(accNoRoot []byte, root []byte) (accWithRoot []byte, err error) {
	var acc accounts.Account
	if err = acc.DecodeForStorage(accNoRoot); err != nil {
		return nil, err
	}
	acc.Root = common.BytesToHash(root)
	buf := make([]byte, acc.EncodingLengthForStorage())
	acc.EncodeForStorage(buf)
	return buf, nil
}
// Bytesmask converts a prefix length in bits into the number of whole bytes
// it covers and a bit-mask selecting the used bits of the final byte
// (0xff when the prefix ends on a byte boundary).
func Bytesmask(fixedbits int) (fixedbytes int, mask byte) {
	fixedbytes = (fixedbits + 7) / 8
	mask = 0xff
	if rem := fixedbits & 7; rem != 0 {
		mask <<= uint(8 - rem)
	}
	return fixedbytes, mask
}
// InspectDatabase prints statistics about the database contents.
// Stub: always returns errNotSupported.
// FIXME: implement in Turbo-Geth
// see https://github.com/ethereum/go-ethereum/blob/f5d89cdb72c1e82e9deb54754bef8dd20bf12591/core/rawdb/database.go#L224
func InspectDatabase(db Database) error {
	return errNotSupported
}
// NewDatabaseWithFreezer is a compatibility shim: freezer support is not
// implemented, so the database is returned unchanged with a nil error.
// FIXME: implement freezer in Turbo-Geth
func NewDatabaseWithFreezer(db *ObjectDatabase, dir, suffix string) (*ObjectDatabase, error) {
	return db, nil
}
// WarmUp walks the whole bucket to pull its pages into the OS cache,
// logging progress on each logEvery tick and aborting when quit closes.
func WarmUp(tx Tx, bucket string, logEvery *time.Ticker, quit <-chan struct{}) error {
	count := 0
	c := tx.Cursor(bucket)
	totalKeys, errCount := c.Count()
	if errCount != nil {
		return errCount
	}
	// Bug fix: the loop condition previously checked only `k != nil`, so an
	// iterator error accompanied by a nil key was silently dropped; keep
	// looping while either a key or an error is present so errors surface.
	for k, _, err := c.First(); k != nil || err != nil; k, _, err = c.Next() {
		if err != nil {
			return err
		}
		count++
		select {
		default:
		case <-quit:
			return common.ErrStopped
		case <-logEvery.C:
			log.Info("Warmed up state", "progress", fmt.Sprintf("%.2fM/%.2fM", float64(count)/1_000_000, float64(totalKeys)/1_000_000))
		}
	}
	return nil
}