erigon-pulse/ethdb/bitmapdb/dbutils.go
ledgerwatch dffcc57ecc
[erigon2] Fix history bug, optimise bitmap search (#3510)
* Fix history bug, optimise bitmap search

* Optimise SeekInBitmap64

* Update erigon-lib

Co-authored-by: Alexey Sharp <alexeysharp@Alexeys-iMac.local>
2022-02-13 20:02:18 +00:00

327 lines
8.9 KiB
Go

package bitmapdb
import (
"bytes"
"encoding/binary"
"sort"
"github.com/RoaringBitmap/roaring"
"github.com/RoaringBitmap/roaring/roaring64"
"github.com/c2h5oh/datasize"
libcommon "github.com/ledgerwatch/erigon-lib/common"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon/common/math"
"github.com/ledgerwatch/erigon/ethdb"
)
const ChunkLimit = uint64(1950 * datasize.B) // threshold beyond which MDBX overflow pages appear: 4096 / 2 - (keySize + 8)
// CutLeft - cut from bitmap `targetSize` bytes from left
// removing lft part from `bm`
// returns nil on zero cardinality
func CutLeft(bm *roaring.Bitmap, sizeLimit uint64) *roaring.Bitmap {
if bm.GetCardinality() == 0 {
return nil
}
sz := bm.GetSerializedSizeInBytes()
if sz <= sizeLimit {
lft := roaring.New()
lft.AddRange(uint64(bm.Minimum()), uint64(bm.Maximum())+1)
lft.And(bm)
lft.RunOptimize()
bm.Clear()
return lft
}
from := uint64(bm.Minimum())
minMax := bm.Maximum() - bm.Minimum()
to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
lft := roaring.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
lft.AddRange(from, from+uint64(i)+1)
lft.And(bm)
lft.RunOptimize()
return lft.GetSerializedSizeInBytes() > sizeLimit
})
lft := roaring.New()
lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
lft.And(bm)
bm.RemoveRange(from, from+uint64(to))
lft.RunOptimize()
return lft
}
func WalkChunks(bm *roaring.Bitmap, sizeLimit uint64, f func(chunk *roaring.Bitmap, isLast bool) error) error {
for bm.GetCardinality() > 0 {
if err := f(CutLeft(bm, sizeLimit), bm.GetCardinality() == 0); err != nil {
return err
}
}
return nil
}
func WalkChunkWithKeys(k []byte, m *roaring.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring.Bitmap) error) error {
return WalkChunks(m, sizeLimit, func(chunk *roaring.Bitmap, isLast bool) error {
chunkKey := make([]byte, len(k)+4)
copy(chunkKey, k)
if isLast {
binary.BigEndian.PutUint32(chunkKey[len(k):], ^uint32(0))
} else {
binary.BigEndian.PutUint32(chunkKey[len(k):], chunk.Maximum())
}
return f(chunkKey, chunk)
})
}
// TruncateRange - gets existing bitmap in db and call RemoveRange operator on it.
// starts from hot shard, stops when shard not overlap with [from-to)
// !Important: [from, to)
func TruncateRange(db kv.RwTx, bucket string, key []byte, to uint32) error {
chunkKey := make([]byte, len(key)+4)
copy(chunkKey, key)
binary.BigEndian.PutUint32(chunkKey[len(chunkKey)-4:], to)
bm, err := Get(db, bucket, key, to, math.MaxUint32)
if err != nil {
return err
}
if bm.GetCardinality() > 0 && to <= bm.Maximum() {
bm.RemoveRange(uint64(to), uint64(bm.Maximum())+1)
}
c, err := db.Cursor(bucket)
if err != nil {
return err
}
defer c.Close()
if err := ethdb.Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
if !bytes.HasPrefix(k, key) {
return false, nil
}
if err := db.Delete(bucket, k, nil); err != nil {
return false, err
}
return true, nil
}); err != nil {
return err
}
buf := bytes.NewBuffer(nil)
return WalkChunkWithKeys(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring.Bitmap) error {
buf.Reset()
if _, err := chunk.WriteTo(buf); err != nil {
return err
}
return db.Put(bucket, chunkKey, libcommon.Copy(buf.Bytes()))
})
}
// Get - reading as much chunks as needed to satisfy [from, to] condition
// join all chunks to 1 bitmap by Or operator
func Get(db kv.Tx, bucket string, key []byte, from, to uint32) (*roaring.Bitmap, error) {
var chunks []*roaring.Bitmap
fromKey := make([]byte, len(key)+4)
copy(fromKey, key)
binary.BigEndian.PutUint32(fromKey[len(fromKey)-4:], from)
c, err := db.Cursor(bucket)
if err != nil {
return nil, err
}
defer c.Close()
for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() {
if err != nil {
return nil, err
}
if !bytes.HasPrefix(k, key) {
break
}
bm := roaring.New()
if _, err := bm.ReadFrom(bytes.NewReader(v)); err != nil {
return nil, err
}
chunks = append(chunks, bm)
if binary.BigEndian.Uint32(k[len(k)-4:]) >= to {
break
}
}
if len(chunks) == 0 {
return roaring.New(), nil
}
return roaring.FastOr(chunks...), nil
}
// SeekInBitmap - returns value in bitmap which is >= n
//nolint:deadcode
func SeekInBitmap(m *roaring.Bitmap, n uint32) (found uint32, ok bool) {
i := m.Iterator()
i.AdvanceIfNeeded(n)
ok = i.HasNext()
if ok {
found = i.Next()
}
return found, ok
}
// CutLeft - cut from bitmap `targetSize` bytes from left
// removing lft part from `bm`
// returns nil on zero cardinality
func CutLeft64(bm *roaring64.Bitmap, sizeLimit uint64) *roaring64.Bitmap {
if bm.GetCardinality() == 0 {
return nil
}
sz := bm.GetSerializedSizeInBytes()
if sz <= sizeLimit {
lft := roaring64.New()
lft.AddRange(bm.Minimum(), bm.Maximum()+1)
lft.And(bm)
lft.RunOptimize()
bm.Clear()
return lft
}
from := bm.Minimum()
minMax := bm.Maximum() - bm.Minimum()
to := sort.Search(int(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
lft := roaring64.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
lft.AddRange(from, from+uint64(i)+1)
lft.And(bm)
lft.RunOptimize()
return lft.GetSerializedSizeInBytes() > sizeLimit
})
lft := roaring64.New()
lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
lft.And(bm)
bm.RemoveRange(from, from+uint64(to))
lft.RunOptimize()
return lft
}
func WalkChunks64(bm *roaring64.Bitmap, sizeLimit uint64, f func(chunk *roaring64.Bitmap, isLast bool) error) error {
for bm.GetCardinality() > 0 {
if err := f(CutLeft64(bm, sizeLimit), bm.GetCardinality() == 0); err != nil {
return err
}
}
return nil
}
func WalkChunkWithKeys64(k []byte, m *roaring64.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring64.Bitmap) error) error {
return WalkChunks64(m, sizeLimit, func(chunk *roaring64.Bitmap, isLast bool) error {
chunkKey := make([]byte, len(k)+8)
copy(chunkKey, k)
if isLast {
binary.BigEndian.PutUint64(chunkKey[len(k):], ^uint64(0))
} else {
binary.BigEndian.PutUint64(chunkKey[len(k):], chunk.Maximum())
}
return f(chunkKey, chunk)
})
}
// TruncateRange - gets existing bitmap in db and call RemoveRange operator on it.
// starts from hot shard, stops when shard not overlap with [from-to)
// !Important: [from, to)
func TruncateRange64(db kv.RwTx, bucket string, key []byte, to uint64) error {
chunkKey := make([]byte, len(key)+8)
copy(chunkKey, key)
binary.BigEndian.PutUint64(chunkKey[len(chunkKey)-8:], to)
bm, err := Get64(db, bucket, key, to, math.MaxUint64)
if err != nil {
return err
}
if bm.GetCardinality() > 0 && to <= bm.Maximum() {
bm.RemoveRange(to, bm.Maximum()+1)
}
c, err := db.Cursor(bucket)
if err != nil {
return err
}
defer c.Close()
cDel, err := db.RwCursor(bucket)
if err != nil {
return err
}
defer cDel.Close()
if err := ethdb.Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
if !bytes.HasPrefix(k, key) {
return false, nil
}
if err := cDel.Delete(k, nil); err != nil {
return false, err
}
return true, nil
}); err != nil {
return err
}
buf := bytes.NewBuffer(nil)
return WalkChunkWithKeys64(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring64.Bitmap) error {
buf.Reset()
if _, err := chunk.WriteTo(buf); err != nil {
return err
}
return db.Put(bucket, chunkKey, libcommon.Copy(buf.Bytes()))
})
}
// Get - reading as much chunks as needed to satisfy [from, to] condition
// join all chunks to 1 bitmap by Or operator
func Get64(db kv.Tx, bucket string, key []byte, from, to uint64) (*roaring64.Bitmap, error) {
var chunks []*roaring64.Bitmap
fromKey := make([]byte, len(key)+8)
copy(fromKey, key)
binary.BigEndian.PutUint64(fromKey[len(fromKey)-8:], from)
c, err := db.Cursor(bucket)
if err != nil {
return nil, err
}
defer c.Close()
for k, v, err := c.Seek(fromKey); k != nil; k, v, err = c.Next() {
if err != nil {
return nil, err
}
if !bytes.HasPrefix(k, key) {
break
}
bm := roaring64.New()
_, err := bm.ReadFrom(bytes.NewReader(v))
if err != nil {
return nil, err
}
chunks = append(chunks, bm)
if binary.BigEndian.Uint64(k[len(k)-8:]) >= to {
break
}
}
if len(chunks) == 0 {
return roaring64.New(), nil
}
return roaring64.FastOr(chunks...), nil
}
// SeekInBitmap - returns value in bitmap which is >= n
func SeekInBitmap64(m *roaring64.Bitmap, n uint64) (found uint64, ok bool) {
if m.IsEmpty() {
return 0, false
}
if n == 0 {
return m.Minimum(), true
}
searchRank := m.Rank(n - 1)
if searchRank >= m.GetCardinality() {
return 0, false
}
found, _ = m.Select(searchRank)
return found, true
}