package bitmapdb

import (
	"bytes"
	"encoding/binary"
	"sort"

	"github.com/RoaringBitmap/roaring"
	"github.com/RoaringBitmap/roaring/roaring64"
	"github.com/c2h5oh/datasize"
	libcommon "github.com/ledgerwatch/erigon-lib/common"
	"github.com/ledgerwatch/erigon-lib/kv"
	"github.com/ledgerwatch/erigon/common/math"
	"github.com/ledgerwatch/erigon/ethdb"
)

// ChunkLimit is the serialized-size limit (in bytes) used when bitmaps are split into chunks.
// It is the threshold beyond which MDBX overflow pages appear: 4096 / 2 - (keySize + 8).
const ChunkLimit = uint64(1950 * datasize.B)

// maxSearchSpan is the largest value representable by the platform's int type.
const maxSearchSpan = int(^uint(0) >> 1)

// searchSpan converts the value span of a bitmap (Maximum - Minimum) into the
// upper bound passed to sort.Search. The span is clamped to the largest int:
// an unchecked conversion of a huge span would become negative, sort.Search
// would then return 0 and the cut would make no progress.
func searchSpan(span uint64) int {
	if span > uint64(maxSearchSpan) {
		return maxSearchSpan
	}
	return int(span)
}

// CutLeft cuts the leftmost part of `bm` whose serialized size does not exceed
// `sizeLimit` bytes, removes that part from `bm` and returns it.
//
// Returns nil when `bm` has zero cardinality. When the whole bitmap already
// fits into `sizeLimit`, `bm` is cleared and a run-optimized bitmap with all
// of its values is returned.
//
// NOTE: if `sizeLimit` is smaller than the serialized size of a bitmap holding
// only the minimum element, the search yields an empty range: the returned
// chunk is empty and `bm` is left unchanged.
func CutLeft(bm *roaring.Bitmap, sizeLimit uint64) *roaring.Bitmap {
	if bm.GetCardinality() == 0 {
		return nil
	}

	sz := bm.GetSerializedSizeInBytes()
	if sz <= sizeLimit {
		lft := roaring.New()
		lft.AddRange(uint64(bm.Minimum()), uint64(bm.Maximum())+1)
		lft.And(bm)
		lft.RunOptimize()
		bm.Clear()
		return lft
	}

	from := uint64(bm.Minimum())
	minMax := bm.Maximum() - bm.Minimum()
	// Binary search for the smallest offset at which the left part no longer fits.
	to := sort.Search(searchSpan(uint64(minMax)), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
		lft := roaring.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
		lft.AddRange(from, from+uint64(i)+1)
		lft.And(bm)
		lft.RunOptimize()
		return lft.GetSerializedSizeInBytes() > sizeLimit
	})

	lft := roaring.New()
	lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
	lft.And(bm)
	bm.RemoveRange(from, from+uint64(to))
	lft.RunOptimize()
	return lft
}

// WalkChunks repeatedly cuts chunks of at most `sizeLimit` serialized bytes
// from the left of `bm` (see CutLeft) and passes each one to `f`. `isLast` is
// true when `bm` became empty after the cut. `bm` is consumed in the process.
// Iteration stops at the first error returned by `f`, which is returned as is.
func WalkChunks(bm *roaring.Bitmap, sizeLimit uint64, f func(chunk *roaring.Bitmap, isLast bool) error) error {
	for bm.GetCardinality() > 0 {
		// The chunk must be cut before emptiness is checked: isLast describes
		// the state of bm after the cut.
		chunk := CutLeft(bm, sizeLimit)
		isLast := bm.GetCardinality() == 0
		if err := f(chunk, isLast); err != nil {
			return err
		}
	}
	return nil
}

// WalkChunkWithKeys splits `m` into chunks (see WalkChunks) and calls `f` with
// each chunk and its db key. The chunk key is `k` followed by 4 big-endian
// bytes: the chunk's maximum value, or 0xFFFFFFFF for the last chunk.
func WalkChunkWithKeys(k []byte, m *roaring.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring.Bitmap) error) error {
	return WalkChunks(m, sizeLimit, func(chunk *roaring.Bitmap, isLast bool) error {
		chunkKey := make([]byte, len(k)+4)
		copy(chunkKey, k)
		if isLast {
			binary.BigEndian.PutUint32(chunkKey[len(k):], ^uint32(0))
		} else {
			binary.BigEndian.PutUint32(chunkKey[len(k):], chunk.Maximum())
		}
		return f(chunkKey, chunk)
	})
}

// TruncateRange removes every value >= `to` from the bitmap stored under `key`.
//
// It loads the chunks starting from the one that covers `to`, removes the
// range [to, max] from the merged bitmap, deletes the loaded chunks from the
// db and writes the remainder back, re-split by ChunkLimit.
func TruncateRange(db kv.RwTx, bucket string, key []byte, to uint32) error {
	chunkKey := make([]byte, len(key)+4)
	copy(chunkKey, key)
	binary.BigEndian.PutUint32(chunkKey[len(chunkKey)-4:], to)
	bm, err := Get(db, bucket, key, to, math.MaxUint32)
	if err != nil {
		return err
	}

	if bm.GetCardinality() > 0 && to <= bm.Maximum() {
		bm.RemoveRange(uint64(to), uint64(bm.Maximum())+1)
	}

	c, err := db.Cursor(bucket)
	if err != nil {
		return err
	}
	defer c.Close()
	// Delete every chunk of this key starting from the one that covers `to`.
	if err := ethdb.Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
		if !bytes.HasPrefix(k, key) {
			return false, nil
		}
		if err := db.Delete(bucket, k, nil); err != nil {
			return false, err
		}
		return true, nil
	}); err != nil {
		return err
	}

	// Write back what is left of the truncated bitmap.
	buf := bytes.NewBuffer(nil)
	return WalkChunkWithKeys(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring.Bitmap) error {
		buf.Reset()
		if _, err := chunk.WriteTo(buf); err != nil {
			return err
		}
		return db.Put(bucket, chunkKey, libcommon.Copy(buf.Bytes()))
	})
}

// Get reads as many chunks as needed to satisfy the [from, to] condition and
// joins them into one bitmap with the Or operator.
//
// Reading starts at the first db key >= key+from and stops after the first
// chunk whose 4-byte key suffix is >= `to`, or when keys no longer start with
// `key`. Whole chunks are returned, so the result may contain values outside
// of [from, to]. Returns an empty bitmap when no chunk is found.
func Get(db kv.Tx, bucket string, key []byte, from, to uint32) (*roaring.Bitmap, error) {
	var chunks []*roaring.Bitmap

	fromKey := make([]byte, len(key)+4)
	copy(fromKey, key)
	binary.BigEndian.PutUint32(fromKey[len(fromKey)-4:], from)
	c, err := db.Cursor(bucket)
	if err != nil {
		return nil, err
	}
	defer c.Close()
	for k, v, err := c.Seek(fromKey); ; k, v, err = c.Next() {
		// The error must be checked before the key: a failed Seek/Next may
		// come with a nil key and must not be mistaken for the end of data.
		if err != nil {
			return nil, err
		}
		if k == nil || !bytes.HasPrefix(k, key) {
			break
		}
		bm := roaring.New()
		if _, err := bm.ReadFrom(bytes.NewReader(v)); err != nil {
			return nil, err
		}
		chunks = append(chunks, bm)
		if binary.BigEndian.Uint32(k[len(k)-4:]) >= to {
			break
		}
	}

	if len(chunks) == 0 {
		return roaring.New(), nil
	}
	return roaring.FastOr(chunks...), nil
}

// SeekInBitmap returns the smallest value in the bitmap which is >= n.
// `ok` is false when there is no such value.
//nolint:deadcode
func SeekInBitmap(m *roaring.Bitmap, n uint32) (found uint32, ok bool) {
	i := m.Iterator()
	i.AdvanceIfNeeded(n)
	ok = i.HasNext()
	if ok {
		found = i.Next()
	}
	return found, ok
}

// CutLeft64 cuts the leftmost part of `bm` whose serialized size does not
// exceed `sizeLimit` bytes, removes that part from `bm` and returns it.
//
// Returns nil when `bm` has zero cardinality. When the whole bitmap already
// fits into `sizeLimit`, `bm` is cleared and a run-optimized bitmap built from
// its values is returned.
//
// NOTE: if `sizeLimit` is smaller than the serialized size of a bitmap holding
// only the minimum element, the search yields an empty range: the returned
// chunk is empty and `bm` is left unchanged.
func CutLeft64(bm *roaring64.Bitmap, sizeLimit uint64) *roaring64.Bitmap {
	if bm.GetCardinality() == 0 {
		return nil
	}

	sz := bm.GetSerializedSizeInBytes()
	if sz <= sizeLimit {
		lft := roaring64.New()
		lft.AddRange(bm.Minimum(), bm.Maximum()+1)
		lft.And(bm)
		lft.RunOptimize()
		bm.Clear()
		return lft
	}

	from := bm.Minimum()
	minMax := bm.Maximum() - bm.Minimum()
	// Binary search for the smallest offset at which the left part no longer fits.
	// The span is clamped because a 64-bit span may not fit into int.
	to := sort.Search(searchSpan(minMax), func(i int) bool { // can be optimized to avoid "too small steps", but let's leave it for readability
		lft := roaring64.New() // bitmap.Clear() method intentionally not used here, because then serialized size of bitmap getting bigger
		lft.AddRange(from, from+uint64(i)+1)
		lft.And(bm)
		lft.RunOptimize()
		return lft.GetSerializedSizeInBytes() > sizeLimit
	})

	lft := roaring64.New()
	lft.AddRange(from, from+uint64(to)) // no +1 because sort.Search returns element which is just higher threshold - but we need lower
	lft.And(bm)
	bm.RemoveRange(from, from+uint64(to))
	lft.RunOptimize()
	return lft
}

// WalkChunks64 repeatedly cuts chunks of at most `sizeLimit` serialized bytes
// from the left of `bm` (see CutLeft64) and passes each one to `f`. `isLast`
// is true when `bm` became empty after the cut. `bm` is consumed in the
// process. Iteration stops at the first error returned by `f`.
func WalkChunks64(bm *roaring64.Bitmap, sizeLimit uint64, f func(chunk *roaring64.Bitmap, isLast bool) error) error {
	for bm.GetCardinality() > 0 {
		// The chunk must be cut before emptiness is checked: isLast describes
		// the state of bm after the cut.
		chunk := CutLeft64(bm, sizeLimit)
		isLast := bm.GetCardinality() == 0
		if err := f(chunk, isLast); err != nil {
			return err
		}
	}
	return nil
}

// WalkChunkWithKeys64 splits `m` into chunks (see WalkChunks64) and calls `f`
// with each chunk and its db key. The chunk key is `k` followed by 8
// big-endian bytes: the chunk's maximum value, or all 0xFF bytes for the last
// chunk.
func WalkChunkWithKeys64(k []byte, m *roaring64.Bitmap, sizeLimit uint64, f func(chunkKey []byte, chunk *roaring64.Bitmap) error) error {
	return WalkChunks64(m, sizeLimit, func(chunk *roaring64.Bitmap, isLast bool) error {
		chunkKey := make([]byte, len(k)+8)
		copy(chunkKey, k)
		if isLast {
			binary.BigEndian.PutUint64(chunkKey[len(k):], ^uint64(0))
		} else {
			binary.BigEndian.PutUint64(chunkKey[len(k):], chunk.Maximum())
		}
		return f(chunkKey, chunk)
	})
}

// TruncateRange64 removes every value >= `to` from the 64-bit bitmap stored
// under `key`.
//
// It loads the chunks starting from the one that covers `to`, removes the
// range [to, max] from the merged bitmap, deletes the loaded chunks from the
// db and writes the remainder back, re-split by ChunkLimit.
func TruncateRange64(db kv.RwTx, bucket string, key []byte, to uint64) error {
	chunkKey := make([]byte, len(key)+8)
	copy(chunkKey, key)
	binary.BigEndian.PutUint64(chunkKey[len(chunkKey)-8:], to)
	bm, err := Get64(db, bucket, key, to, math.MaxUint64)
	if err != nil {
		return err
	}

	if bm.GetCardinality() > 0 && to <= bm.Maximum() {
		bm.RemoveRange(to, bm.Maximum()+1)
	}

	c, err := db.Cursor(bucket)
	if err != nil {
		return err
	}
	defer c.Close()
	cDel, err := db.RwCursor(bucket)
	if err != nil {
		return err
	}
	defer cDel.Close()
	// Delete every chunk of this key starting from the one that covers `to`;
	// a separate cursor performs the deletes while `c` iterates.
	if err := ethdb.Walk(c, chunkKey, 0, func(k, v []byte) (bool, error) {
		if !bytes.HasPrefix(k, key) {
			return false, nil
		}
		if err := cDel.Delete(k, nil); err != nil {
			return false, err
		}
		return true, nil
	}); err != nil {
		return err
	}

	// Write back what is left of the truncated bitmap.
	buf := bytes.NewBuffer(nil)
	return WalkChunkWithKeys64(key, bm, ChunkLimit, func(chunkKey []byte, chunk *roaring64.Bitmap) error {
		buf.Reset()
		if _, err := chunk.WriteTo(buf); err != nil {
			return err
		}
		return db.Put(bucket, chunkKey, libcommon.Copy(buf.Bytes()))
	})
}

// Get64 reads as many chunks as needed to satisfy the [from, to] condition
// and joins them into one bitmap with the Or operator.
//
// Reading starts at the first db key >= key+from and stops after the first
// chunk whose 8-byte key suffix is >= `to`, or when keys no longer start with
// `key`. Whole chunks are returned, so the result may contain values outside
// of [from, to]. Returns an empty bitmap when no chunk is found.
func Get64(db kv.Tx, bucket string, key []byte, from, to uint64) (*roaring64.Bitmap, error) {
	var chunks []*roaring64.Bitmap

	fromKey := make([]byte, len(key)+8)
	copy(fromKey, key)
	binary.BigEndian.PutUint64(fromKey[len(fromKey)-8:], from)

	c, err := db.Cursor(bucket)
	if err != nil {
		return nil, err
	}
	defer c.Close()
	for k, v, err := c.Seek(fromKey); ; k, v, err = c.Next() {
		// The error must be checked before the key: a failed Seek/Next may
		// come with a nil key and must not be mistaken for the end of data.
		if err != nil {
			return nil, err
		}
		if k == nil || !bytes.HasPrefix(k, key) {
			break
		}
		bm := roaring64.New()
		if _, err := bm.ReadFrom(bytes.NewReader(v)); err != nil {
			return nil, err
		}
		chunks = append(chunks, bm)
		if binary.BigEndian.Uint64(k[len(k)-8:]) >= to {
			break
		}
	}

	if len(chunks) == 0 {
		return roaring64.New(), nil
	}
	return roaring64.FastOr(chunks...), nil
}

// SeekInBitmap64 returns the smallest value in the bitmap which is >= n.
// `ok` is false when the bitmap is empty or holds no such value.
func SeekInBitmap64(m *roaring64.Bitmap, n uint64) (found uint64, ok bool) {
	if m.IsEmpty() {
		return 0, false
	}
	if n == 0 {
		return m.Minimum(), true
	}
	// Rank(n-1) is the number of values < n, i.e. the 0-based index of the
	// first value >= n.
	searchRank := m.Rank(n - 1)
	if searchRank >= m.GetCardinality() {
		return 0, false
	}
	// The error is ignored on purpose: searchRank was just checked to be
	// below the cardinality.
	found, _ = m.Select(searchRank)
	return found, true
}