erigon-pulse/state/inverted_index.go
2022-09-18 17:38:43 +07:00

672 lines
18 KiB
Go

/*
Copyright 2022 Erigon contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"bytes"
"container/heap"
"context"
"encoding/binary"
"fmt"
"io/fs"
"os"
"path/filepath"
"regexp"
"strconv"
"github.com/RoaringBitmap/roaring/roaring64"
"github.com/google/btree"
"github.com/ledgerwatch/erigon-lib/common/dir"
"github.com/ledgerwatch/erigon-lib/compress"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon-lib/recsplit"
"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
"github.com/ledgerwatch/log/v3"
"golang.org/x/exp/slices"
)
type InvertedIndex struct {
indexKeysTable string // txnNum_u64 -> key (k+auto_increment)
indexTable string // k -> txnNum_u64 , Needs to be table with DupSort
dir string // Directory where static files are created
aggregationStep uint64
filenameBase string
tx kv.RwTx
txNum uint64
txNumBytes [8]byte
files *btree.BTreeG[*filesItem]
}
func NewInvertedIndex(
dir string,
aggregationStep uint64,
filenameBase string,
indexKeysTable string,
indexTable string,
) (*InvertedIndex, error) {
ii := InvertedIndex{
dir: dir,
files: btree.NewG[*filesItem](32, filesItemLess),
aggregationStep: aggregationStep,
filenameBase: filenameBase,
indexKeysTable: indexKeysTable,
indexTable: indexTable,
}
files, err := os.ReadDir(dir)
if err != nil {
return nil, fmt.Errorf("NewInvertedIndex: %s, %w", filenameBase, err)
}
ii.scanStateFiles(files)
if err = ii.openFiles(); err != nil {
return nil, fmt.Errorf("NewInvertedIndex: %s, %w", filenameBase, err)
}
return &ii, nil
}
func (ii *InvertedIndex) scanStateFiles(files []fs.DirEntry) {
re := regexp.MustCompile("^" + ii.filenameBase + ".([0-9]+)-([0-9]+).(ef|efi)$")
var err error
for _, f := range files {
if !f.Type().IsRegular() {
continue
}
name := f.Name()
subs := re.FindStringSubmatch(name)
if len(subs) != 4 {
if len(subs) != 0 {
log.Warn("File ignored by inverted index scan, more than 4 submatches", "name", name, "submatches", len(subs))
}
continue
}
var startTxNum, endTxNum uint64
if startTxNum, err = strconv.ParseUint(subs[1], 10, 64); err != nil {
log.Warn("File ignored by inverted index scan, parsing startTxNum", "error", err, "name", name)
continue
}
if endTxNum, err = strconv.ParseUint(subs[2], 10, 64); err != nil {
log.Warn("File ignored by inverted index scan, parsing endTxNum", "error", err, "name", name)
continue
}
if startTxNum > endTxNum {
log.Warn("File ignored by inverted index scan, startTxNum > endTxNum", "name", name)
continue
}
var item = &filesItem{startTxNum: startTxNum * ii.aggregationStep, endTxNum: endTxNum * ii.aggregationStep}
var foundI *filesItem
ii.files.AscendGreaterOrEqual(&filesItem{startTxNum: endTxNum * ii.aggregationStep, endTxNum: endTxNum * ii.aggregationStep}, func(it *filesItem) bool {
if it.endTxNum == endTxNum {
foundI = it
}
return false
})
if foundI == nil || foundI.startTxNum > startTxNum {
//log.Info("Load state file", "name", name, "startTxNum", startTxNum*ii.aggregationStep, "endTxNum", endTxNum*ii.aggregationStep)
ii.files.ReplaceOrInsert(item)
}
}
}
func (ii *InvertedIndex) openFiles() error {
var err error
var totalKeys uint64
ii.files.Ascend(func(item *filesItem) bool {
datPath := filepath.Join(ii.dir, fmt.Sprintf("%s.%d-%d.ef", ii.filenameBase, item.startTxNum/ii.aggregationStep, item.endTxNum/ii.aggregationStep))
if item.decompressor, err = compress.NewDecompressor(datPath); err != nil {
log.Debug("InvertedIndex.openFiles: %w, %s", err, datPath)
return false
}
idxPath := filepath.Join(ii.dir, fmt.Sprintf("%s.%d-%d.efi", ii.filenameBase, item.startTxNum/ii.aggregationStep, item.endTxNum/ii.aggregationStep))
if !dir.Exist(idxPath) {
if _, err = buildIndex(item.decompressor, idxPath, ii.dir, item.decompressor.Count()/2, false /* values */); err != nil {
return false
}
}
if item.index, err = recsplit.OpenIndex(idxPath); err != nil {
log.Debug("InvertedIndex.openFiles: %w, %s", err, datPath)
return false
}
totalKeys += item.index.KeyCount()
return true
})
if err != nil {
return err
}
return nil
}
func (ii *InvertedIndex) closeFiles() {
ii.files.Ascend(func(item *filesItem) bool {
if item.decompressor != nil {
item.decompressor.Close()
}
if item.index != nil {
item.index.Close()
}
return true
})
}
func (ii *InvertedIndex) Close() {
ii.closeFiles()
}
func (ii *InvertedIndex) Files() (res []string) {
ii.files.Ascend(func(item *filesItem) bool {
if item.decompressor != nil {
_, fName := filepath.Split(item.decompressor.FilePath())
res = append(res, filepath.Join("history", fName))
}
return true
})
return res
}
func (ii *InvertedIndex) SetTx(tx kv.RwTx) {
ii.tx = tx
}
func (ii *InvertedIndex) SetTxNum(txNum uint64) {
ii.txNum = txNum
binary.BigEndian.PutUint64(ii.txNumBytes[:], ii.txNum)
}
func (ii *InvertedIndex) add(key, indexKey []byte) error {
if err := ii.tx.Put(ii.indexKeysTable, ii.txNumBytes[:], key); err != nil {
return err
}
if err := ii.tx.Put(ii.indexTable, indexKey, ii.txNumBytes[:]); err != nil {
return err
}
return nil
}
func (ii *InvertedIndex) Add(key []byte) error {
return ii.add(key, key)
}
func (ii *InvertedIndex) MakeContext() *InvertedIndexContext {
var ic = InvertedIndexContext{ii: ii}
ic.files = btree.NewG[*ctxItem](32, ctxItemLess)
ii.files.Ascend(func(item *filesItem) bool {
ic.files.ReplaceOrInsert(&ctxItem{
startTxNum: item.startTxNum,
endTxNum: item.endTxNum,
getter: item.decompressor.MakeGetter(),
reader: recsplit.NewIndexReader(item.index),
})
return true
})
return &ic
}
// InvertedIterator allows iteration over range of tx numbers
// Iteration is not implmented via callback function, because there is often
// a requirement for interators to be composable (for example, to implement AND and OR for indices)
// InvertedIterator must be closed after use to prevent leaking of resources like cursor
type InvertedIterator struct {
key []byte
startTxNum, endTxNum uint64
stack []*ctxItem
efIt *eliasfano32.EliasFanoIter
next uint64
hasNextInFiles bool
hasNextInDb bool
roTx kv.Tx
indexTable string
cursor kv.CursorDupSort
}
func (it *InvertedIterator) Close() {
if it.cursor != nil {
it.cursor.Close()
}
}
func (it *InvertedIterator) advanceInFiles() {
for {
for it.efIt == nil {
if len(it.stack) == 0 {
it.hasNextInFiles = false
return
}
item := it.stack[len(it.stack)-1]
it.stack = it.stack[:len(it.stack)-1]
offset := item.reader.Lookup(it.key)
g := item.getter
g.Reset(offset)
if k, _ := g.NextUncompressed(); bytes.Equal(k, it.key) {
eliasVal, _ := g.NextUncompressed()
ef, _ := eliasfano32.ReadEliasFano(eliasVal)
it.efIt = ef.Iterator()
}
}
for it.efIt.HasNext() {
n := it.efIt.Next()
if n >= it.endTxNum {
it.hasNextInFiles = false
return
}
if n >= it.startTxNum {
it.hasNextInFiles = true
it.next = n
return
}
}
it.efIt = nil // Exhausted this iterator
}
}
func (it *InvertedIterator) advanceInDb() {
var v []byte
var err error
if it.cursor == nil {
if it.cursor, err = it.roTx.CursorDupSort(it.indexTable); err != nil {
// TODO pass error properly around
panic(err)
}
var k []byte
if k, v, err = it.cursor.Seek(it.key); err != nil {
// TODO pass error properly around
panic(err)
}
if !bytes.Equal(k, it.key) {
it.cursor.Close()
it.hasNextInDb = false
return
}
} else {
_, v, err = it.cursor.NextDup()
if err != nil {
// TODO pass error properly around
panic(err)
}
}
for ; err == nil && v != nil; _, v, err = it.cursor.NextDup() {
n := binary.BigEndian.Uint64(v)
if n >= it.endTxNum {
it.cursor.Close()
it.hasNextInDb = false
return
}
if n >= it.startTxNum {
it.hasNextInDb = true
it.next = n
return
}
}
if err != nil {
// TODO pass error properly around
panic(err)
}
it.cursor.Close()
it.hasNextInDb = false
}
func (it *InvertedIterator) advance() {
if it.hasNextInFiles {
it.advanceInFiles()
}
if it.hasNextInDb && !it.hasNextInFiles {
it.advanceInDb()
}
}
func (it *InvertedIterator) HasNext() bool {
return it.hasNextInFiles || it.hasNextInDb
}
func (it *InvertedIterator) Next() uint64 {
n := it.next
it.advance()
return n
}
type InvertedIndexContext struct {
ii *InvertedIndex
files *btree.BTreeG[*ctxItem]
}
// IterateRange is to be used in public API, therefore it relies on read-only transaction
// so that iteration can be done even when the inverted index is being updated.
// [startTxNum; endNumTx)
func (ic *InvertedIndexContext) IterateRange(key []byte, startTxNum, endTxNum uint64, roTx kv.Tx) InvertedIterator {
it := InvertedIterator{
key: key,
startTxNum: startTxNum,
endTxNum: endTxNum,
indexTable: ic.ii.indexTable,
roTx: roTx,
}
var search ctxItem
it.hasNextInDb = true
search.startTxNum = 0
search.endTxNum = startTxNum
ic.files.DescendGreaterThan(&search, func(item *ctxItem) bool {
if item.startTxNum < endTxNum {
it.stack = append(it.stack, item)
it.hasNextInFiles = true
}
if item.endTxNum >= endTxNum {
it.hasNextInDb = false
}
return true
})
it.advance()
return it
}
type InvertedIterator1 struct {
hasNextInFiles bool
hasNextInDb bool
startTxKey [8]byte
startTxNum uint64
endTxNum uint64
roTx kv.Tx
cursor kv.CursorDupSort
indexTable string
h ReconHeap
key, nextKey, nextFileKey, nextDbKey []byte
}
func (it *InvertedIterator1) Close() {
if it.cursor != nil {
it.cursor.Close()
}
}
func (it *InvertedIterator1) advanceInFiles() {
for it.h.Len() > 0 {
top := heap.Pop(&it.h).(*ReconItem)
key := top.key
val, _ := top.g.NextUncompressed()
if top.g.HasNext() {
top.key, _ = top.g.NextUncompressed()
heap.Push(&it.h, top)
}
if !bytes.Equal(key, it.key) {
ef, _ := eliasfano32.ReadEliasFano(val)
min := ef.Get(0)
max := ef.Max()
if min < it.endTxNum && max >= it.startTxNum { // Intersection of [min; max) and [it.startTxNum; it.endTxNum)
it.key = key
it.nextFileKey = key
return
}
}
}
it.hasNextInFiles = false
}
func (it *InvertedIterator1) advanceInDb() {
var k, v []byte
var err error
if it.cursor == nil {
if it.cursor, err = it.roTx.CursorDupSort(it.indexTable); err != nil {
// TODO pass error properly around
panic(err)
}
if k, _, err = it.cursor.First(); err != nil {
// TODO pass error properly around
panic(err)
}
} else {
if k, _, err = it.cursor.NextNoDup(); err != nil {
panic(err)
}
}
for k != nil {
if v, err = it.cursor.SeekBothRange(k, it.startTxKey[:]); err != nil {
panic(err)
}
if v != nil {
txNum := binary.BigEndian.Uint64(v)
if txNum < it.endTxNum {
it.nextDbKey = append(it.nextDbKey[:0], k...)
return
}
}
if k, _, err = it.cursor.NextNoDup(); err != nil {
panic(err)
}
}
it.cursor.Close()
it.cursor = nil
it.hasNextInDb = false
}
func (it *InvertedIterator1) advance() {
if it.hasNextInFiles {
if it.hasNextInDb {
c := bytes.Compare(it.nextFileKey, it.nextDbKey)
if c < 0 {
it.nextKey = append(it.nextKey[:0], it.nextFileKey...)
it.advanceInFiles()
} else if c > 0 {
it.nextKey = append(it.nextKey[:0], it.nextDbKey...)
it.advanceInDb()
} else {
it.nextKey = append(it.nextKey[:0], it.nextFileKey...)
it.advanceInDb()
it.advanceInFiles()
}
} else {
it.nextKey = append(it.nextKey[:0], it.nextFileKey...)
it.advanceInFiles()
}
} else if it.hasNextInDb {
it.nextKey = append(it.nextKey[:0], it.nextDbKey...)
it.advanceInDb()
} else {
it.nextKey = nil
}
}
func (it *InvertedIterator1) HasNext() bool {
return it.hasNextInFiles || it.hasNextInDb || it.nextKey != nil
}
func (it *InvertedIterator1) Next(keyBuf []byte) []byte {
result := append(keyBuf, it.nextKey...)
it.advance()
return result
}
func (ic *InvertedIndexContext) IterateChangedKeys(startTxNum, endTxNum uint64, roTx kv.Tx) InvertedIterator1 {
var ii1 InvertedIterator1
ii1.hasNextInDb = true
ii1.roTx = roTx
ii1.indexTable = ic.ii.indexTable
ic.files.AscendGreaterOrEqual(&ctxItem{endTxNum: startTxNum}, func(item *ctxItem) bool {
if item.endTxNum >= endTxNum {
ii1.hasNextInDb = false
}
if item.endTxNum <= startTxNum {
return true
}
if item.startTxNum >= endTxNum {
return false
}
g := item.getter
if g.HasNext() {
key, _ := g.NextUncompressed()
heap.Push(&ii1.h, &ReconItem{startTxNum: item.startTxNum, endTxNum: item.endTxNum, g: g, txNum: ^item.endTxNum, key: key})
ii1.hasNextInFiles = true
}
return true
})
binary.BigEndian.PutUint64(ii1.startTxKey[:], startTxNum)
ii1.startTxNum = startTxNum
ii1.endTxNum = endTxNum
ii1.advanceInDb()
ii1.advanceInFiles()
ii1.advance()
return ii1
}
func (ii *InvertedIndex) collate(txFrom, txTo uint64, roTx kv.Tx) (map[string]*roaring64.Bitmap, error) {
keysCursor, err := roTx.CursorDupSort(ii.indexKeysTable)
if err != nil {
return nil, fmt.Errorf("create %s keys cursor: %w", ii.filenameBase, err)
}
defer keysCursor.Close()
indexBitmaps := map[string]*roaring64.Bitmap{}
var txKey [8]byte
binary.BigEndian.PutUint64(txKey[:], txFrom)
var k, v []byte
for k, v, err = keysCursor.Seek(txKey[:]); err == nil && k != nil; k, v, err = keysCursor.Next() {
txNum := binary.BigEndian.Uint64(k)
if txNum >= txTo {
break
}
var bitmap *roaring64.Bitmap
var ok bool
if bitmap, ok = indexBitmaps[string(v)]; !ok {
bitmap = roaring64.New()
indexBitmaps[string(v)] = bitmap
}
bitmap.Add(txNum)
}
if err != nil {
return nil, fmt.Errorf("iterate over %s keys cursor: %w", ii.filenameBase, err)
}
return indexBitmaps, nil
}
type InvertedFiles struct {
decomp *compress.Decompressor
index *recsplit.Index
}
func (sf InvertedFiles) Close() {
if sf.decomp != nil {
sf.decomp.Close()
}
if sf.index != nil {
sf.index.Close()
}
}
func (ii *InvertedIndex) buildFiles(step uint64, bitmaps map[string]*roaring64.Bitmap) (InvertedFiles, error) {
var decomp *compress.Decompressor
var index *recsplit.Index
var comp *compress.Compressor
var err error
closeComp := true
defer func() {
if closeComp {
if comp != nil {
comp.Close()
}
if decomp != nil {
decomp.Close()
}
if index != nil {
index.Close()
}
}
}()
txNumFrom := step * ii.aggregationStep
txNumTo := (step + 1) * ii.aggregationStep
datPath := filepath.Join(ii.dir, fmt.Sprintf("%s.%d-%d.ef", ii.filenameBase, txNumFrom/ii.aggregationStep, txNumTo/ii.aggregationStep))
comp, err = compress.NewCompressor(context.Background(), "ef", datPath, ii.dir, compress.MinPatternScore, 1, log.LvlDebug)
if err != nil {
return InvertedFiles{}, fmt.Errorf("create %s compressor: %w", ii.filenameBase, err)
}
var buf []byte
keys := make([]string, 0, len(bitmaps))
for key := range bitmaps {
keys = append(keys, key)
}
slices.Sort(keys)
for _, key := range keys {
if err = comp.AddUncompressedWord([]byte(key)); err != nil {
return InvertedFiles{}, fmt.Errorf("add %s key [%x]: %w", ii.filenameBase, key, err)
}
bitmap := bitmaps[key]
ef := eliasfano32.NewEliasFano(bitmap.GetCardinality(), bitmap.Maximum())
it := bitmap.Iterator()
for it.HasNext() {
ef.AddOffset(it.Next())
}
ef.Build()
buf = ef.AppendBytes(buf[:0])
if err = comp.AddUncompressedWord(buf); err != nil {
return InvertedFiles{}, fmt.Errorf("add %s val: %w", ii.filenameBase, err)
}
}
if err = comp.Compress(); err != nil {
return InvertedFiles{}, fmt.Errorf("compress %s: %w", ii.filenameBase, err)
}
comp.Close()
comp = nil
if decomp, err = compress.NewDecompressor(datPath); err != nil {
return InvertedFiles{}, fmt.Errorf("open %s decompressor: %w", ii.filenameBase, err)
}
idxPath := filepath.Join(ii.dir, fmt.Sprintf("%s.%d-%d.efi", ii.filenameBase, txNumFrom/ii.aggregationStep, txNumTo/ii.aggregationStep))
if index, err = buildIndex(decomp, idxPath, ii.dir, len(keys), false /* values */); err != nil {
return InvertedFiles{}, fmt.Errorf("build %s efi: %w", ii.filenameBase, err)
}
closeComp = false
return InvertedFiles{decomp: decomp, index: index}, nil
}
func (ii *InvertedIndex) integrateFiles(sf InvertedFiles, txNumFrom, txNumTo uint64) {
ii.files.ReplaceOrInsert(&filesItem{
startTxNum: txNumFrom,
endTxNum: txNumTo,
decompressor: sf.decomp,
index: sf.index,
})
}
// [txFrom; txTo)
func (ii *InvertedIndex) prune(txFrom, txTo uint64) error {
keysCursor, err := ii.tx.RwCursorDupSort(ii.indexKeysTable)
if err != nil {
return fmt.Errorf("create %s keys cursor: %w", ii.filenameBase, err)
}
defer keysCursor.Close()
var txKey [8]byte
binary.BigEndian.PutUint64(txKey[:], txFrom)
var k, v []byte
idxC, err := ii.tx.RwCursorDupSort(ii.indexTable)
if err != nil {
return err
}
defer idxC.Close()
for k, v, err = keysCursor.Seek(txKey[:]); err == nil && k != nil; k, v, err = keysCursor.Next() {
txNum := binary.BigEndian.Uint64(k)
if txNum >= txTo {
break
}
if err = idxC.DeleteExact(v, k); err != nil {
return err
}
// This DeleteCurrent needs to the the last in the loop iteration, because it invalidates k and v
if err = keysCursor.DeleteCurrent(); err != nil {
return err
}
}
if err != nil {
return fmt.Errorf("iterate over %s keys: %w", ii.filenameBase, err)
}
return nil
}