erigon-pulse/aggregator/history.go
ledgerwatch 1c6e82c2b6
[erigon2] Thin commitment (2nd attempt) (#329)
* Another fix for history files

* Half way through

* Another fix

* Correct closing sequence

* Remove first byte insert marker

* More on thin commitments

* Fixes

* Fixes

* Print

* Skip touchMap

* Merge branchData from trees and from files

* Fill branch commitment

* Fill branch commitment

* Print

* Fix?

* Merge branchData when updating in the tree

* Better panic

* Prints

* Prints

* Prints

* Create complete branch data if it did not exist before

* Cleanup printing

* Fix merge

* Fix merge use

* Fix transform

* Better startBlock panic

* Preserve touchMap

* Merge commitments during aggregation

* Merge commitments during aggregation

* Merge commitments during aggregation

* Merge commitments during aggregation

* Merge commitments

* Merge commitments

* Merge commitments

* Merge commitments

* Merge commitments

* Merge commitments

* Merge commitments

* Include fieldbits during transform

* Fix history reads

* Print

* Print

* Prints

* Prints

* Prints

* Prints

* Prints

* Prints

* Prints

* Prints

* Prints

* Prints

* Prints

* Print largestMerge, lock files in branchFn

* Add storage lock

* Prints

* prefixLen fix

* prefixLen fix

* Fixes

* Remove print

* Remove print

* Set changesets and commitments flags upfront

* Logging instead of printing

* Fix history merge, recsplit panic

Co-authored-by: Alexey Sharp <alexeysharp@Alexeys-iMac.local>
Co-authored-by: Alex Sharp <alexsharp@Alexs-MacBook-Pro.local>
2022-02-16 16:44:00 +00:00

367 lines
11 KiB
Go

/*
Copyright 2022 Erigon contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package aggregator
import (
"bytes"
"encoding/binary"
"fmt"
"io/fs"
"os"
"path"
"regexp"
"strconv"
"strings"
"github.com/RoaringBitmap/roaring/roaring64"
"github.com/google/btree"
"github.com/holiman/uint256"
"github.com/ledgerwatch/erigon-lib/compress"
"github.com/ledgerwatch/erigon-lib/recsplit"
"github.com/ledgerwatch/log/v3"
)
// History gives read access to past state by combining the state files,
// history files, and bitmap files that an Aggregator produces.
type History struct {
	// diffDir is the directory in which the state diff files are stored.
	diffDir string
	// files holds, for every file type, a B-tree of the file items
	// registered for that type.
	files [NumberOfTypes]*btree.BTree
	// aggregationStep is the stride, in blocks, used when stepping through
	// the chunks of a bitmap file.
	aggregationStep uint64
}
// NewHistory creates a History backed by the files found in diffDir.
//
// The directory is scanned for state files whose end block does not exceed
// blockTo, and the decompressor and index of every registered file is opened.
// If reading the directory or opening any file fails, everything opened so far
// is closed and the error is returned.
func NewHistory(diffDir string, blockTo uint64, aggregationStep uint64) (*History, error) {
	hist := &History{diffDir: diffDir, aggregationStep: aggregationStep}
	for t := FirstType; t < NumberOfTypes; t++ {
		hist.files[t] = btree.New(32)
	}
	// Flipped to true only on the successful return path; every other exit
	// releases the decompressors and indices that were already opened.
	succeeded := false
	defer func() {
		if !succeeded {
			hist.Close()
		}
	}()
	// List the diff directory and register the files it contains.
	entries, err := os.ReadDir(diffDir)
	if err != nil {
		return nil, err
	}
	hist.scanStateFiles(entries, blockTo)
	for t := FirstType; t < NumberOfTypes; t++ {
		if openErr := hist.openFiles(t); openErr != nil {
			return nil, fmt.Errorf("opening %s state files: %w", t.String(), openErr)
		}
	}
	succeeded = true
	return hist, nil
}
// scanStateFiles registers the state files listed in files with the per-type
// B-trees of h.
//
// Only entries named exactly "<type>.<startBlock>-<endBlock>.dat" or
// "<type>.<startBlock>-<endBlock>.idx" are considered, where <type> is the
// string form of one of the known file types. The pattern is anchored and its
// dots are literal, so temporary or similarly named files are not mistaken for
// state files. Files are skipped when their block numbers do not parse, when
// startBlock > endBlock, when endBlock > blockTo, or when the type cannot be
// parsed.
//
// A file is inserted when the first item at or after its end block does not
// share that end block, or shares it but has a larger start block.
func (h *History) scanStateFiles(files []fs.DirEntry, blockTo uint64) {
	typeStrings := make([]string, NumberOfTypes)
	for fType := FileType(0); fType < NumberOfTypes; fType++ {
		typeStrings[fType] = fType.String()
	}
	re := regexp.MustCompile(`^(` + strings.Join(typeStrings, "|") + `)\.([0-9]+)-([0-9]+)\.(dat|idx)$`)
	for _, f := range files {
		name := f.Name()
		subs := re.FindStringSubmatch(name)
		if len(subs) != 5 {
			if len(subs) != 0 {
				log.Warn("File ignored by history, more than 4 submatches", "name", name, "submatches", len(subs))
			}
			continue
		}
		startBlock, err := strconv.ParseUint(subs[2], 10, 64)
		if err != nil {
			log.Warn("File ignored by history, parsing startBlock", "error", err, "name", name)
			continue
		}
		endBlock, err := strconv.ParseUint(subs[3], 10, 64)
		if err != nil {
			log.Warn("File ignored by history, parsing endBlock", "error", err, "name", name)
			continue
		}
		if startBlock > endBlock {
			log.Warn("File ignored by history, startBlock > endBlock", "name", name)
			continue
		}
		if endBlock > blockTo {
			// Only load files up to specified block
			continue
		}
		fType, ok := ParseFileType(subs[1])
		if !ok {
			log.Warn("File ignored by history, type unknown", "type", subs[1])
			// Without this, an unparsed type would be used to index h.files below.
			continue
		}
		item := &byEndBlockItem{startBlock: startBlock, endBlock: endBlock}
		var foundI *byEndBlockItem
		// Inspect only the first item at or after the pivot; the callback
		// always stops the iteration after one item.
		h.files[fType].AscendGreaterOrEqual(&byEndBlockItem{startBlock: endBlock, endBlock: endBlock}, func(i btree.Item) bool {
			it := i.(*byEndBlockItem)
			if it.endBlock == endBlock {
				foundI = it
			}
			return false
		})
		if foundI == nil || foundI.startBlock > startBlock {
			h.files[fType].ReplaceOrInsert(item)
			log.Info("Load file", "name", name, "type", fType.String(), "endBlock", item.endBlock)
		}
	}
}
// openFiles opens the ".dat" decompressor and the ".idx" index of every item
// registered for fType, in ascending B-tree order, and attaches getters and
// index readers to each item. It stops at the first failure and returns that
// error; items opened before the failure stay open.
func (h *History) openFiles(fType FileType) error {
	var openErr error
	h.files[fType].Ascend(func(entry btree.Item) bool {
		f := entry.(*byEndBlockItem)

		datPath := path.Join(h.diffDir, fmt.Sprintf("%s.%d-%d.dat", fType.String(), f.startBlock, f.endBlock))
		f.decompressor, openErr = compress.NewDecompressor(datPath)
		if openErr != nil {
			return false
		}

		idxPath := path.Join(h.diffDir, fmt.Sprintf("%s.%d-%d.idx", fType.String(), f.startBlock, f.endBlock))
		f.index, openErr = recsplit.OpenIndex(idxPath)
		if openErr != nil {
			return false
		}

		// Two independent getters and index readers are created for each file.
		f.getter = f.decompressor.MakeGetter()
		f.getterMerge = f.decompressor.MakeGetter()
		f.indexReader = recsplit.NewIndexReader(f.index)
		f.readerMerge = recsplit.NewIndexReader(f.index)
		return true
	})
	return openErr
}
// closeFiles closes the decompressor and the index of every item registered
// for fType. Items whose decompressor or index was never opened are skipped.
func (h *History) closeFiles(fType FileType) {
	release := func(entry btree.Item) bool {
		f := entry.(*byEndBlockItem)
		if d := f.decompressor; d != nil {
			d.Close()
		}
		if idx := f.index; idx != nil {
			idx.Close()
		}
		return true
	}
	h.files[fType].Ascend(release)
}
// Close closes the files of every file type held by h.
func (h *History) Close() {
	fType := FirstType
	for fType < NumberOfTypes {
		h.closeFiles(fType)
		fType++
	}
}
// MakeHistoryReader returns a new HistoryReader that reads from h. Its
// position fields start at their zero values; set them with SetNums.
func (h *History) MakeHistoryReader() *HistoryReader {
	return &HistoryReader{h: h}
}
// HistoryReader reads accounts, storage, and code from a History at the
// position (block number and transaction number) configured via SetNums.
type HistoryReader struct {
	// h is the History whose files are searched.
	h *History
	// search is a reusable key for B-tree lookups; it is overwritten by
	// every history search.
	search byEndBlockItem
	// blockNum is the block number of the current read position.
	blockNum uint64
	// txNum is the transaction number of the current read position.
	txNum uint64
	// lastTx tells whether txNum is the last transaction in the block.
	lastTx bool
}
// SetNums sets the position used by subsequent reads: the block number, the
// transaction number, and whether that transaction is the last in its block.
func (hr *HistoryReader) SetNums(blockNum uint64, txNum uint64, lastTx bool) {
	hr.blockNum, hr.txNum, hr.lastTx = blockNum, txNum, lastTx
}
// searchInHistory looks up the value recorded for key in the history files.
//
// It works in two phases. First it walks the bitmap files of bitmapType,
// starting at the reader's current block, to find a transaction number at
// which key was recorded. Then it reads the value stored under that
// transaction number and key from the history file of historyType.
//
// It returns (true, value, nil) when a history record is found,
// (false, nil, nil) when no bitmap entry qualifies, and (false, nil, err) when
// a bitmap cannot be decoded or no matching history file is registered.
// When trace is set, progress is printed to standard output.
//
// The method overwrites hr.search, so it must not be called concurrently on
// the same HistoryReader.
func (hr *HistoryReader) searchInHistory(bitmapType, historyType FileType, key []byte, trace bool) (bool, []byte, error) {
if trace {
fmt.Printf("searchInHistory %s %s [%x] blockNum %d, txNum %d\n", bitmapType.String(), historyType.String(), key, hr.blockNum, hr.txNum)
}
// When positioned on the last transaction of a block, the search starts
// from the following block.
searchBlock := hr.blockNum
if hr.lastTx {
searchBlock++
}
searchTx := hr.txNum
// Pivot for the bitmap file iteration: end block is the search block, start
// block is the search block rounded down to a multiple of 500_000.
// NOTE(review): 500_000 is hard-coded rather than derived from
// aggregationStep; presumably it is the largest block span of a file - confirm.
hr.search.endBlock = searchBlock
hr.search.startBlock = searchBlock - (searchBlock % 500_000)
// Bitmap lookup key layout: key followed by an 8-byte big-endian chunk end block.
var lookupKey = make([]byte, len(key)+8)
copy(lookupKey, key)
var bitmapVal []byte
bm := roaring64.New()
var err error
var found bool
var foundTxNum uint64
var foundEndBlock uint64
hr.h.files[bitmapType].AscendGreaterOrEqual(&hr.search, func(i btree.Item) bool {
item := i.(*byEndBlockItem)
g := item.getter
// Step through the chunks of aggregationStep blocks, beginning with the
// chunk that contains searchBlock, up to the end block of this file.
for chunkEnd := hr.h.aggregationStep*(searchBlock/hr.h.aggregationStep) + hr.h.aggregationStep - 1; chunkEnd <= item.endBlock; chunkEnd += hr.h.aggregationStep {
// Chunks that end before this file starts are not looked up in it.
if chunkEnd < item.startBlock {
continue
}
binary.BigEndian.PutUint64(lookupKey[len(key):], chunkEnd)
offset := item.indexReader.Lookup(lookupKey)
if trace {
fmt.Printf("Lookup [%x] in %s.[%d-%d].idx = %d\n", lookupKey, bitmapType.String(), item.startBlock, item.endBlock, offset)
}
g.Reset(offset)
// The record at the offset is used only if its key equals lookupKey.
if keyMatch, _ := g.Match(lookupKey); keyMatch {
bitmapVal, _ = g.Next(bitmapVal[:0])
bm.Clear()
if _, err = bm.ReadFrom(bytes.NewReader(bitmapVal)); err != nil {
// Stop iterating; err is reported after the iteration.
return false
}
if searchTx == 0 {
// Every entry qualifies, so take the smallest one.
foundTxNum = bm.Minimum()
foundEndBlock = item.endBlock
found = true
return false
}
// searchRank is the number of bitmap entries <= searchTx-1, which makes it
// the index of the first entry >= searchTx, if there is one.
searchRank := bm.Rank(searchTx - 1)
if searchRank >= bm.GetCardinality() {
// All entries of this chunk are below searchTx; try the next chunk.
continue
}
foundTxNum, _ = bm.Select(searchRank)
foundEndBlock = item.endBlock
found = true
return false
}
}
// Nothing found in this file, continue with the next one
return true
})
if err != nil {
return false, nil, err
}
if !found {
return false, nil, nil
}
if trace {
fmt.Printf("found in tx %d, endBlock %d\n", foundTxNum, foundEndBlock)
}
// History lookup key layout: 8-byte big-endian transaction number followed
// by key. The same buffer is reused since both layouts are len(key)+8 bytes.
binary.BigEndian.PutUint64(lookupKey, foundTxNum)
copy(lookupKey[8:], key)
var historyItem *byEndBlockItem
// The history file is fetched with Get, using the end block of the bitmap
// file that produced the match and a start block 499_999 blocks earlier.
// NOTE(review): this presumably requires the history file to span exactly
// 500_000 blocks - confirm against the ordering of byEndBlockItem.
hr.search.endBlock = foundEndBlock
hr.search.startBlock = foundEndBlock - 499_999
if i := hr.h.files[historyType].Get(&hr.search); i != nil {
historyItem = i.(*byEndBlockItem)
} else {
return false, nil, fmt.Errorf("no %s file found for %d", historyType.String(), foundEndBlock)
}
offset := historyItem.indexReader.Lookup(lookupKey)
if trace {
fmt.Printf("Lookup [%x] in %s.[%d-%d].idx = %d\n", lookupKey, historyType.String(), historyItem.startBlock, historyItem.endBlock, offset)
}
// The word at the looked-up offset is returned as the value; it is not
// preceded by a key comparison here.
historyItem.getter.Reset(offset)
v, _ := historyItem.getter.Next(nil)
return true, v, nil
}
// ReadAccountData returns the account data for addr at the reader's position.
// A record in the account history takes precedence; when there is none, the
// value comes from the account state files. A history search failure is
// returned as the error.
func (hr *HistoryReader) ReadAccountData(addr []byte, trace bool) ([]byte, error) {
	found, histVal, err := hr.searchInHistory(AccountBitmap, AccountHistory, addr, trace)
	switch {
	case err != nil:
		return nil, err
	case found:
		return histVal, nil
	}
	// No history record: fall back to the state files.
	return hr.h.readFromFiles(Account, addr, trace), nil
}
// ReadAccountStorage returns the storage value at location loc of account addr
// at the reader's position. The lookup key is addr followed by loc. A record
// in the storage history takes precedence; when there is none, the storage
// state files are consulted. The result is nil when neither source has the key.
func (hr *HistoryReader) ReadAccountStorage(addr []byte, loc []byte, trace bool) (*uint256.Int, error) {
	compositeKey := make([]byte, 0, len(addr)+len(loc))
	compositeKey = append(compositeKey, addr...)
	compositeKey = append(compositeKey, loc...)

	found, raw, err := hr.searchInHistory(StorageBitmap, StorageHistory, compositeKey, trace)
	if err != nil {
		return nil, err
	}
	if !found {
		// No history record: fall back to the state files.
		raw = hr.h.readFromFiles(Storage, compositeKey, trace)
		if raw == nil {
			return nil, nil
		}
	}
	return new(uint256.Int).SetBytes(raw), nil
}
// ReadAccountCode returns the code of account addr at the reader's position.
// A record in the code history takes precedence; when there is none, the code
// comes from the code state files. A history search failure is returned as
// the error. The trace flag applies to both the history search and the state
// file lookup.
func (hr *HistoryReader) ReadAccountCode(addr []byte, trace bool) ([]byte, error) {
	// Look in the history first
	hOk, v, err := hr.searchInHistory(CodeBitmap, CodeHistory, addr, trace)
	if err != nil {
		return nil, err
	}
	if hOk {
		return v, nil
	}
	// Not found in history, look in the state files
	return hr.h.readFromFiles(Code, addr, trace), nil
}
// ReadAccountCodeSize returns the length in bytes of the code of account addr
// at the reader's position. A record in the code history takes precedence;
// when there is none, the code state files are consulted. A history search
// failure is returned as the error. The trace flag applies to both the
// history search and the state file lookup.
func (hr *HistoryReader) ReadAccountCodeSize(addr []byte, trace bool) (int, error) {
	// Look in the history first
	hOk, v, err := hr.searchInHistory(CodeBitmap, CodeHistory, addr, trace)
	if err != nil {
		return 0, err
	}
	if hOk {
		return len(v), nil
	}
	// Not found in history, look in the state files
	return len(hr.h.readFromFiles(Code, addr, trace)), nil
}
// readFromFiles returns the value stored for filekey in the files of fType,
// or nil when no file contains the key.
//
// Files are visited in descending B-tree order and the first hit wins. An item
// that carries an in-memory tree is answered from that tree; otherwise the
// item's index and getter are used, and the record is accepted only if its
// key equals filekey. When trace is set, progress is printed to standard
// output.
func (h *History) readFromFiles(fType FileType, filekey []byte, trace bool) []byte {
	var result []byte
	visit := func(entry btree.Item) bool {
		f := entry.(*byEndBlockItem)
		if trace {
			fmt.Printf("read %s %x: search in file [%d-%d]\n", fType.String(), filekey, f.startBlock, f.endBlock)
		}
		// In-memory tree: a hit ends the search, a miss moves on to the next item.
		if f.tree != nil {
			hit := f.tree.Get(&AggregateItem{k: filekey})
			if hit != nil {
				result = hit.(*AggregateItem).v
				return false
			}
			return true
		}
		if f.index.Empty() {
			return true
		}
		pos := f.indexReader.Lookup(filekey)
		reader := f.getter
		reader.Reset(pos)
		if !reader.HasNext() {
			return true
		}
		// The record at the looked-up offset counts only if its key matches.
		if matched, _ := reader.Match(filekey); !matched {
			return true
		}
		result, _ = reader.Next(nil)
		if trace {
			fmt.Printf("read %s %x: found [%x] in file [%d-%d]\n", fType.String(), filekey, result, f.startBlock, f.endBlock)
		}
		return false
	}
	h.files[fType].Descend(visit)
	return result
}