erigon-pulse/trie/trie_pruning.go
Alex Sharov ce96cf75b2
Intermediate hash phase 3 (#377)
* #remove debug prints

* remove storage-mode="i"

* minnet re-execute hack with checkpoints

* minnet re-execute hack with checkpoints

* rollback to master setup

* mainnet re-exec hack

* rollback some changes

* v0 of "push down" functionality

* move all logic to own functions

* handle case when re-created account already has some storage

* clear path for storage

* try to rely on tree structure (but maybe need to rely on DB because can be intra-block re-creations of account)

* fix some bugs with indexes, moving to tests

* tests added

* make linter happy

* make linter happy

* simplify logic

* adjust comparison of keys with and without incarnation

* test for keyIsBefore

* test for keyIsBefore

* better nibbles alignment

* better nibbles alignment

* cleanup

* continue work on tests

* simplify test

* check tombstone existence before pushing it down.

* put tombstone only when account deleted, not created

* put tombstone only when account has storage

* make linter happy

* test for storage resolver

* make fixedbytes work without incarnation

* fix panic on short keys

* use special comparison only when working with keys from cache

* add blockNr for better tracing

* fix: incorrect tombstone check

* fix: incorrect tombstone check

* trigger ci

* hack for problem block

* more test-cases

* add test case for too long keys

* speedup cached resolver by removing bucket creation transaction

* remove parent type check in pruning, remove unused copy from mutation.put

* dump resolving info on fail

* dump resolving info on fail

* set tombstone everytime for now to check if it will help

* on unload: check parent type, not type of node

* fix wrong order of checking node type

* fix wrong order of checking node type

* rebase to new master

* make linter happy

* rebase to new master

* place tombstone only if acc has storage

* rebase master

* rebase master

* rebase master

* rebase master

Co-authored-by: alex.sharov <alex.sharov@lazada.com>
2020-03-11 10:31:49 +00:00

262 lines
7.1 KiB
Go

// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Pruning of the Merkle Patricia trees
package trie
import (
"fmt"
"sort"
"strings"
"github.com/ledgerwatch/turbo-geth/common"
"github.com/ledgerwatch/turbo-geth/common/debug"
"github.com/ledgerwatch/turbo-geth/common/pool"
)
// TriePruning keeps track of when (at which block number, a.k.a. timestamp or
// generation) each trie node prefix was last touched, so that the least
// recently touched prefixes can later be unloaded from the trie.
type TriePruning struct {
// Maps prefix of a node (string) to the timestamp (uint64) it was last touched at
accountTimestamps map[string]uint64
// Maps timestamp (uint64) to set of prefixes of nodes (string)
accounts map[uint64]map[string]struct{}
// For each timestamp, keeps number of branch nodes belonging to it
generationCounts map[uint64]int
// Keeps total number of branch nodes
nodeCount int
// The oldest timestamp of all branch nodes
oldestGeneration uint64
// Current timestamp
blockNr uint64
// Called by Touch with the prefix of a node that was not tracked before the call
createNodeFunc func(prefixAsNibbles []byte)
unloadNodeFunc func(prefix []byte, nodeHash []byte) // called when fullNode or dualNode unloaded
}
// NewTriePruning returns an empty pruning tracker whose oldest generation and
// current block number are both set to oldestGeneration.
//
// Both callbacks start out as no-ops, so the tracker can be used (including
// pruning in intermediate-hash mode) before SetCreateNodeFunc or
// SetUnloadNodeFunc has been called.
func NewTriePruning(oldestGeneration uint64) *TriePruning {
	return &TriePruning{
		oldestGeneration:  oldestGeneration,
		blockNr:           oldestGeneration,
		accountTimestamps: make(map[string]uint64),
		accounts:          make(map[uint64]map[string]struct{}),
		generationCounts:  make(map[uint64]int),
		createNodeFunc:    func([]byte) {},
		// Pruning invokes this callback unconditionally; a nil value would panic.
		unloadNodeFunc: func([]byte, []byte) {},
	}
}
// SetBlockNr sets the current timestamp; subsequent Touch calls stamp nodes
// with this block number.
func (tp *TriePruning) SetBlockNr(blockNr uint64) {
	tp.blockNr = blockNr
}
// BlockNr reports the current timestamp (block number) of the tracker.
func (tp *TriePruning) BlockNr() uint64 {
	return tp.blockNr
}
// SetCreateNodeFunc registers the callback that Touch invokes with the prefix
// of every node that was not tracked before the call.
//
// Passing nil resets the callback to a no-op: Touch calls it unconditionally,
// so storing a nil function would make the next Touch of a new node panic.
func (tp *TriePruning) SetCreateNodeFunc(f func(prefixAsNibbles []byte)) {
	if f == nil {
		f = func([]byte) {}
	}
	tp.createNodeFunc = f
}
// SetUnloadNodeFunc registers the callback that PruneToTimestamp invokes, in
// intermediate-hash mode, with the compressed prefix and the reference of a
// node about to be unloaded.
//
// Passing nil resets the callback to a no-op, because pruning calls it without
// checking for nil and would otherwise panic.
func (tp *TriePruning) SetUnloadNodeFunc(f func(prefix []byte, nodeHash []byte)) {
	if f == nil {
		f = func([]byte, []byte) {}
	}
	tp.unloadNodeFunc = f
}
// touch moves the bookkeeping of one node prefix between generations.
//
// hexS is the prefix of the node.
// exists is true when the prefix was already tracked, in which case
// prevTimestamp is the generation it is currently recorded under.
// del is true when the prefix is being removed instead of re-stamped.
// newTimestamp is the generation the prefix is recorded under when del is false.
//
// The per-generation prefix sets, the per-generation counts and the total node
// count are all kept in sync; empty sets and zero counts are dropped.
func (tp *TriePruning) touch(hexS string, exists bool, prevTimestamp uint64, del bool, newTimestamp uint64) {
	// Re-touching a live node within the same generation changes nothing.
	if exists && !del && prevTimestamp == newTimestamp {
		return
	}

	if !del {
		// Record the prefix under its new generation.
		bucket, found := tp.accounts[newTimestamp]
		if !found {
			bucket = make(map[string]struct{})
			tp.accounts[newTimestamp] = bucket
		}
		bucket[hexS] = struct{}{}
		tp.generationCounts[newTimestamp]++
		tp.nodeCount++
	}

	if !exists {
		return
	}

	// Forget the prefix under the generation it used to belong to.
	if bucket, found := tp.accounts[prevTimestamp]; found {
		delete(bucket, hexS)
		if len(bucket) == 0 {
			delete(tp.accounts, prevTimestamp)
		}
	}
	if remaining := tp.generationCounts[prevTimestamp] - 1; remaining == 0 {
		delete(tp.generationCounts, prevTimestamp)
	} else {
		tp.generationCounts[prevTimestamp] = remaining
	}
	tp.nodeCount--
}
// Timestamp returns the generation recorded for the node prefix hex, or 0 when
// the prefix is not tracked.
func (tp *TriePruning) Timestamp(hex []byte) uint64 {
	return tp.accountTimestamps[string(hex)]
}
// Touch stamps the node with prefix hex with the current block number, or,
// when del is true, stops tracking it.
//
// The prefix is stored as a string key, so the caller may reuse hex afterwards.
// createNodeFunc is invoked whenever the prefix was not tracked before the
// call, regardless of del.
func (tp *TriePruning) Touch(hex []byte, del bool) {
	key := string(common.CopyBytes(hex))

	// previous is zero when the prefix is unknown.
	previous, known := tp.accountTimestamps[key]
	if del {
		delete(tp.accountTimestamps, key)
	} else {
		tp.accountTimestamps[key] = tp.blockNr
	}

	if !known {
		tp.createNodeFunc([]byte(key))
	}
	tp.touch(key, known, previous, del, tp.blockNr)
}
// pruneMap unloads from t every node whose prefix is a key of m, skipping
// prefixes that lie underneath a prefix already unloaded by this call.
//
// It returns true when m contains the empty prefix, i.e. when the root itself
// was unloaded.
func pruneMap(t *Trie, m map[string]struct{}) bool {
	hexes := make([]string, 0, len(m))
	for hexS := range m {
		hexes = append(hexes, hexS)
	}
	// After sorting, all descendants of a prefix follow it contiguously, so it
	// is enough to remember the most recently unloaded prefix.
	sort.Strings(hexes)

	empty := false
	unloadedAny := false
	var lastUnloaded string
	for _, hex := range hexes {
		// If an ancestor was already unloaded, there is no need to unload its
		// descendants. Compare against the last *unloaded* prefix rather than
		// the previous key, which may itself have been a skipped descendant.
		if unloadedAny && strings.HasPrefix(hex, lastUnloaded) {
			continue
		}
		t.unload([]byte(hex))
		lastUnloaded, unloadedAny = hex, true
		if len(hex) == 0 {
			empty = true
		}
	}
	return empty
}
// PruneToTimestamp unloads from accountsTrie all tracked nodes whose generation
// is in [oldestGeneration, targetTimestamp) and forgets them in every
// bookkeeping structure (prefix sets, generation counts, timestamps and the
// total node count). Afterwards oldestGeneration is set to targetTimestamp.
//
// In intermediate-hash mode, before unloading, unloadNodeFunc is called for
// every non-empty, even-length prefix whose parent node is a duoNode or a
// fullNode. The prefix slice passed to the callback is a reused buffer and is
// only valid for the duration of the call.
func (tp *TriePruning) PruneToTimestamp(
	accountsTrie *Trie,
	targetTimestamp uint64,
) {
	// Collect the prefixes of all generations being pruned and drop those
	// generations from the bookkeeping.
	aggregateAccounts := make(map[string]struct{})
	for gen := tp.oldestGeneration; gen < targetTimestamp; gen++ {
		tp.nodeCount -= tp.generationCounts[gen]
		// The generation is gone; keeping its count would leak map entries and
		// make GenCounts report nodes that are no longer tracked.
		delete(tp.generationCounts, gen)
		for hexS := range tp.accounts[gen] {
			aggregateAccounts[hexS] = struct{}{}
		}
		delete(tp.accounts, gen)
	}

	// Calculate all hashes and send them to hashBucket before unloading from
	// the tree. Skipped when no callback is registered, since calling a nil
	// function would panic.
	if debug.IsIntermediateTrieHash() && tp.unloadNodeFunc != nil {
		key := pool.GetBuffer(64)
		defer pool.PutBuffer(key)

		for prefix := range aggregateAccounts {
			if len(prefix) == 0 || len(prefix)%2 == 1 {
				continue
			}

			nd, parent, ok := accountsTrie.getNode([]byte(prefix), false)
			if !ok {
				continue
			}
			switch parent.(type) {
			case *duoNode, *fullNode:
				CompressNibbles([]byte(prefix), &key.B)
				tp.unloadNodeFunc(key.B, nd.reference())
			default:
			}
		}
	}

	pruneMap(accountsTrie, aggregateAccounts)

	// Remove from the timestamp structure
	for hexS := range aggregateAccounts {
		delete(tp.accountTimestamps, hexS)
	}
	tp.oldestGeneration = targetTimestamp
}
// PruneTo prunes the minimum number of generations, oldest first, needed to
// bring the total number of tracked nodes down to at most targetNodeCount.
// Generations at or beyond the current block number are never selected.
//
// It returns false, without pruning, when the node count is already within the
// target, and true after PruneToTimestamp has been invoked.
func (tp *TriePruning) PruneTo(
	accountsTrie *Trie,
	targetNodeCount int,
) bool {
	if targetNodeCount >= tp.nodeCount {
		return false
	}
	excess := tp.nodeCount - targetNodeCount

	// Advance the cut-off generation until enough nodes have been collected.
	cutoff := tp.oldestGeneration
	for collected := 0; collected < excess && cutoff < tp.blockNr; cutoff++ {
		collected += tp.generationCounts[cutoff]
	}

	tp.PruneToTimestamp(accountsTrie, cutoff)
	return true
}
// NodeCount reports the total number of nodes currently tracked.
func (tp *TriePruning) NodeCount() int {
	return tp.nodeCount
}
// GenCounts returns the per-generation node counts. The returned map is the
// tracker's own map, not a copy.
func (tp *TriePruning) GenCounts() map[uint64]int {
	return tp.generationCounts
}
// DebugDump is used in the tests to ensure that there are no prunable entries
// (in such case, this function returns empty string).
//
// Otherwise it returns one "<timestamp> <prefix in hex>" line per tracked
// prefix, ordered by timestamp and then by prefix so that the output is
// reproducible regardless of map iteration order.
func (tp *TriePruning) DebugDump() string {
	timestamps := make([]uint64, 0, len(tp.accounts))
	for timestamp := range tp.accounts {
		timestamps = append(timestamps, timestamp)
	}
	sort.Slice(timestamps, func(i, j int) bool { return timestamps[i] < timestamps[j] })

	var sb strings.Builder
	for _, timestamp := range timestamps {
		m := tp.accounts[timestamp]
		accounts := make([]string, 0, len(m))
		for account := range m {
			accounts = append(accounts, account)
		}
		sort.Strings(accounts)
		for _, account := range accounts {
			fmt.Fprintf(&sb, "%d %x\n", timestamp, account)
		}
	}
	return sb.String()
}