erigon-pulse/erigon-lib/state/merge.go
/*
Copyright 2022 Erigon contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package state
import (
"bytes"
"container/heap"
"context"
"encoding/binary"
"fmt"
"os"
"path/filepath"
"strings"
"github.com/ledgerwatch/erigon-lib/common/background"
"github.com/ledgerwatch/log/v3"
"github.com/ledgerwatch/erigon-lib/common"
"github.com/ledgerwatch/erigon-lib/common/cmp"
"github.com/ledgerwatch/erigon-lib/compress"
"github.com/ledgerwatch/erigon-lib/recsplit"
"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
)
func (d *Domain) endTxNumMinimax() uint64 {
minimax := d.History.endTxNumMinimax()
if max, ok := d.files.Max(); ok {
endTxNum := max.endTxNum
if minimax == 0 || endTxNum < minimax {
minimax = endTxNum
}
}
return minimax
}
func (ii *InvertedIndex) endTxNumMinimax() uint64 {
var minimax uint64
if max, ok := ii.files.Max(); ok {
endTxNum := max.endTxNum
if minimax == 0 || endTxNum < minimax {
minimax = endTxNum
}
}
return minimax
}
func (ii *InvertedIndex) endIndexedTxNumMinimax(needFrozen bool) uint64 {
var max uint64
ii.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.index == nil || (needFrozen && !item.frozen) {
continue
}
max = cmp.Max(max, item.endTxNum)
}
return true
})
return max
}
func (h *History) endTxNumMinimax() uint64 {
minimax := h.InvertedIndex.endTxNumMinimax()
if max, ok := h.files.Max(); ok {
endTxNum := max.endTxNum
if minimax == 0 || endTxNum < minimax {
minimax = endTxNum
}
}
return minimax
}
func (h *History) endIndexedTxNumMinimax(needFrozen bool) uint64 {
var max uint64
h.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.index == nil || (needFrozen && !item.frozen) {
continue
}
max = cmp.Max(max, item.endTxNum)
}
return true
})
return cmp.Min(max, h.InvertedIndex.endIndexedTxNumMinimax(needFrozen))
}
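// Illustrative sketch, not part of the original file: a caller that owns several domains (as the
// aggregator does) can only merge up to the smallest endTxNumMinimax across all of them, because that
// is the highest txNum for which every component has complete files. The helper name is hypothetical.
// nolint
func exampleGlobalMergeBoundary(domains ...*Domain) uint64 {
	var boundary uint64
	for i, d := range domains {
		if min := d.endTxNumMinimax(); i == 0 || min < boundary {
			boundary = min
		}
	}
	return boundary
}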
type DomainRanges struct {
valuesStartTxNum uint64
valuesEndTxNum uint64
historyStartTxNum uint64
historyEndTxNum uint64
indexStartTxNum uint64
indexEndTxNum uint64
values bool
history bool
index bool
}
func (r DomainRanges) String() string {
var b strings.Builder
if r.values {
b.WriteString(fmt.Sprintf("Values: [%d, %d)", r.valuesStartTxNum, r.valuesEndTxNum))
}
if r.history {
if b.Len() > 0 {
b.WriteString(", ")
}
b.WriteString(fmt.Sprintf("History: [%d, %d)", r.historyStartTxNum, r.historyEndTxNum))
}
if r.index {
if b.Len() > 0 {
b.WriteString(", ")
}
b.WriteString(fmt.Sprintf("Index: [%d, %d)", r.indexStartTxNum, r.indexEndTxNum))
}
return b.String()
}
func (r DomainRanges) any() bool {
return r.values || r.history || r.index
}
// findMergeRange assumes that all fTypes in d.files have items at least as far as maxEndTxNum
// That is why only Values type is inspected
func (d *Domain) findMergeRange(maxEndTxNum, maxSpan uint64) DomainRanges {
hr := d.History.findMergeRange(maxEndTxNum, maxSpan)
r := DomainRanges{
historyStartTxNum: hr.historyStartTxNum,
historyEndTxNum: hr.historyEndTxNum,
history: hr.history,
indexStartTxNum: hr.indexStartTxNum,
indexEndTxNum: hr.indexEndTxNum,
index: hr.index,
}
d.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.endTxNum > maxEndTxNum {
return false
}
endStep := item.endTxNum / d.aggregationStep
spanStep := endStep & -endStep // Extract the lowest set bit of endStep; it equals the size (in steps) of the largest possible merge ending at endStep
span := cmp.Min(spanStep*d.aggregationStep, maxSpan)
start := item.endTxNum - span
if start < item.startTxNum {
if !r.values || start < r.valuesStartTxNum {
r.values = true
r.valuesStartTxNum = start
r.valuesEndTxNum = item.endTxNum
}
}
}
return true
})
return r
}
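// Illustrative sketch, not part of the original file: the span rule used above. endStep & -endStep
// isolates the lowest set bit of endStep, which is the size (in steps) of the largest power-of-two
// aligned merge that can end at endStep: a file ending at step 6 can at most close a 2-step merge
// [4, 6), while one ending at step 8 can close an 8-step merge [0, 8), assuming maxSpan allows it.
// The helper name is hypothetical.
// nolint
func exampleMaxSpanEndingAt(endStep, aggregationStep, maxSpan uint64) (startTxNum, endTxNum uint64) {
	spanStep := endStep & -endStep                     // lowest set bit of endStep
	span := cmp.Min(spanStep*aggregationStep, maxSpan) // never merge a wider range than maxSpan
	endTxNum = endStep * aggregationStep
	return endTxNum - span, endTxNum
}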
// 0-1,1-2,2-3,3-4: allow merge 0-1
// 0-2,2-3,3-4: allow merge 0-4
// 0-2,2-4: allow merge 0-4
//
// 0-1,1-2,2-3: allow merge 0-2
//
// 0-2,2-3: nothing to merge
func (ii *InvertedIndex) findMergeRange(maxEndTxNum, maxSpan uint64) (bool, uint64, uint64) {
var minFound bool
var startTxNum, endTxNum uint64
ii.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.endTxNum > maxEndTxNum {
continue
}
endStep := item.endTxNum / ii.aggregationStep
spanStep := endStep & -endStep // Extract the lowest set bit of endStep; it equals the size (in steps) of the largest possible merge ending at endStep
span := cmp.Min(spanStep*ii.aggregationStep, maxSpan)
start := item.endTxNum - span
foundSuperSet := startTxNum == item.startTxNum && item.endTxNum >= endTxNum
if foundSuperSet {
minFound = false
startTxNum = start
endTxNum = item.endTxNum
} else if start < item.startTxNum {
if !minFound || start < startTxNum {
minFound = true
startTxNum = start
endTxNum = item.endTxNum
}
}
}
return true
})
return minFound, startTxNum, endTxNum
}
func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int, ictx *InvertedIndexContext, ps *background.ProgressSet) (err error) {
closeAll := true
for updated, startTx, endTx := ii.findMergeRange(maxTxNum, maxSpan); updated; updated, startTx, endTx = ii.findMergeRange(maxTxNum, maxSpan) {
staticFiles, _ := ictx.staticFilesInRange(startTx, endTx)
defer func() {
if closeAll {
for _, i := range staticFiles {
i.decompressor.Close()
i.index.Close()
}
}
}()
mergedIndex, err := ii.mergeFiles(ctx, staticFiles, startTx, endTx, workers, ps)
if err != nil {
return err
}
defer func() {
if closeAll {
mergedIndex.decompressor.Close()
mergedIndex.index.Close()
}
}()
ii.integrateMergedFiles(staticFiles, mergedIndex)
if mergedIndex.frozen {
ii.cleanAfterFreeze(mergedIndex.endTxNum)
}
}
closeAll = false
return nil
}
type HistoryRanges struct {
historyStartTxNum uint64
historyEndTxNum uint64
indexStartTxNum uint64
indexEndTxNum uint64
history bool
index bool
}
func (r HistoryRanges) String(aggStep uint64) string {
var str string
if r.history {
str += fmt.Sprintf("hist: %d-%d, ", r.historyStartTxNum/aggStep, r.historyEndTxNum/aggStep)
}
if r.index {
str += fmt.Sprintf("idx: %d-%d", r.indexStartTxNum/aggStep, r.indexEndTxNum/aggStep)
}
return str
}
func (r HistoryRanges) any() bool {
return r.history || r.index
}
func (h *History) findMergeRange(maxEndTxNum, maxSpan uint64) HistoryRanges {
var r HistoryRanges
r.index, r.indexStartTxNum, r.indexEndTxNum = h.InvertedIndex.findMergeRange(maxEndTxNum, maxSpan)
h.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.endTxNum > maxEndTxNum {
continue
}
endStep := item.endTxNum / h.aggregationStep
spanStep := endStep & -endStep // Extract the lowest set bit of endStep; it equals the size (in steps) of the largest possible merge ending at endStep
span := cmp.Min(spanStep*h.aggregationStep, maxSpan)
start := item.endTxNum - span
foundSuperSet := r.indexStartTxNum == item.startTxNum && item.endTxNum >= r.historyEndTxNum
if foundSuperSet {
r.history = false
r.historyStartTxNum = start
r.historyEndTxNum = item.endTxNum
} else if start < item.startTxNum {
if !r.history || start < r.historyStartTxNum {
r.history = true
r.historyStartTxNum = start
r.historyEndTxNum = item.endTxNum
}
}
}
return true
})
if r.history && r.index {
// history is ahead of idx: then merge only idx
historyIsAhead := r.historyEndTxNum > r.indexEndTxNum
if historyIsAhead {
r.history, r.historyStartTxNum, r.historyEndTxNum = false, 0, 0
return r
}
// history is behind idx: then merge only history
historyIsBehind := r.historyEndTxNum < r.indexEndTxNum
if historyIsBehind {
r.index, r.indexStartTxNum, r.indexEndTxNum = false, 0, 0
return r
}
}
return r
}
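// Illustrative sketch, not part of the original file: the reconciliation rule applied above when both
// a history range and an index range were found. Only the component that lags behind is merged, so
// history and index files advance to the same boundary before a wider merge is attempted. The helper
// name is hypothetical.
// nolint
func exampleReconcileHistoryAndIndex(historyEndTxNum, indexEndTxNum uint64) (mergeHistory, mergeIndex bool) {
	switch {
	case historyEndTxNum > indexEndTxNum: // history is ahead: merge only the index
		return false, true
	case historyEndTxNum < indexEndTxNum: // history is behind: merge only the history
		return true, false
	default: // both components end at the same txNum: merge both
		return true, true
	}
}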
// staticFilesInRange returns list of static files with txNum in specified range [startTxNum; endTxNum)
// files are returned in ascending order of endTxNum
func (dc *DomainContext) staticFilesInRange(r DomainRanges) (valuesFiles, indexFiles, historyFiles []*filesItem, startJ int) {
if r.index || r.history {
var err error
indexFiles, historyFiles, startJ, err = dc.hc.staticFilesInRange(HistoryRanges{
historyStartTxNum: r.historyStartTxNum,
historyEndTxNum: r.historyEndTxNum,
history: r.history,
indexStartTxNum: r.indexStartTxNum,
indexEndTxNum: r.indexEndTxNum,
index: r.index,
})
if err != nil {
panic(err)
}
}
if r.values {
for _, item := range dc.files {
if item.startTxNum < r.valuesStartTxNum {
startJ++
continue
}
if item.endTxNum > r.valuesEndTxNum {
break
}
valuesFiles = append(valuesFiles, item.src)
}
for _, f := range valuesFiles {
if f == nil {
panic("must not happen")
}
}
}
return
}
// nolint
func (d *Domain) staticFilesInRange(r DomainRanges, dc *DomainContext) (valuesFiles, indexFiles, historyFiles []*filesItem, startJ int) {
panic("deprecated: use DomainContext.staticFilesInRange")
}
func (ic *InvertedIndexContext) staticFilesInRange(startTxNum, endTxNum uint64) ([]*filesItem, int) {
files := make([]*filesItem, 0, len(ic.files))
var startJ int
for _, item := range ic.files {
if item.startTxNum < startTxNum {
startJ++
continue
}
if item.endTxNum > endTxNum {
break
}
files = append(files, item.src)
}
for _, f := range files {
if f == nil {
panic("must not happen")
}
}
return files, startJ
}
// nolint
func (ii *InvertedIndex) staticFilesInRange(startTxNum, endTxNum uint64, ic *InvertedIndexContext) ([]*filesItem, int) {
panic("deprecated: use InvertedIndexContext.staticFilesInRange")
}
func (hc *HistoryContext) staticFilesInRange(r HistoryRanges) (indexFiles, historyFiles []*filesItem, startJ int, err error) {
if !r.history && r.index {
indexFiles, startJ = hc.ic.staticFilesInRange(r.indexStartTxNum, r.indexEndTxNum)
return indexFiles, historyFiles, startJ, nil
}
if r.history {
// Get history files from HistoryContext (no "garbage/overlaps"), but take index files directly from InvertedIndex.files,
// because index files may already have been merged (before a `kill -9`) and would then not be visible in InvertedIndexContext
startJ = 0
for _, item := range hc.files {
if item.startTxNum < r.historyStartTxNum {
startJ++
continue
}
if item.endTxNum > r.historyEndTxNum {
break
}
historyFiles = append(historyFiles, item.src)
idxFile, ok := hc.h.InvertedIndex.files.Get(item.src)
if ok {
indexFiles = append(indexFiles, idxFile)
} else {
walkErr := fmt.Errorf("History.staticFilesInRange: required file not found: %s.%d-%d.efi", hc.h.filenameBase, item.startTxNum/hc.h.aggregationStep, item.endTxNum/hc.h.aggregationStep)
return nil, nil, 0, walkErr
}
}
for _, f := range historyFiles {
if f == nil {
panic("must not happen")
}
}
if r.index && len(indexFiles) != len(historyFiles) {
var sIdx, sHist []string
for _, f := range indexFiles {
if f.index != nil {
_, fName := filepath.Split(f.index.FilePath())
sIdx = append(sIdx, fmt.Sprintf("%+v", fName))
}
}
for _, f := range historyFiles {
if f.decompressor != nil {
_, fName := filepath.Split(f.decompressor.FilePath())
sHist = append(sHist, fmt.Sprintf("%+v", fName))
}
}
log.Warn("[snapshots] something wrong with files for merge", "idx", strings.Join(sIdx, ","), "hist", strings.Join(sHist, ","))
}
}
return
}
// nolint
func (h *History) staticFilesInRange(r HistoryRanges, hc *HistoryContext) (indexFiles, historyFiles []*filesItem, startJ int, err error) {
panic("deprecated: use HistoryContext.staticFilesInRange")
}
func mergeEfs(preval, val, buf []byte) ([]byte, error) {
preef, _ := eliasfano32.ReadEliasFano(preval)
ef, _ := eliasfano32.ReadEliasFano(val)
preIt := preef.Iterator()
efIt := ef.Iterator()
newEf := eliasfano32.NewEliasFano(preef.Count()+ef.Count(), ef.Max())
for preIt.HasNext() {
v, err := preIt.Next()
if err != nil {
return nil, err
}
newEf.AddOffset(v)
}
for efIt.HasNext() {
v, err := efIt.Next()
if err != nil {
return nil, err
}
newEf.AddOffset(v)
}
newEf.Build()
return newEf.AppendBytes(buf), nil
}
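// Illustrative sketch, not part of the original file: how mergeEfs is used. Every value in an .ef file
// is an Elias-Fano encoded, ascending list of txNums; when the same key occurs in two files being
// merged, the list from the older file is encoded first and the newer one appended, so the combined
// sequence stays non-decreasing. The txNum values below are made up for the example.
// nolint
func exampleMergeEfs() ([]byte, error) {
	older := eliasfano32.NewEliasFano(3, 40) // 3 txNums, max value 40
	for _, txNum := range []uint64{10, 20, 40} {
		older.AddOffset(txNum)
	}
	older.Build()
	newer := eliasfano32.NewEliasFano(2, 100) // 2 txNums, max value 100
	for _, txNum := range []uint64{50, 100} {
		newer.AddOffset(txNum)
	}
	newer.Build()
	// mergeEfs takes the serialized form of both lists and returns a new serialized list with all 5 txNums
	return mergeEfs(older.AppendBytes(nil), newer.AppendBytes(nil), nil)
}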
func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, historyFiles []*filesItem, r DomainRanges, workers int, ps *background.ProgressSet) (valuesIn, indexIn, historyIn *filesItem, err error) {
if !r.any() {
return
}
var comp *compress.Compressor
closeItem := true
defer func() {
if closeItem {
if comp != nil {
comp.Close()
}
if indexIn != nil {
if indexIn.decompressor != nil {
indexIn.decompressor.Close()
}
if indexIn.index != nil {
indexIn.index.Close()
}
if indexIn.bindex != nil {
indexIn.bindex.Close()
}
}
if historyIn != nil {
if historyIn.decompressor != nil {
historyIn.decompressor.Close()
}
if historyIn.index != nil {
historyIn.index.Close()
}
if historyIn.bindex != nil {
historyIn.bindex.Close()
}
}
if valuesIn != nil {
if valuesIn.decompressor != nil {
valuesIn.decompressor.Close()
}
if valuesIn.index != nil {
valuesIn.index.Close()
}
if valuesIn.bindex != nil {
valuesIn.bindex.Close()
}
}
}
}()
if indexIn, historyIn, err = d.History.mergeFiles(ctx, indexFiles, historyFiles,
HistoryRanges{
historyStartTxNum: r.historyStartTxNum,
historyEndTxNum: r.historyEndTxNum,
history: r.history,
indexStartTxNum: r.indexStartTxNum,
indexEndTxNum: r.indexEndTxNum,
index: r.index}, workers, ps); err != nil {
return nil, nil, nil, err
}
if r.values {
for _, f := range valuesFiles {
defer f.decompressor.EnableMadvNormal().DisableReadAhead()
}
datFileName := fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, r.valuesStartTxNum/d.aggregationStep, r.valuesEndTxNum/d.aggregationStep)
datPath := filepath.Join(d.dir, datFileName)
if comp, err = compress.NewCompressor(ctx, "merge", datPath, d.tmpdir, compress.MinPatternScore, workers, log.LvlTrace, d.logger); err != nil {
return nil, nil, nil, fmt.Errorf("merge %s history compressor: %w", d.filenameBase, err)
}
if d.noFsync {
comp.DisableFsync()
}
p := ps.AddNew("merege "+datFileName, 1)
defer ps.Delete(p)
var cp CursorHeap
heap.Init(&cp)
for _, item := range valuesFiles {
g := item.decompressor.MakeGetter()
g.Reset(0)
if g.HasNext() {
key, _ := g.NextUncompressed()
var val []byte
if d.compressVals {
val, _ = g.Next(nil)
} else {
val, _ = g.NextUncompressed()
}
heap.Push(&cp, &CursorItem{
t: FILE_CURSOR,
dg: g,
key: key,
val: val,
endTxNum: item.endTxNum,
reverse: true,
})
}
}
keyCount := 0
// In the loop below, the pair `keyBuf=>valBuf` is always 1 item behind `lastKey=>lastVal`.
// `lastKey` and `lastVal` are taken from the top of the multi-way merge (assisted by the CursorHeap cp), but not processed right away
// instead, the pair from the previous iteration is processed first - `keyBuf=>valBuf`. After that, `keyBuf` and `valBuf` are assigned
// to `lastKey` and `lastVal` correspondingly, and the next step of multi-way merge happens. Therefore, after the multi-way merge loop
// (when CursorHeap cp is empty), there is a need to process the last pair `keyBuf=>valBuf`, because it was one step behind
var keyBuf, valBuf []byte
for cp.Len() > 0 {
lastKey := common.Copy(cp[0].key)
lastVal := common.Copy(cp[0].val)
// Advance all the items that have this key (including the top)
for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) {
ci1 := cp[0]
if ci1.dg.HasNext() {
ci1.key, _ = ci1.dg.NextUncompressed()
if d.compressVals {
ci1.val, _ = ci1.dg.Next(ci1.val[:0])
} else {
ci1.val, _ = ci1.dg.NextUncompressed()
}
heap.Fix(&cp, 0)
} else {
heap.Pop(&cp)
}
}
// empty value means deletion
deleted := r.valuesStartTxNum == 0 && len(lastVal) == 0
if !deleted {
if keyBuf != nil {
if err = comp.AddUncompressedWord(keyBuf); err != nil {
return nil, nil, nil, err
}
keyCount++ // Only counting keys, not values
switch d.compressVals {
case true:
if err = comp.AddWord(valBuf); err != nil {
return nil, nil, nil, err
}
default:
if err = comp.AddUncompressedWord(valBuf); err != nil {
return nil, nil, nil, err
}
}
}
keyBuf = append(keyBuf[:0], lastKey...)
valBuf = append(valBuf[:0], lastVal...)
}
}
if keyBuf != nil {
if err = comp.AddUncompressedWord(keyBuf); err != nil {
return nil, nil, nil, err
}
keyCount++ // Only counting keys, not values
if d.compressVals {
if err = comp.AddWord(valBuf); err != nil {
return nil, nil, nil, err
}
} else {
if err = comp.AddUncompressedWord(valBuf); err != nil {
return nil, nil, nil, err
}
}
}
if err = comp.Compress(); err != nil {
return nil, nil, nil, err
}
comp.Close()
comp = nil
ps.Delete(p)
valuesIn = newFilesItem(r.valuesStartTxNum, r.valuesEndTxNum, d.aggregationStep)
if valuesIn.decompressor, err = compress.NewDecompressor(datPath); err != nil {
return nil, nil, nil, fmt.Errorf("merge %s decompressor [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
}
idxFileName := fmt.Sprintf("%s.%d-%d.kvi", d.filenameBase, r.valuesStartTxNum/d.aggregationStep, r.valuesEndTxNum/d.aggregationStep)
idxPath := filepath.Join(d.dir, idxFileName)
p = ps.AddNew("merge "+idxFileName, uint64(keyCount*2))
defer ps.Delete(p)
// if valuesIn.index, err = buildIndex(valuesIn.decompressor, idxPath, d.dir, keyCount, false /* values */); err != nil {
if valuesIn.index, err = buildIndexThenOpen(ctx, valuesIn.decompressor, idxPath, d.tmpdir, keyCount, false /* values */, p, d.logger, d.noFsync); err != nil {
return nil, nil, nil, fmt.Errorf("merge %s buildIndex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
}
btFileName := strings.TrimSuffix(idxFileName, "kvi") + "bt"
p = ps.AddNew(btFileName, uint64(keyCount*2))
defer ps.Delete(p)
btPath := filepath.Join(d.dir, btFileName)
err = BuildBtreeIndexWithDecompressor(btPath, valuesIn.decompressor, p, d.tmpdir, d.logger)
if err != nil {
return nil, nil, nil, fmt.Errorf("merge %s btindex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
}
bt, err := OpenBtreeIndexWithDecompressor(btPath, DefaultBtreeM, valuesIn.decompressor)
if err != nil {
return nil, nil, nil, fmt.Errorf("merge %s btindex2 [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
}
valuesIn.bindex = bt
}
closeItem = false
d.stats.MergesCount++
return
}
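// Illustrative sketch, not part of the original file: the buffering pattern used in the merge loop
// above. keyBuf=>valBuf trails lastKey=>lastVal by one step so the final pair can still be emitted
// after the heap runs dry. For a duplicated key only one value survives - the one the heap surfaces
// first, which is the entry from the file with the highest endTxNum - and an empty value acts as a
// deletion marker (in the real loop the marker is only dropped when the merge starts at txNum 0).
// The helper below replays the same pattern over an already key-sorted stream; names are hypothetical.
// nolint
func exampleLastWriterWins(keys, vals [][]byte) (outKeys, outVals [][]byte) {
	var keyBuf, valBuf []byte
	for i := 0; i < len(keys); {
		lastKey := common.Copy(keys[i])
		lastVal := common.Copy(vals[i])
		for i < len(keys) && bytes.Equal(keys[i], lastKey) {
			i++ // skip every pair with the same key; the first occurrence wins
		}
		if len(lastVal) == 0 {
			continue // empty value means deletion - drop the key entirely
		}
		if keyBuf != nil {
			outKeys, outVals = append(outKeys, common.Copy(keyBuf)), append(outVals, common.Copy(valBuf))
		}
		keyBuf = append(keyBuf[:0], lastKey...)
		valBuf = append(valBuf[:0], lastVal...)
	}
	if keyBuf != nil {
		outKeys, outVals = append(outKeys, common.Copy(keyBuf)), append(outVals, common.Copy(valBuf))
	}
	return outKeys, outVals
}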
func (ii *InvertedIndex) mergeFiles(ctx context.Context, files []*filesItem, startTxNum, endTxNum uint64, workers int, ps *background.ProgressSet) (*filesItem, error) {
for _, h := range files {
defer h.decompressor.EnableMadvNormal().DisableReadAhead()
}
var outItem *filesItem
var comp *compress.Compressor
var decomp *compress.Decompressor
var err error
var closeItem = true
defer func() {
if closeItem {
if comp != nil {
comp.Close()
}
if decomp != nil {
decomp.Close()
}
if outItem != nil {
if outItem.decompressor != nil {
outItem.decompressor.Close()
}
if outItem.index != nil {
outItem.index.Close()
}
outItem = nil
}
}
}()
if ctx.Err() != nil {
return nil, ctx.Err()
}
datFileName := fmt.Sprintf("%s.%d-%d.ef", ii.filenameBase, startTxNum/ii.aggregationStep, endTxNum/ii.aggregationStep)
datPath := filepath.Join(ii.dir, datFileName)
if comp, err = compress.NewCompressor(ctx, "Snapshots merge", datPath, ii.tmpdir, compress.MinPatternScore, workers, log.LvlTrace, ii.logger); err != nil {
return nil, fmt.Errorf("merge %s inverted index compressor: %w", ii.filenameBase, err)
}
if ii.noFsync {
comp.DisableFsync()
}
p := ps.AddNew("merge "+datFileName, 1)
defer ps.Delete(p)
var cp CursorHeap
heap.Init(&cp)
for _, item := range files {
g := item.decompressor.MakeGetter()
g.Reset(0)
if g.HasNext() {
key, _ := g.Next(nil)
val, _ := g.Next(nil)
//fmt.Printf("heap push %s [%d] %x\n", item.decompressor.FilePath(), item.endTxNum, key)
heap.Push(&cp, &CursorItem{
t: FILE_CURSOR,
dg: g,
key: key,
val: val,
endTxNum: item.endTxNum,
reverse: true,
})
}
}
keyCount := 0
// In the loop below, the pair `keyBuf=>valBuf` is always 1 item behind `lastKey=>lastVal`.
// `lastKey` and `lastVal` are taken from the top of the multi-way merge (assisted by the CursorHeap cp), but not processed right away
// instead, the pair from the previous iteration is processed first - `keyBuf=>valBuf`. After that, `keyBuf` and `valBuf` are assigned
// to `lastKey` and `lastVal` correspondingly, and the next step of multi-way merge happens. Therefore, after the multi-way merge loop
// (when CursorHeap cp is empty), there is a need to process the last pair `keyBuf=>valBuf`, because it was one step behind
var keyBuf, valBuf []byte
for cp.Len() > 0 {
lastKey := common.Copy(cp[0].key)
lastVal := common.Copy(cp[0].val)
var mergedOnce bool
// Advance all the items that have this key (including the top)
for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) {
ci1 := cp[0]
if mergedOnce {
if lastVal, err = mergeEfs(ci1.val, lastVal, nil); err != nil {
return nil, fmt.Errorf("merge %s inverted index: %w", ii.filenameBase, err)
}
} else {
mergedOnce = true
}
//fmt.Printf("multi-way %s [%d] %x\n", ii.indexKeysTable, ci1.endTxNum, ci1.key)
if ci1.dg.HasNext() {
ci1.key, _ = ci1.dg.NextUncompressed()
ci1.val, _ = ci1.dg.NextUncompressed()
//fmt.Printf("heap next push %s [%d] %x\n", ii.indexKeysTable, ci1.endTxNum, ci1.key)
heap.Fix(&cp, 0)
} else {
heap.Pop(&cp)
}
}
if keyBuf != nil {
if err = comp.AddUncompressedWord(keyBuf); err != nil {
return nil, err
}
keyCount++ // Only counting keys, not values
if err = comp.AddUncompressedWord(valBuf); err != nil {
return nil, err
}
}
keyBuf = append(keyBuf[:0], lastKey...)
valBuf = append(valBuf[:0], lastVal...)
}
if keyBuf != nil {
if err = comp.AddUncompressedWord(keyBuf); err != nil {
return nil, err
}
keyCount++ // Only counting keys, not values
if err = comp.AddUncompressedWord(valBuf); err != nil {
return nil, err
}
}
if err = comp.Compress(); err != nil {
return nil, err
}
comp.Close()
comp = nil
outItem = newFilesItem(startTxNum, endTxNum, ii.aggregationStep)
if outItem.decompressor, err = compress.NewDecompressor(datPath); err != nil {
return nil, fmt.Errorf("merge %s decompressor [%d-%d]: %w", ii.filenameBase, startTxNum, endTxNum, err)
}
ps.Delete(p)
idxFileName := fmt.Sprintf("%s.%d-%d.efi", ii.filenameBase, startTxNum/ii.aggregationStep, endTxNum/ii.aggregationStep)
idxPath := filepath.Join(ii.dir, idxFileName)
p = ps.AddNew("merge "+idxFileName, uint64(outItem.decompressor.Count()*2))
defer ps.Delete(p)
if outItem.index, err = buildIndexThenOpen(ctx, outItem.decompressor, idxPath, ii.tmpdir, keyCount, false /* values */, p, ii.logger, ii.noFsync); err != nil {
return nil, fmt.Errorf("merge %s buildIndex [%d-%d]: %w", ii.filenameBase, startTxNum, endTxNum, err)
}
closeItem = false
return outItem, nil
}
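// Illustrative sketch, not part of the original file: what one merged .ef entry holds. Every key in
// the file is followed by an Elias-Fano encoded list of the txNums at which that key was touched, and
// the .efi recsplit index built above lets the key be looked up again. The helper below just decodes
// such a value back into plain txNums; its name is hypothetical.
// nolint
func exampleDecodeEfValue(val []byte) ([]uint64, error) {
	ef, _ := eliasfano32.ReadEliasFano(val)
	txNums := make([]uint64, 0, ef.Count())
	it := ef.Iterator()
	for it.HasNext() {
		txNum, err := it.Next()
		if err != nil {
			return nil, err
		}
		txNums = append(txNums, txNum)
	}
	return txNums, nil
}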
func (h *History) mergeFiles(ctx context.Context, indexFiles, historyFiles []*filesItem, r HistoryRanges, workers int, ps *background.ProgressSet) (indexIn, historyIn *filesItem, err error) {
if !r.any() {
return nil, nil, nil
}
var closeIndex = true
defer func() {
if closeIndex {
if indexIn != nil {
indexIn.decompressor.Close()
indexIn.index.Close()
}
}
}()
if indexIn, err = h.InvertedIndex.mergeFiles(ctx, indexFiles, r.indexStartTxNum, r.indexEndTxNum, workers, ps); err != nil {
return nil, nil, err
}
if r.history {
for _, f := range indexFiles {
defer f.decompressor.EnableMadvNormal().DisableReadAhead()
}
for _, f := range historyFiles {
defer f.decompressor.EnableMadvNormal().DisableReadAhead()
}
var comp *compress.Compressor
var decomp *compress.Decompressor
var rs *recsplit.RecSplit
var index *recsplit.Index
var closeItem = true
defer func() {
if closeItem {
if comp != nil {
comp.Close()
}
if decomp != nil {
decomp.Close()
}
if rs != nil {
rs.Close()
}
if index != nil {
index.Close()
}
if historyIn != nil {
if historyIn.decompressor != nil {
historyIn.decompressor.Close()
}
if historyIn.index != nil {
historyIn.index.Close()
}
}
}
}()
datFileName := fmt.Sprintf("%s.%d-%d.v", h.filenameBase, r.historyStartTxNum/h.aggregationStep, r.historyEndTxNum/h.aggregationStep)
idxFileName := fmt.Sprintf("%s.%d-%d.vi", h.filenameBase, r.historyStartTxNum/h.aggregationStep, r.historyEndTxNum/h.aggregationStep)
datPath := filepath.Join(h.dir, datFileName)
idxPath := filepath.Join(h.dir, idxFileName)
if comp, err = compress.NewCompressor(ctx, "merge", datPath, h.tmpdir, compress.MinPatternScore, workers, log.LvlTrace, h.logger); err != nil {
return nil, nil, fmt.Errorf("merge %s history compressor: %w", h.filenameBase, err)
}
if h.noFsync {
comp.DisableFsync()
}
p := ps.AddNew("merge "+datFileName, 1)
defer ps.Delete(p)
var cp CursorHeap
heap.Init(&cp)
for _, item := range indexFiles {
g := item.decompressor.MakeGetter()
g.Reset(0)
if g.HasNext() {
var g2 *compress.Getter
for _, hi := range historyFiles { // full scan, because it's ok to have a different number of files, e.g. after an unclean shutdown
if hi.startTxNum == item.startTxNum && hi.endTxNum == item.endTxNum {
g2 = hi.decompressor.MakeGetter()
break
}
}
if g2 == nil {
panic(fmt.Sprintf("for file: %s, not found corresponding file to merge", g.FileName()))
}
key, _ := g.NextUncompressed()
val, _ := g.NextUncompressed()
heap.Push(&cp, &CursorItem{
t: FILE_CURSOR,
dg: g,
dg2: g2,
key: key,
val: val,
endTxNum: item.endTxNum,
reverse: false,
})
}
}
// Multi-way merge of the index (.ef) files: for each key, the expected number of values is read from the
// Elias-Fano encoded txNum list, and that many values are copied from the matching history (.v) file in txNum order.
// Unlike the merge loops above, values are written out immediately, so no trailing keyBuf=>valBuf pair is needed here.
var valBuf []byte
var keyCount int
for cp.Len() > 0 {
lastKey := common.Copy(cp[0].key)
// Advance all the items that have this key (including the top)
for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) {
ci1 := cp[0]
count := eliasfano32.Count(ci1.val)
for i := uint64(0); i < count; i++ {
if !ci1.dg2.HasNext() {
panic(fmt.Errorf("assert: no value??? %s, i=%d, count=%d, lastKey=%x, ci1.key=%x", ci1.dg2.FileName(), i, count, lastKey, ci1.key))
}
if h.compressVals {
valBuf, _ = ci1.dg2.Next(valBuf[:0])
if err = comp.AddWord(valBuf); err != nil {
return nil, nil, err
}
} else {
valBuf, _ = ci1.dg2.NextUncompressed()
if err = comp.AddUncompressedWord(valBuf); err != nil {
return nil, nil, err
}
}
}
keyCount += int(count)
if ci1.dg.HasNext() {
ci1.key, _ = ci1.dg.NextUncompressed()
ci1.val, _ = ci1.dg.NextUncompressed()
heap.Fix(&cp, 0)
} else {
heap.Remove(&cp, 0)
}
}
}
if err = comp.Compress(); err != nil {
return nil, nil, err
}
comp.Close()
comp = nil
if decomp, err = compress.NewDecompressor(datPath); err != nil {
return nil, nil, err
}
ps.Delete(p)
p = ps.AddNew("merge "+idxFileName, uint64(2*keyCount))
defer ps.Delete(p)
if rs, err = recsplit.NewRecSplit(recsplit.RecSplitArgs{
KeyCount: keyCount,
Enums: false,
BucketSize: 2000,
LeafSize: 8,
TmpDir: h.tmpdir,
IndexFile: idxPath,
}, h.logger); err != nil {
return nil, nil, fmt.Errorf("create recsplit: %w", err)
}
rs.LogLvl(log.LvlTrace)
if h.noFsync {
rs.DisableFsync()
}
var historyKey []byte
var txKey [8]byte
var valOffset uint64
g := indexIn.decompressor.MakeGetter()
g2 := decomp.MakeGetter()
var keyBuf []byte
for {
g.Reset(0)
g2.Reset(0)
valOffset = 0
for g.HasNext() {
keyBuf, _ = g.NextUncompressed()
valBuf, _ = g.NextUncompressed()
ef, _ := eliasfano32.ReadEliasFano(valBuf)
efIt := ef.Iterator()
for efIt.HasNext() {
txNum, _ := efIt.Next()
binary.BigEndian.PutUint64(txKey[:], txNum)
historyKey = append(append(historyKey[:0], txKey[:]...), keyBuf...)
if err = rs.AddKey(historyKey, valOffset); err != nil {
return nil, nil, err
}
if h.compressVals {
valOffset, _ = g2.Skip()
} else {
valOffset, _ = g2.SkipUncompressed()
}
}
p.Processed.Add(1)
}
if err = rs.Build(ctx); err != nil {
if rs.Collision() {
log.Info("Building recsplit. Collision happened. It's ok. Restarting...")
rs.ResetNextSalt()
} else {
return nil, nil, fmt.Errorf("build %s idx: %w", h.filenameBase, err)
}
} else {
break
}
}
rs.Close()
rs = nil
if index, err = recsplit.OpenIndex(idxPath); err != nil {
return nil, nil, fmt.Errorf("open %s idx: %w", h.filenameBase, err)
}
historyIn = newFilesItem(r.historyStartTxNum, r.historyEndTxNum, h.aggregationStep)
historyIn.decompressor = decomp
historyIn.index = index
closeItem = false
}
closeIndex = false
return
}
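// Illustrative sketch, not part of the original file: the key layout used when building the .vi index
// above. Each indexed key is the 8-byte big-endian txNum followed by the original history key, and it
// maps to the offset of the corresponding value inside the merged .v file. The helper name is hypothetical.
// nolint
func exampleHistoryIndexKey(txNum uint64, key []byte) []byte {
	historyKey := make([]byte, 8+len(key))
	binary.BigEndian.PutUint64(historyKey[:8], txNum)
	copy(historyKey[8:], key)
	return historyKey
}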
func (d *Domain) integrateMergedFiles(valuesOuts, indexOuts, historyOuts []*filesItem, valuesIn, indexIn, historyIn *filesItem) {
d.History.integrateMergedFiles(indexOuts, historyOuts, indexIn, historyIn)
if valuesIn != nil {
d.files.Set(valuesIn)
// `kill -9` may leave some garbage
// but it may still be useful for merges, until we finish merging into the frozen file
if historyIn != nil && historyIn.frozen {
d.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.frozen || item.endTxNum > valuesIn.endTxNum {
continue
}
valuesOuts = append(valuesOuts, item)
}
return true
})
}
}
for _, out := range valuesOuts {
if out == nil {
panic("must not happen")
}
d.files.Delete(out)
out.canDelete.Store(true)
}
d.reCalcRoFiles()
}
func (ii *InvertedIndex) integrateMergedFiles(outs []*filesItem, in *filesItem) {
if in != nil {
ii.files.Set(in)
// `kill -9` may leave some garbage
// but it may still be useful for merges, until we finish merging into the frozen file
if in.frozen {
ii.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.frozen || item.endTxNum > in.endTxNum {
continue
}
outs = append(outs, item)
}
return true
})
}
}
for _, out := range outs {
if out == nil {
panic("must not happen: " + ii.filenameBase)
}
ii.files.Delete(out)
out.canDelete.Store(true)
}
ii.reCalcRoFiles()
}
func (h *History) integrateMergedFiles(indexOuts, historyOuts []*filesItem, indexIn, historyIn *filesItem) {
h.InvertedIndex.integrateMergedFiles(indexOuts, indexIn)
//TODO: handle collision
if historyIn != nil {
h.files.Set(historyIn)
// `kill -9` may leave some garbage
// but it may still be useful for merges, until we finish merging into the frozen file
if historyIn.frozen {
h.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.frozen || item.endTxNum > historyIn.endTxNum {
continue
}
historyOuts = append(historyOuts, item)
}
return true
})
}
}
for _, out := range historyOuts {
if out == nil {
panic("must not happen: " + h.filenameBase)
}
h.files.Delete(out)
out.canDelete.Store(true)
}
h.reCalcRoFiles()
}
// nolint
func (dc *DomainContext) frozenTo() uint64 {
if len(dc.files) == 0 {
return 0
}
for i := len(dc.files) - 1; i >= 0; i-- {
if dc.files[i].src.frozen {
return cmp.Min(dc.files[i].endTxNum, dc.hc.frozenTo())
}
}
return 0
}
func (hc *HistoryContext) frozenTo() uint64 {
if len(hc.files) == 0 {
return 0
}
for i := len(hc.files) - 1; i >= 0; i-- {
if hc.files[i].src.frozen {
return cmp.Min(hc.files[i].endTxNum, hc.ic.frozenTo())
}
}
return 0
}
func (ic *InvertedIndexContext) frozenTo() uint64 {
if len(ic.files) == 0 {
return 0
}
for i := len(ic.files) - 1; i >= 0; i-- {
if ic.files[i].src.frozen {
return ic.files[i].endTxNum
}
}
return 0
}
func (d *Domain) cleanAfterFreeze(frozenTo uint64) {
if frozenTo == 0 {
return
}
var outs []*filesItem
// `kill -9` may leave some garbage
// but it may still be useful for merges, until the `frozen` file merge finishes
d.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.frozen || item.endTxNum > frozenTo {
continue
}
outs = append(outs, item)
}
return true
})
for _, out := range outs {
if out == nil {
panic("must not happen: " + d.filenameBase)
}
d.files.Delete(out)
if out.refcount.Load() == 0 {
// if it has no readers (invisible even for us) - it's safe to remove file right here
out.closeFilesAndRemove()
}
out.canDelete.Store(true)
}
d.History.cleanAfterFreeze(frozenTo)
}
// cleanAfterFreeze - mark all small files ending at or before `frozenTo` as `canDelete=true`
func (h *History) cleanAfterFreeze(frozenTo uint64) {
if frozenTo == 0 {
return
}
//if h.filenameBase == "accounts" {
// log.Warn("[history] History.cleanAfterFreeze", "frozenTo", frozenTo/h.aggregationStep, "stack", dbg.Stack())
//}
var outs []*filesItem
// `kill -9` may leave some garbage
// but it may still be useful for merges, until the `frozen` file merge finishes
h.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.frozen || item.endTxNum > frozenTo {
continue
}
outs = append(outs, item)
}
return true
})
for _, out := range outs {
if out == nil {
panic("must not happen: " + h.filenameBase)
}
out.canDelete.Store(true)
//if out.refcount.Load() == 0 {
// if h.filenameBase == "accounts" {
// log.Warn("[history] History.cleanAfterFreeze: immediately delete", "name", out.decompressor.FileName())
// }
//} else {
// if h.filenameBase == "accounts" {
// log.Warn("[history] History.cleanAfterFreeze: mark as 'canDelete=true'", "name", out.decompressor.FileName())
// }
//}
// if it has no readers (invisible even for us) - it's safe to remove file right here
if out.refcount.Load() == 0 {
out.closeFilesAndRemove()
}
h.files.Delete(out)
}
h.InvertedIndex.cleanAfterFreeze(frozenTo)
}
// cleanAfterFreeze - mark all small files ending at or before `frozenTo` as `canDelete=true`
func (ii *InvertedIndex) cleanAfterFreeze(frozenTo uint64) {
if frozenTo == 0 {
return
}
var outs []*filesItem
// `kill -9` may leave some garbage
// but it may still be useful for merges, until the `frozen` file merge finishes
ii.files.Walk(func(items []*filesItem) bool {
for _, item := range items {
if item.frozen || item.endTxNum > frozenTo {
continue
}
outs = append(outs, item)
}
return true
})
for _, out := range outs {
if out == nil {
panic("must not happen: " + ii.filenameBase)
}
out.canDelete.Store(true)
if out.refcount.Load() == 0 {
// if it has no readers (invisible even for us) - it's safe to remove file right here
out.closeFilesAndRemove()
}
ii.files.Delete(out)
}
}
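// Illustrative sketch, not part of the original file: the deferred-deletion pattern used by the
// cleanAfterFreeze methods above. A superseded file is first marked canDelete; it is unlinked right
// away only when no Context currently holds a reference to it, otherwise the last reader to release
// its reference is expected to perform the removal. The helper name is hypothetical.
// nolint
func exampleRetireFile(out *filesItem) {
	out.canDelete.Store(true)
	if out.refcount.Load() == 0 {
		out.closeFilesAndRemove() // no open readers: safe to close and remove the files immediately
	}
}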
// nolint
func (d *Domain) deleteGarbageFiles() {
for _, item := range d.garbageFiles {
// paranoid mode: don't delete frozen files
steps := item.endTxNum/d.aggregationStep - item.startTxNum/d.aggregationStep
if steps%StepsInBiggestFile == 0 {
continue
}
f1 := fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, item.startTxNum/d.aggregationStep, item.endTxNum/d.aggregationStep)
os.Remove(filepath.Join(d.dir, f1))
log.Debug("[snapshots] delete garbage", "file", f1)
f2 := fmt.Sprintf("%s.%d-%d.kvi", d.filenameBase, item.startTxNum/d.aggregationStep, item.endTxNum/d.aggregationStep)
os.Remove(filepath.Join(d.dir, f2))
log.Debug("[snapshots] delete garbage", "file", f2)
}
d.garbageFiles = nil
d.History.deleteGarbageFiles()
}
func (h *History) deleteGarbageFiles() {
for _, item := range h.garbageFiles {
// paranoid mode: don't delete frozen files
if item.endTxNum/h.aggregationStep-item.startTxNum/h.aggregationStep == StepsInBiggestFile {
continue
}
f1 := fmt.Sprintf("%s.%d-%d.v", h.filenameBase, item.startTxNum/h.aggregationStep, item.endTxNum/h.aggregationStep)
os.Remove(filepath.Join(h.dir, f1))
log.Debug("[snapshots] delete garbage", "file", f1)
f2 := fmt.Sprintf("%s.%d-%d.vi", h.filenameBase, item.startTxNum/h.aggregationStep, item.endTxNum/h.aggregationStep)
os.Remove(filepath.Join(h.dir, f2))
log.Debug("[snapshots] delete garbage", "file", f2)
}
h.garbageFiles = nil
h.InvertedIndex.deleteGarbageFiles()
}
func (ii *InvertedIndex) deleteGarbageFiles() {
for _, item := range ii.garbageFiles {
// paranoid mode: don't delete frozen files
if item.endTxNum/ii.aggregationStep-item.startTxNum/ii.aggregationStep == StepsInBiggestFile {
continue
}
f1 := fmt.Sprintf("%s.%d-%d.ef", ii.filenameBase, item.startTxNum/ii.aggregationStep, item.endTxNum/ii.aggregationStep)
os.Remove(filepath.Join(ii.dir, f1))
log.Debug("[snapshots] delete garbage", "file", f1)
f2 := fmt.Sprintf("%s.%d-%d.efi", ii.filenameBase, item.startTxNum/ii.aggregationStep, item.endTxNum/ii.aggregationStep)
os.Remove(filepath.Join(ii.dir, f2))
log.Debug("[snapshots] delete garbage", "file", f2)
}
ii.garbageFiles = nil
}
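// Illustrative sketch, not part of the original file: the naming convention used throughout this file
// is <base>.<fromStep>-<toStep>.<ext>, where steps are txNum / aggregationStep and the extension is
// .kv/.kvi/.bt for domain values, .v/.vi for history and .ef/.efi for the inverted index. The helper
// name is hypothetical.
// nolint
func exampleMergedFileName(base string, startTxNum, endTxNum, aggregationStep uint64, ext string) string {
	return fmt.Sprintf("%s.%d-%d.%s", base, startTxNum/aggregationStep, endTxNum/aggregationStep, ext)
}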