mirror of
https://gitlab.com/pulsechaincom/erigon-pulse.git
synced 2025-01-18 08:38:46 +00:00
231e468e19
git-subtree-dir: erigon-lib git-subtree-mainline:3c8cbda809
git-subtree-split:93d9c9d9fe
1297 lines
38 KiB
Go
1297 lines
38 KiB
Go
/*
|
|
Copyright 2022 Erigon contributors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package state
|
|
|
|
import (
|
|
"bytes"
|
|
"container/heap"
|
|
"context"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/ledgerwatch/erigon-lib/common/background"
|
|
"github.com/ledgerwatch/log/v3"
|
|
|
|
"github.com/ledgerwatch/erigon-lib/common"
|
|
"github.com/ledgerwatch/erigon-lib/common/cmp"
|
|
"github.com/ledgerwatch/erigon-lib/compress"
|
|
"github.com/ledgerwatch/erigon-lib/recsplit"
|
|
"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
|
|
)
|
|
|
|
func (d *Domain) endTxNumMinimax() uint64 {
|
|
minimax := d.History.endTxNumMinimax()
|
|
if max, ok := d.files.Max(); ok {
|
|
endTxNum := max.endTxNum
|
|
if minimax == 0 || endTxNum < minimax {
|
|
minimax = endTxNum
|
|
}
|
|
}
|
|
return minimax
|
|
}
|
|
|
|
func (ii *InvertedIndex) endTxNumMinimax() uint64 {
|
|
var minimax uint64
|
|
if max, ok := ii.files.Max(); ok {
|
|
endTxNum := max.endTxNum
|
|
if minimax == 0 || endTxNum < minimax {
|
|
minimax = endTxNum
|
|
}
|
|
}
|
|
return minimax
|
|
}
|
|
func (ii *InvertedIndex) endIndexedTxNumMinimax(needFrozen bool) uint64 {
|
|
var max uint64
|
|
ii.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.index == nil || (needFrozen && !item.frozen) {
|
|
continue
|
|
}
|
|
max = cmp.Max(max, item.endTxNum)
|
|
}
|
|
return true
|
|
})
|
|
return max
|
|
}
|
|
|
|
func (h *History) endTxNumMinimax() uint64 {
|
|
minimax := h.InvertedIndex.endTxNumMinimax()
|
|
if max, ok := h.files.Max(); ok {
|
|
endTxNum := max.endTxNum
|
|
if minimax == 0 || endTxNum < minimax {
|
|
minimax = endTxNum
|
|
}
|
|
}
|
|
return minimax
|
|
}
|
|
func (h *History) endIndexedTxNumMinimax(needFrozen bool) uint64 {
|
|
var max uint64
|
|
h.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.index == nil || (needFrozen && !item.frozen) {
|
|
continue
|
|
}
|
|
max = cmp.Max(max, item.endTxNum)
|
|
}
|
|
return true
|
|
})
|
|
return cmp.Min(max, h.InvertedIndex.endIndexedTxNumMinimax(needFrozen))
|
|
}
|
|
|
|
// DomainRanges describes the half-open txNum ranges selected for merging in
// each of the three file families of a Domain. A range is meaningful only
// when the matching boolean flag is set.
type DomainRanges struct {
	valuesStartTxNum  uint64
	valuesEndTxNum    uint64
	historyStartTxNum uint64
	historyEndTxNum   uint64
	indexStartTxNum   uint64
	indexEndTxNum     uint64
	values            bool
	history           bool
	index             bool
}

// String renders the enabled ranges as "Values: [a, b), History: [c, d),
// Index: [e, f)", leaving out every family whose flag is not set. It returns
// the empty string when no flag is set.
func (r DomainRanges) String() string {
	var b strings.Builder
	// appendRange writes one labelled range, preceded by a separator when
	// something has already been written.
	appendRange := func(label string, from, to uint64) {
		if b.Len() > 0 {
			b.WriteString(", ")
		}
		fmt.Fprintf(&b, "%s: [%d, %d)", label, from, to)
	}
	if r.values {
		appendRange("Values", r.valuesStartTxNum, r.valuesEndTxNum)
	}
	if r.history {
		appendRange("History", r.historyStartTxNum, r.historyEndTxNum)
	}
	if r.index {
		appendRange("Index", r.indexStartTxNum, r.indexEndTxNum)
	}
	return b.String()
}

// any reports whether at least one of the three families has a range set.
func (r DomainRanges) any() bool {
	return r.values || r.history || r.index
}
|
|
|
|
// findMergeRange assumes that all fTypes in d.files have items at least as far as maxEndTxNum
|
|
// That is why only Values type is inspected
|
|
func (d *Domain) findMergeRange(maxEndTxNum, maxSpan uint64) DomainRanges {
|
|
hr := d.History.findMergeRange(maxEndTxNum, maxSpan)
|
|
r := DomainRanges{
|
|
historyStartTxNum: hr.historyStartTxNum,
|
|
historyEndTxNum: hr.historyEndTxNum,
|
|
history: hr.history,
|
|
indexStartTxNum: hr.indexStartTxNum,
|
|
indexEndTxNum: hr.indexEndTxNum,
|
|
index: hr.index,
|
|
}
|
|
d.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.endTxNum > maxEndTxNum {
|
|
return false
|
|
}
|
|
endStep := item.endTxNum / d.aggregationStep
|
|
spanStep := endStep & -endStep // Extract rightmost bit in the binary representation of endStep, this corresponds to size of maximally possible merge ending at endStep
|
|
span := cmp.Min(spanStep*d.aggregationStep, maxSpan)
|
|
start := item.endTxNum - span
|
|
if start < item.startTxNum {
|
|
if !r.values || start < r.valuesStartTxNum {
|
|
r.values = true
|
|
r.valuesStartTxNum = start
|
|
r.valuesEndTxNum = item.endTxNum
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
})
|
|
return r
|
|
}
|
|
|
|
// 0-1,1-2,2-3,3-4: allow merge 0-1
|
|
// 0-2,2-3,3-4: allow merge 0-4
|
|
// 0-2,2-4: allow merge 0-4
|
|
//
|
|
// 0-1,1-2,2-3: allow merge 0-2
|
|
//
|
|
// 0-2,2-3: nothing to merge
|
|
func (ii *InvertedIndex) findMergeRange(maxEndTxNum, maxSpan uint64) (bool, uint64, uint64) {
|
|
var minFound bool
|
|
var startTxNum, endTxNum uint64
|
|
ii.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.endTxNum > maxEndTxNum {
|
|
continue
|
|
}
|
|
endStep := item.endTxNum / ii.aggregationStep
|
|
spanStep := endStep & -endStep // Extract rightmost bit in the binary representation of endStep, this corresponds to size of maximally possible merge ending at endStep
|
|
span := cmp.Min(spanStep*ii.aggregationStep, maxSpan)
|
|
start := item.endTxNum - span
|
|
foundSuperSet := startTxNum == item.startTxNum && item.endTxNum >= endTxNum
|
|
if foundSuperSet {
|
|
minFound = false
|
|
startTxNum = start
|
|
endTxNum = item.endTxNum
|
|
} else if start < item.startTxNum {
|
|
if !minFound || start < startTxNum {
|
|
minFound = true
|
|
startTxNum = start
|
|
endTxNum = item.endTxNum
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
})
|
|
return minFound, startTxNum, endTxNum
|
|
}
|
|
|
|
func (ii *InvertedIndex) mergeRangesUpTo(ctx context.Context, maxTxNum, maxSpan uint64, workers int, ictx *InvertedIndexContext, ps *background.ProgressSet) (err error) {
|
|
closeAll := true
|
|
for updated, startTx, endTx := ii.findMergeRange(maxSpan, maxTxNum); updated; updated, startTx, endTx = ii.findMergeRange(maxTxNum, maxSpan) {
|
|
staticFiles, _ := ictx.staticFilesInRange(startTx, endTx)
|
|
defer func() {
|
|
if closeAll {
|
|
for _, i := range staticFiles {
|
|
i.decompressor.Close()
|
|
i.index.Close()
|
|
}
|
|
}
|
|
}()
|
|
|
|
mergedIndex, err := ii.mergeFiles(ctx, staticFiles, startTx, endTx, workers, ps)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() {
|
|
if closeAll {
|
|
mergedIndex.decompressor.Close()
|
|
mergedIndex.index.Close()
|
|
}
|
|
}()
|
|
|
|
ii.integrateMergedFiles(staticFiles, mergedIndex)
|
|
if mergedIndex.frozen {
|
|
ii.cleanAfterFreeze(mergedIndex.endTxNum)
|
|
}
|
|
}
|
|
closeAll = false
|
|
return nil
|
|
}
|
|
|
|
// HistoryRanges describes the txNum ranges selected for merging in the
// history files and in the inverted-index files of a History. A range is
// meaningful only when the matching boolean flag is set.
type HistoryRanges struct {
	historyStartTxNum uint64
	historyEndTxNum   uint64
	indexStartTxNum   uint64
	indexEndTxNum     uint64
	history           bool
	index             bool
}

// String renders the enabled ranges expressed in steps (txNum divided by
// aggStep) as "hist: a-b, idx: c-d". The history part always carries its
// trailing ", ", even when no index part follows. aggStep must be non-zero
// whenever a flag is set, because it is used as a divisor.
func (r HistoryRanges) String(aggStep uint64) string {
	histPart, idxPart := "", ""
	if r.history {
		histPart = fmt.Sprintf("hist: %d-%d, ", r.historyStartTxNum/aggStep, r.historyEndTxNum/aggStep)
	}
	if r.index {
		idxPart = fmt.Sprintf("idx: %d-%d", r.indexStartTxNum/aggStep, r.indexEndTxNum/aggStep)
	}
	return histPart + idxPart
}

// any reports whether a history range or an index range is set.
func (r HistoryRanges) any() bool {
	return r.history || r.index
}
|
|
|
|
func (h *History) findMergeRange(maxEndTxNum, maxSpan uint64) HistoryRanges {
|
|
var r HistoryRanges
|
|
r.index, r.indexStartTxNum, r.indexEndTxNum = h.InvertedIndex.findMergeRange(maxEndTxNum, maxSpan)
|
|
h.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.endTxNum > maxEndTxNum {
|
|
continue
|
|
}
|
|
endStep := item.endTxNum / h.aggregationStep
|
|
spanStep := endStep & -endStep // Extract rightmost bit in the binary representation of endStep, this corresponds to size of maximally possible merge ending at endStep
|
|
span := cmp.Min(spanStep*h.aggregationStep, maxSpan)
|
|
start := item.endTxNum - span
|
|
foundSuperSet := r.indexStartTxNum == item.startTxNum && item.endTxNum >= r.historyEndTxNum
|
|
if foundSuperSet {
|
|
r.history = false
|
|
r.historyStartTxNum = start
|
|
r.historyEndTxNum = item.endTxNum
|
|
} else if start < item.startTxNum {
|
|
if !r.history || start < r.historyStartTxNum {
|
|
r.history = true
|
|
r.historyStartTxNum = start
|
|
r.historyEndTxNum = item.endTxNum
|
|
}
|
|
}
|
|
}
|
|
return true
|
|
})
|
|
|
|
if r.history && r.index {
|
|
// history is behind idx: then merge only history
|
|
historyIsAgead := r.historyEndTxNum > r.indexEndTxNum
|
|
if historyIsAgead {
|
|
r.history, r.historyStartTxNum, r.historyEndTxNum = false, 0, 0
|
|
return r
|
|
}
|
|
|
|
historyIsBehind := r.historyEndTxNum < r.indexEndTxNum
|
|
if historyIsBehind {
|
|
r.index, r.indexStartTxNum, r.indexEndTxNum = false, 0, 0
|
|
return r
|
|
}
|
|
}
|
|
return r
|
|
}
|
|
|
|
// staticFilesInRange returns list of static files with txNum in specified range [startTxNum; endTxNum)
|
|
// files are in the descending order of endTxNum
|
|
func (dc *DomainContext) staticFilesInRange(r DomainRanges) (valuesFiles, indexFiles, historyFiles []*filesItem, startJ int) {
|
|
if r.index || r.history {
|
|
var err error
|
|
indexFiles, historyFiles, startJ, err = dc.hc.staticFilesInRange(HistoryRanges{
|
|
historyStartTxNum: r.historyStartTxNum,
|
|
historyEndTxNum: r.historyEndTxNum,
|
|
history: r.history,
|
|
indexStartTxNum: r.indexStartTxNum,
|
|
indexEndTxNum: r.indexEndTxNum,
|
|
index: r.index,
|
|
})
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
if r.values {
|
|
for _, item := range dc.files {
|
|
if item.startTxNum < r.valuesStartTxNum {
|
|
startJ++
|
|
continue
|
|
}
|
|
if item.endTxNum > r.valuesEndTxNum {
|
|
break
|
|
}
|
|
valuesFiles = append(valuesFiles, item.src)
|
|
}
|
|
for _, f := range valuesFiles {
|
|
if f == nil {
|
|
panic("must not happen")
|
|
}
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// nolint
|
|
func (d *Domain) staticFilesInRange(r DomainRanges, dc *DomainContext) (valuesFiles, indexFiles, historyFiles []*filesItem, startJ int) {
|
|
panic("deprecated: use DomainContext.staticFilesInRange")
|
|
}
|
|
func (ic *InvertedIndexContext) staticFilesInRange(startTxNum, endTxNum uint64) ([]*filesItem, int) {
|
|
files := make([]*filesItem, 0, len(ic.files))
|
|
var startJ int
|
|
|
|
for _, item := range ic.files {
|
|
if item.startTxNum < startTxNum {
|
|
startJ++
|
|
continue
|
|
}
|
|
if item.endTxNum > endTxNum {
|
|
break
|
|
}
|
|
files = append(files, item.src)
|
|
}
|
|
for _, f := range files {
|
|
if f == nil {
|
|
panic("must not happen")
|
|
}
|
|
}
|
|
|
|
return files, startJ
|
|
}
|
|
|
|
// nolint
|
|
func (ii *InvertedIndex) staticFilesInRange(startTxNum, endTxNum uint64, ic *InvertedIndexContext) ([]*filesItem, int) {
|
|
panic("deprecated: use InvertedIndexContext.staticFilesInRange")
|
|
}
|
|
|
|
func (hc *HistoryContext) staticFilesInRange(r HistoryRanges) (indexFiles, historyFiles []*filesItem, startJ int, err error) {
|
|
if !r.history && r.index {
|
|
indexFiles, startJ = hc.ic.staticFilesInRange(r.indexStartTxNum, r.indexEndTxNum)
|
|
return indexFiles, historyFiles, startJ, nil
|
|
}
|
|
|
|
if r.history {
|
|
// Get history files from HistoryContext (no "garbage/overalps"), but index files not from InvertedIndexContext
|
|
// because index files may already be merged (before `kill -9`) and it means not visible in InvertedIndexContext
|
|
startJ = 0
|
|
for _, item := range hc.files {
|
|
if item.startTxNum < r.historyStartTxNum {
|
|
startJ++
|
|
continue
|
|
}
|
|
if item.endTxNum > r.historyEndTxNum {
|
|
break
|
|
}
|
|
|
|
historyFiles = append(historyFiles, item.src)
|
|
idxFile, ok := hc.h.InvertedIndex.files.Get(item.src)
|
|
if ok {
|
|
indexFiles = append(indexFiles, idxFile)
|
|
} else {
|
|
walkErr := fmt.Errorf("History.staticFilesInRange: required file not found: %s.%d-%d.efi", hc.h.filenameBase, item.startTxNum/hc.h.aggregationStep, item.endTxNum/hc.h.aggregationStep)
|
|
return nil, nil, 0, walkErr
|
|
}
|
|
}
|
|
|
|
for _, f := range historyFiles {
|
|
if f == nil {
|
|
panic("must not happen")
|
|
}
|
|
}
|
|
if r.index && len(indexFiles) != len(historyFiles) {
|
|
var sIdx, sHist []string
|
|
for _, f := range indexFiles {
|
|
if f.index != nil {
|
|
_, fName := filepath.Split(f.index.FilePath())
|
|
sIdx = append(sIdx, fmt.Sprintf("%+v", fName))
|
|
}
|
|
}
|
|
for _, f := range historyFiles {
|
|
if f.decompressor != nil {
|
|
_, fName := filepath.Split(f.decompressor.FilePath())
|
|
sHist = append(sHist, fmt.Sprintf("%+v", fName))
|
|
}
|
|
}
|
|
log.Warn("[snapshots] something wrong with files for merge", "idx", strings.Join(sIdx, ","), "hist", strings.Join(sHist, ","))
|
|
}
|
|
}
|
|
return
|
|
}
|
|
|
|
// nolint
|
|
func (h *History) staticFilesInRange(r HistoryRanges, hc *HistoryContext) (indexFiles, historyFiles []*filesItem, startJ int, err error) {
|
|
panic("deprecated: use HistoryContext.staticFilesInRange")
|
|
}
|
|
|
|
func mergeEfs(preval, val, buf []byte) ([]byte, error) {
|
|
preef, _ := eliasfano32.ReadEliasFano(preval)
|
|
ef, _ := eliasfano32.ReadEliasFano(val)
|
|
preIt := preef.Iterator()
|
|
efIt := ef.Iterator()
|
|
newEf := eliasfano32.NewEliasFano(preef.Count()+ef.Count(), ef.Max())
|
|
for preIt.HasNext() {
|
|
v, err := preIt.Next()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
newEf.AddOffset(v)
|
|
}
|
|
for efIt.HasNext() {
|
|
v, err := efIt.Next()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
newEf.AddOffset(v)
|
|
}
|
|
newEf.Build()
|
|
return newEf.AppendBytes(buf), nil
|
|
}
|
|
|
|
func (d *Domain) mergeFiles(ctx context.Context, valuesFiles, indexFiles, historyFiles []*filesItem, r DomainRanges, workers int, ps *background.ProgressSet) (valuesIn, indexIn, historyIn *filesItem, err error) {
|
|
if !r.any() {
|
|
return
|
|
}
|
|
var comp *compress.Compressor
|
|
closeItem := true
|
|
|
|
defer func() {
|
|
if closeItem {
|
|
if comp != nil {
|
|
comp.Close()
|
|
}
|
|
if indexIn != nil {
|
|
if indexIn.decompressor != nil {
|
|
indexIn.decompressor.Close()
|
|
}
|
|
if indexIn.index != nil {
|
|
indexIn.index.Close()
|
|
}
|
|
if indexIn.bindex != nil {
|
|
indexIn.bindex.Close()
|
|
}
|
|
}
|
|
if historyIn != nil {
|
|
if historyIn.decompressor != nil {
|
|
historyIn.decompressor.Close()
|
|
}
|
|
if historyIn.index != nil {
|
|
historyIn.index.Close()
|
|
}
|
|
if historyIn.bindex != nil {
|
|
historyIn.bindex.Close()
|
|
}
|
|
}
|
|
if valuesIn != nil {
|
|
if valuesIn.decompressor != nil {
|
|
valuesIn.decompressor.Close()
|
|
}
|
|
if valuesIn.index != nil {
|
|
valuesIn.index.Close()
|
|
}
|
|
if valuesIn.bindex != nil {
|
|
valuesIn.bindex.Close()
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
if indexIn, historyIn, err = d.History.mergeFiles(ctx, indexFiles, historyFiles,
|
|
HistoryRanges{
|
|
historyStartTxNum: r.historyStartTxNum,
|
|
historyEndTxNum: r.historyEndTxNum,
|
|
history: r.history,
|
|
indexStartTxNum: r.indexStartTxNum,
|
|
indexEndTxNum: r.indexEndTxNum,
|
|
index: r.index}, workers, ps); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
if r.values {
|
|
for _, f := range valuesFiles {
|
|
defer f.decompressor.EnableMadvNormal().DisableReadAhead()
|
|
}
|
|
datFileName := fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, r.valuesStartTxNum/d.aggregationStep, r.valuesEndTxNum/d.aggregationStep)
|
|
datPath := filepath.Join(d.dir, datFileName)
|
|
if comp, err = compress.NewCompressor(ctx, "merge", datPath, d.tmpdir, compress.MinPatternScore, workers, log.LvlTrace, d.logger); err != nil {
|
|
return nil, nil, nil, fmt.Errorf("merge %s history compressor: %w", d.filenameBase, err)
|
|
}
|
|
if d.noFsync {
|
|
comp.DisableFsync()
|
|
}
|
|
p := ps.AddNew("merege "+datFileName, 1)
|
|
defer ps.Delete(p)
|
|
|
|
var cp CursorHeap
|
|
heap.Init(&cp)
|
|
for _, item := range valuesFiles {
|
|
g := item.decompressor.MakeGetter()
|
|
g.Reset(0)
|
|
if g.HasNext() {
|
|
key, _ := g.NextUncompressed()
|
|
var val []byte
|
|
if d.compressVals {
|
|
val, _ = g.Next(nil)
|
|
} else {
|
|
val, _ = g.NextUncompressed()
|
|
}
|
|
heap.Push(&cp, &CursorItem{
|
|
t: FILE_CURSOR,
|
|
dg: g,
|
|
key: key,
|
|
val: val,
|
|
endTxNum: item.endTxNum,
|
|
reverse: true,
|
|
})
|
|
}
|
|
}
|
|
keyCount := 0
|
|
// In the loop below, the pair `keyBuf=>valBuf` is always 1 item behind `lastKey=>lastVal`.
|
|
// `lastKey` and `lastVal` are taken from the top of the multi-way merge (assisted by the CursorHeap cp), but not processed right away
|
|
// instead, the pair from the previous iteration is processed first - `keyBuf=>valBuf`. After that, `keyBuf` and `valBuf` are assigned
|
|
// to `lastKey` and `lastVal` correspondingly, and the next step of multi-way merge happens. Therefore, after the multi-way merge loop
|
|
// (when CursorHeap cp is empty), there is a need to process the last pair `keyBuf=>valBuf`, because it was one step behind
|
|
var keyBuf, valBuf []byte
|
|
for cp.Len() > 0 {
|
|
lastKey := common.Copy(cp[0].key)
|
|
lastVal := common.Copy(cp[0].val)
|
|
// Advance all the items that have this key (including the top)
|
|
for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) {
|
|
ci1 := cp[0]
|
|
if ci1.dg.HasNext() {
|
|
ci1.key, _ = ci1.dg.NextUncompressed()
|
|
if d.compressVals {
|
|
ci1.val, _ = ci1.dg.Next(ci1.val[:0])
|
|
} else {
|
|
ci1.val, _ = ci1.dg.NextUncompressed()
|
|
}
|
|
heap.Fix(&cp, 0)
|
|
} else {
|
|
heap.Pop(&cp)
|
|
}
|
|
}
|
|
|
|
// empty value means deletion
|
|
deleted := r.valuesStartTxNum == 0 && len(lastVal) == 0
|
|
if !deleted {
|
|
if keyBuf != nil {
|
|
if err = comp.AddUncompressedWord(keyBuf); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
keyCount++ // Only counting keys, not values
|
|
switch d.compressVals {
|
|
case true:
|
|
if err = comp.AddWord(valBuf); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
default:
|
|
if err = comp.AddUncompressedWord(valBuf); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
}
|
|
}
|
|
keyBuf = append(keyBuf[:0], lastKey...)
|
|
valBuf = append(valBuf[:0], lastVal...)
|
|
}
|
|
}
|
|
if keyBuf != nil {
|
|
if err = comp.AddUncompressedWord(keyBuf); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
keyCount++ // Only counting keys, not values
|
|
if d.compressVals {
|
|
if err = comp.AddWord(valBuf); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
} else {
|
|
if err = comp.AddUncompressedWord(valBuf); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
}
|
|
}
|
|
if err = comp.Compress(); err != nil {
|
|
return nil, nil, nil, err
|
|
}
|
|
comp.Close()
|
|
comp = nil
|
|
ps.Delete(p)
|
|
valuesIn = newFilesItem(r.valuesStartTxNum, r.valuesEndTxNum, d.aggregationStep)
|
|
if valuesIn.decompressor, err = compress.NewDecompressor(datPath); err != nil {
|
|
return nil, nil, nil, fmt.Errorf("merge %s decompressor [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
|
|
}
|
|
|
|
idxFileName := fmt.Sprintf("%s.%d-%d.kvi", d.filenameBase, r.valuesStartTxNum/d.aggregationStep, r.valuesEndTxNum/d.aggregationStep)
|
|
idxPath := filepath.Join(d.dir, idxFileName)
|
|
p = ps.AddNew("merge "+idxFileName, uint64(keyCount*2))
|
|
defer ps.Delete(p)
|
|
ps.Delete(p)
|
|
|
|
// if valuesIn.index, err = buildIndex(valuesIn.decompressor, idxPath, d.dir, keyCount, false /* values */); err != nil {
|
|
if valuesIn.index, err = buildIndexThenOpen(ctx, valuesIn.decompressor, idxPath, d.tmpdir, keyCount, false /* values */, p, d.logger, d.noFsync); err != nil {
|
|
return nil, nil, nil, fmt.Errorf("merge %s buildIndex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
|
|
}
|
|
|
|
btFileName := strings.TrimSuffix(idxFileName, "kvi") + "bt"
|
|
p = ps.AddNew(btFileName, uint64(keyCount*2))
|
|
defer ps.Delete(p)
|
|
btPath := filepath.Join(d.dir, btFileName)
|
|
err = BuildBtreeIndexWithDecompressor(btPath, valuesIn.decompressor, p, d.tmpdir, d.logger)
|
|
if err != nil {
|
|
return nil, nil, nil, fmt.Errorf("merge %s btindex [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
|
|
}
|
|
|
|
bt, err := OpenBtreeIndexWithDecompressor(btPath, DefaultBtreeM, valuesIn.decompressor)
|
|
if err != nil {
|
|
return nil, nil, nil, fmt.Errorf("merge %s btindex2 [%d-%d]: %w", d.filenameBase, r.valuesStartTxNum, r.valuesEndTxNum, err)
|
|
}
|
|
valuesIn.bindex = bt
|
|
}
|
|
closeItem = false
|
|
d.stats.MergesCount++
|
|
return
|
|
}
|
|
|
|
func (ii *InvertedIndex) mergeFiles(ctx context.Context, files []*filesItem, startTxNum, endTxNum uint64, workers int, ps *background.ProgressSet) (*filesItem, error) {
|
|
for _, h := range files {
|
|
defer h.decompressor.EnableMadvNormal().DisableReadAhead()
|
|
}
|
|
|
|
var outItem *filesItem
|
|
var comp *compress.Compressor
|
|
var decomp *compress.Decompressor
|
|
var err error
|
|
var closeItem = true
|
|
defer func() {
|
|
if closeItem {
|
|
if comp != nil {
|
|
comp.Close()
|
|
}
|
|
if decomp != nil {
|
|
decomp.Close()
|
|
}
|
|
if outItem != nil {
|
|
if outItem.decompressor != nil {
|
|
outItem.decompressor.Close()
|
|
}
|
|
if outItem.index != nil {
|
|
outItem.index.Close()
|
|
}
|
|
outItem = nil
|
|
}
|
|
}
|
|
}()
|
|
if ctx.Err() != nil {
|
|
return nil, ctx.Err()
|
|
}
|
|
|
|
datFileName := fmt.Sprintf("%s.%d-%d.ef", ii.filenameBase, startTxNum/ii.aggregationStep, endTxNum/ii.aggregationStep)
|
|
datPath := filepath.Join(ii.dir, datFileName)
|
|
if comp, err = compress.NewCompressor(ctx, "Snapshots merge", datPath, ii.tmpdir, compress.MinPatternScore, workers, log.LvlTrace, ii.logger); err != nil {
|
|
return nil, fmt.Errorf("merge %s inverted index compressor: %w", ii.filenameBase, err)
|
|
}
|
|
if ii.noFsync {
|
|
comp.DisableFsync()
|
|
}
|
|
p := ps.AddNew("merge "+datFileName, 1)
|
|
defer ps.Delete(p)
|
|
|
|
var cp CursorHeap
|
|
heap.Init(&cp)
|
|
|
|
for _, item := range files {
|
|
g := item.decompressor.MakeGetter()
|
|
g.Reset(0)
|
|
if g.HasNext() {
|
|
key, _ := g.Next(nil)
|
|
val, _ := g.Next(nil)
|
|
//fmt.Printf("heap push %s [%d] %x\n", item.decompressor.FilePath(), item.endTxNum, key)
|
|
heap.Push(&cp, &CursorItem{
|
|
t: FILE_CURSOR,
|
|
dg: g,
|
|
key: key,
|
|
val: val,
|
|
endTxNum: item.endTxNum,
|
|
reverse: true,
|
|
})
|
|
}
|
|
}
|
|
keyCount := 0
|
|
|
|
// In the loop below, the pair `keyBuf=>valBuf` is always 1 item behind `lastKey=>lastVal`.
|
|
// `lastKey` and `lastVal` are taken from the top of the multi-way merge (assisted by the CursorHeap cp), but not processed right away
|
|
// instead, the pair from the previous iteration is processed first - `keyBuf=>valBuf`. After that, `keyBuf` and `valBuf` are assigned
|
|
// to `lastKey` and `lastVal` correspondingly, and the next step of multi-way merge happens. Therefore, after the multi-way merge loop
|
|
// (when CursorHeap cp is empty), there is a need to process the last pair `keyBuf=>valBuf`, because it was one step behind
|
|
var keyBuf, valBuf []byte
|
|
for cp.Len() > 0 {
|
|
lastKey := common.Copy(cp[0].key)
|
|
lastVal := common.Copy(cp[0].val)
|
|
var mergedOnce bool
|
|
|
|
// Advance all the items that have this key (including the top)
|
|
for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) {
|
|
ci1 := cp[0]
|
|
if mergedOnce {
|
|
if lastVal, err = mergeEfs(ci1.val, lastVal, nil); err != nil {
|
|
return nil, fmt.Errorf("merge %s inverted index: %w", ii.filenameBase, err)
|
|
}
|
|
} else {
|
|
mergedOnce = true
|
|
}
|
|
//fmt.Printf("multi-way %s [%d] %x\n", ii.indexKeysTable, ci1.endTxNum, ci1.key)
|
|
if ci1.dg.HasNext() {
|
|
ci1.key, _ = ci1.dg.NextUncompressed()
|
|
ci1.val, _ = ci1.dg.NextUncompressed()
|
|
//fmt.Printf("heap next push %s [%d] %x\n", ii.indexKeysTable, ci1.endTxNum, ci1.key)
|
|
heap.Fix(&cp, 0)
|
|
} else {
|
|
heap.Pop(&cp)
|
|
}
|
|
}
|
|
if keyBuf != nil {
|
|
if err = comp.AddUncompressedWord(keyBuf); err != nil {
|
|
return nil, err
|
|
}
|
|
keyCount++ // Only counting keys, not values
|
|
if err = comp.AddUncompressedWord(valBuf); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
keyBuf = append(keyBuf[:0], lastKey...)
|
|
valBuf = append(valBuf[:0], lastVal...)
|
|
}
|
|
if keyBuf != nil {
|
|
if err = comp.AddUncompressedWord(keyBuf); err != nil {
|
|
return nil, err
|
|
}
|
|
keyCount++ // Only counting keys, not values
|
|
if err = comp.AddUncompressedWord(valBuf); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if err = comp.Compress(); err != nil {
|
|
return nil, err
|
|
}
|
|
comp.Close()
|
|
comp = nil
|
|
outItem = newFilesItem(startTxNum, endTxNum, ii.aggregationStep)
|
|
if outItem.decompressor, err = compress.NewDecompressor(datPath); err != nil {
|
|
return nil, fmt.Errorf("merge %s decompressor [%d-%d]: %w", ii.filenameBase, startTxNum, endTxNum, err)
|
|
}
|
|
ps.Delete(p)
|
|
|
|
idxFileName := fmt.Sprintf("%s.%d-%d.efi", ii.filenameBase, startTxNum/ii.aggregationStep, endTxNum/ii.aggregationStep)
|
|
idxPath := filepath.Join(ii.dir, idxFileName)
|
|
p = ps.AddNew("merge "+idxFileName, uint64(outItem.decompressor.Count()*2))
|
|
defer ps.Delete(p)
|
|
if outItem.index, err = buildIndexThenOpen(ctx, outItem.decompressor, idxPath, ii.tmpdir, keyCount, false /* values */, p, ii.logger, ii.noFsync); err != nil {
|
|
return nil, fmt.Errorf("merge %s buildIndex [%d-%d]: %w", ii.filenameBase, startTxNum, endTxNum, err)
|
|
}
|
|
closeItem = false
|
|
return outItem, nil
|
|
}
|
|
|
|
func (h *History) mergeFiles(ctx context.Context, indexFiles, historyFiles []*filesItem, r HistoryRanges, workers int, ps *background.ProgressSet) (indexIn, historyIn *filesItem, err error) {
|
|
if !r.any() {
|
|
return nil, nil, nil
|
|
}
|
|
var closeIndex = true
|
|
defer func() {
|
|
if closeIndex {
|
|
if indexIn != nil {
|
|
indexIn.decompressor.Close()
|
|
indexIn.index.Close()
|
|
}
|
|
}
|
|
}()
|
|
if indexIn, err = h.InvertedIndex.mergeFiles(ctx, indexFiles, r.indexStartTxNum, r.indexEndTxNum, workers, ps); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
if r.history {
|
|
for _, f := range indexFiles {
|
|
defer f.decompressor.EnableMadvNormal().DisableReadAhead()
|
|
}
|
|
for _, f := range historyFiles {
|
|
defer f.decompressor.EnableMadvNormal().DisableReadAhead()
|
|
}
|
|
|
|
var comp *compress.Compressor
|
|
var decomp *compress.Decompressor
|
|
var rs *recsplit.RecSplit
|
|
var index *recsplit.Index
|
|
var closeItem = true
|
|
defer func() {
|
|
if closeItem {
|
|
if comp != nil {
|
|
comp.Close()
|
|
}
|
|
if decomp != nil {
|
|
decomp.Close()
|
|
}
|
|
if rs != nil {
|
|
rs.Close()
|
|
}
|
|
if index != nil {
|
|
index.Close()
|
|
}
|
|
if historyIn != nil {
|
|
if historyIn.decompressor != nil {
|
|
historyIn.decompressor.Close()
|
|
}
|
|
if historyIn.index != nil {
|
|
historyIn.index.Close()
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
datFileName := fmt.Sprintf("%s.%d-%d.v", h.filenameBase, r.historyStartTxNum/h.aggregationStep, r.historyEndTxNum/h.aggregationStep)
|
|
idxFileName := fmt.Sprintf("%s.%d-%d.vi", h.filenameBase, r.historyStartTxNum/h.aggregationStep, r.historyEndTxNum/h.aggregationStep)
|
|
datPath := filepath.Join(h.dir, datFileName)
|
|
idxPath := filepath.Join(h.dir, idxFileName)
|
|
if comp, err = compress.NewCompressor(ctx, "merge", datPath, h.tmpdir, compress.MinPatternScore, workers, log.LvlTrace, h.logger); err != nil {
|
|
return nil, nil, fmt.Errorf("merge %s history compressor: %w", h.filenameBase, err)
|
|
}
|
|
if h.noFsync {
|
|
comp.DisableFsync()
|
|
}
|
|
p := ps.AddNew("merge "+datFileName, 1)
|
|
defer ps.Delete(p)
|
|
var cp CursorHeap
|
|
heap.Init(&cp)
|
|
for _, item := range indexFiles {
|
|
g := item.decompressor.MakeGetter()
|
|
g.Reset(0)
|
|
if g.HasNext() {
|
|
var g2 *compress.Getter
|
|
for _, hi := range historyFiles { // full-scan, because it's ok to have different amount files. by unclean-shutdown.
|
|
if hi.startTxNum == item.startTxNum && hi.endTxNum == item.endTxNum {
|
|
g2 = hi.decompressor.MakeGetter()
|
|
break
|
|
}
|
|
}
|
|
if g2 == nil {
|
|
panic(fmt.Sprintf("for file: %s, not found corresponding file to merge", g.FileName()))
|
|
}
|
|
key, _ := g.NextUncompressed()
|
|
val, _ := g.NextUncompressed()
|
|
heap.Push(&cp, &CursorItem{
|
|
t: FILE_CURSOR,
|
|
dg: g,
|
|
dg2: g2,
|
|
key: key,
|
|
val: val,
|
|
endTxNum: item.endTxNum,
|
|
reverse: false,
|
|
})
|
|
}
|
|
}
|
|
// In the loop below, the pair `keyBuf=>valBuf` is always 1 item behind `lastKey=>lastVal`.
|
|
// `lastKey` and `lastVal` are taken from the top of the multi-way merge (assisted by the CursorHeap cp), but not processed right away
|
|
// instead, the pair from the previous iteration is processed first - `keyBuf=>valBuf`. After that, `keyBuf` and `valBuf` are assigned
|
|
// to `lastKey` and `lastVal` correspondingly, and the next step of multi-way merge happens. Therefore, after the multi-way merge loop
|
|
// (when CursorHeap cp is empty), there is a need to process the last pair `keyBuf=>valBuf`, because it was one step behind
|
|
var valBuf []byte
|
|
var keyCount int
|
|
for cp.Len() > 0 {
|
|
lastKey := common.Copy(cp[0].key)
|
|
// Advance all the items that have this key (including the top)
|
|
for cp.Len() > 0 && bytes.Equal(cp[0].key, lastKey) {
|
|
ci1 := cp[0]
|
|
count := eliasfano32.Count(ci1.val)
|
|
for i := uint64(0); i < count; i++ {
|
|
if !ci1.dg2.HasNext() {
|
|
panic(fmt.Errorf("assert: no value??? %s, i=%d, count=%d, lastKey=%x, ci1.key=%x", ci1.dg2.FileName(), i, count, lastKey, ci1.key))
|
|
}
|
|
|
|
if h.compressVals {
|
|
valBuf, _ = ci1.dg2.Next(valBuf[:0])
|
|
if err = comp.AddWord(valBuf); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
} else {
|
|
valBuf, _ = ci1.dg2.NextUncompressed()
|
|
if err = comp.AddUncompressedWord(valBuf); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
}
|
|
}
|
|
keyCount += int(count)
|
|
if ci1.dg.HasNext() {
|
|
ci1.key, _ = ci1.dg.NextUncompressed()
|
|
ci1.val, _ = ci1.dg.NextUncompressed()
|
|
heap.Fix(&cp, 0)
|
|
} else {
|
|
heap.Remove(&cp, 0)
|
|
}
|
|
}
|
|
}
|
|
if err = comp.Compress(); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
comp.Close()
|
|
comp = nil
|
|
if decomp, err = compress.NewDecompressor(datPath); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
ps.Delete(p)
|
|
|
|
p = ps.AddNew("merge "+idxFileName, uint64(2*keyCount))
|
|
defer ps.Delete(p)
|
|
if rs, err = recsplit.NewRecSplit(recsplit.RecSplitArgs{
|
|
KeyCount: keyCount,
|
|
Enums: false,
|
|
BucketSize: 2000,
|
|
LeafSize: 8,
|
|
TmpDir: h.tmpdir,
|
|
IndexFile: idxPath,
|
|
}, h.logger); err != nil {
|
|
return nil, nil, fmt.Errorf("create recsplit: %w", err)
|
|
}
|
|
rs.LogLvl(log.LvlTrace)
|
|
if h.noFsync {
|
|
rs.DisableFsync()
|
|
}
|
|
var historyKey []byte
|
|
var txKey [8]byte
|
|
var valOffset uint64
|
|
g := indexIn.decompressor.MakeGetter()
|
|
g2 := decomp.MakeGetter()
|
|
var keyBuf []byte
|
|
for {
|
|
g.Reset(0)
|
|
g2.Reset(0)
|
|
valOffset = 0
|
|
for g.HasNext() {
|
|
keyBuf, _ = g.NextUncompressed()
|
|
valBuf, _ = g.NextUncompressed()
|
|
ef, _ := eliasfano32.ReadEliasFano(valBuf)
|
|
efIt := ef.Iterator()
|
|
for efIt.HasNext() {
|
|
txNum, _ := efIt.Next()
|
|
binary.BigEndian.PutUint64(txKey[:], txNum)
|
|
historyKey = append(append(historyKey[:0], txKey[:]...), keyBuf...)
|
|
if err = rs.AddKey(historyKey, valOffset); err != nil {
|
|
return nil, nil, err
|
|
}
|
|
if h.compressVals {
|
|
valOffset, _ = g2.Skip()
|
|
} else {
|
|
valOffset, _ = g2.SkipUncompressed()
|
|
}
|
|
}
|
|
p.Processed.Add(1)
|
|
}
|
|
if err = rs.Build(ctx); err != nil {
|
|
if rs.Collision() {
|
|
log.Info("Building recsplit. Collision happened. It's ok. Restarting...")
|
|
rs.ResetNextSalt()
|
|
} else {
|
|
return nil, nil, fmt.Errorf("build %s idx: %w", h.filenameBase, err)
|
|
}
|
|
} else {
|
|
break
|
|
}
|
|
}
|
|
rs.Close()
|
|
rs = nil
|
|
if index, err = recsplit.OpenIndex(idxPath); err != nil {
|
|
return nil, nil, fmt.Errorf("open %s idx: %w", h.filenameBase, err)
|
|
}
|
|
historyIn = newFilesItem(r.historyStartTxNum, r.historyEndTxNum, h.aggregationStep)
|
|
historyIn.decompressor = decomp
|
|
historyIn.index = index
|
|
|
|
closeItem = false
|
|
}
|
|
|
|
closeIndex = false
|
|
return
|
|
}
|
|
|
|
func (d *Domain) integrateMergedFiles(valuesOuts, indexOuts, historyOuts []*filesItem, valuesIn, indexIn, historyIn *filesItem) {
|
|
d.History.integrateMergedFiles(indexOuts, historyOuts, indexIn, historyIn)
|
|
if valuesIn != nil {
|
|
d.files.Set(valuesIn)
|
|
|
|
// `kill -9` may leave some garbage
|
|
// but it still may be useful for merges, until we finish merge frozen file
|
|
if historyIn != nil && historyIn.frozen {
|
|
d.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.frozen || item.endTxNum > valuesIn.endTxNum {
|
|
continue
|
|
}
|
|
valuesOuts = append(valuesOuts, item)
|
|
}
|
|
return true
|
|
})
|
|
}
|
|
}
|
|
for _, out := range valuesOuts {
|
|
if out == nil {
|
|
panic("must not happen")
|
|
}
|
|
d.files.Delete(out)
|
|
out.canDelete.Store(true)
|
|
}
|
|
d.reCalcRoFiles()
|
|
}
|
|
|
|
func (ii *InvertedIndex) integrateMergedFiles(outs []*filesItem, in *filesItem) {
|
|
if in != nil {
|
|
ii.files.Set(in)
|
|
|
|
// `kill -9` may leave some garbage
|
|
// but it still may be useful for merges, until we finish merge frozen file
|
|
if in.frozen {
|
|
ii.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.frozen || item.endTxNum > in.endTxNum {
|
|
continue
|
|
}
|
|
outs = append(outs, item)
|
|
}
|
|
return true
|
|
})
|
|
}
|
|
}
|
|
for _, out := range outs {
|
|
if out == nil {
|
|
panic("must not happen: " + ii.filenameBase)
|
|
}
|
|
ii.files.Delete(out)
|
|
out.canDelete.Store(true)
|
|
}
|
|
ii.reCalcRoFiles()
|
|
}
|
|
|
|
func (h *History) integrateMergedFiles(indexOuts, historyOuts []*filesItem, indexIn, historyIn *filesItem) {
|
|
h.InvertedIndex.integrateMergedFiles(indexOuts, indexIn)
|
|
//TODO: handle collision
|
|
if historyIn != nil {
|
|
h.files.Set(historyIn)
|
|
|
|
// `kill -9` may leave some garbage
|
|
// but it still may be useful for merges, until we finish merge frozen file
|
|
if historyIn.frozen {
|
|
h.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.frozen || item.endTxNum > historyIn.endTxNum {
|
|
continue
|
|
}
|
|
historyOuts = append(historyOuts, item)
|
|
}
|
|
return true
|
|
})
|
|
}
|
|
}
|
|
for _, out := range historyOuts {
|
|
if out == nil {
|
|
panic("must not happen: " + h.filenameBase)
|
|
}
|
|
h.files.Delete(out)
|
|
out.canDelete.Store(true)
|
|
}
|
|
h.reCalcRoFiles()
|
|
}
|
|
|
|
// nolint
|
|
func (dc *DomainContext) frozenTo() uint64 {
|
|
if len(dc.files) == 0 {
|
|
return 0
|
|
}
|
|
for i := len(dc.files) - 1; i >= 0; i-- {
|
|
if dc.files[i].src.frozen {
|
|
return cmp.Min(dc.files[i].endTxNum, dc.hc.frozenTo())
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func (hc *HistoryContext) frozenTo() uint64 {
|
|
if len(hc.files) == 0 {
|
|
return 0
|
|
}
|
|
for i := len(hc.files) - 1; i >= 0; i-- {
|
|
if hc.files[i].src.frozen {
|
|
return cmp.Min(hc.files[i].endTxNum, hc.ic.frozenTo())
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
func (ic *InvertedIndexContext) frozenTo() uint64 {
|
|
if len(ic.files) == 0 {
|
|
return 0
|
|
}
|
|
for i := len(ic.files) - 1; i >= 0; i-- {
|
|
if ic.files[i].src.frozen {
|
|
return ic.files[i].endTxNum
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func (d *Domain) cleanAfterFreeze(frozenTo uint64) {
|
|
if frozenTo == 0 {
|
|
return
|
|
}
|
|
|
|
var outs []*filesItem
|
|
// `kill -9` may leave some garbage
|
|
// but it may be useful for merges, until merge `frozen` file
|
|
d.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.frozen || item.endTxNum > frozenTo {
|
|
continue
|
|
}
|
|
outs = append(outs, item)
|
|
}
|
|
return true
|
|
})
|
|
|
|
for _, out := range outs {
|
|
if out == nil {
|
|
panic("must not happen: " + d.filenameBase)
|
|
}
|
|
d.files.Delete(out)
|
|
if out.refcount.Load() == 0 {
|
|
// if it has no readers (invisible even for us) - it's safe to remove file right here
|
|
out.closeFilesAndRemove()
|
|
}
|
|
out.canDelete.Store(true)
|
|
}
|
|
d.History.cleanAfterFreeze(frozenTo)
|
|
}
|
|
|
|
// cleanAfterFreeze - mark all small files before `f` as `canDelete=true`
|
|
func (h *History) cleanAfterFreeze(frozenTo uint64) {
|
|
if frozenTo == 0 {
|
|
return
|
|
}
|
|
//if h.filenameBase == "accounts" {
|
|
// log.Warn("[history] History.cleanAfterFreeze", "frozenTo", frozenTo/h.aggregationStep, "stack", dbg.Stack())
|
|
//}
|
|
var outs []*filesItem
|
|
// `kill -9` may leave some garbage
|
|
// but it may be useful for merges, until merge `frozen` file
|
|
h.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.frozen || item.endTxNum > frozenTo {
|
|
continue
|
|
}
|
|
outs = append(outs, item)
|
|
}
|
|
return true
|
|
})
|
|
|
|
for _, out := range outs {
|
|
if out == nil {
|
|
panic("must not happen: " + h.filenameBase)
|
|
}
|
|
out.canDelete.Store(true)
|
|
|
|
//if out.refcount.Load() == 0 {
|
|
// if h.filenameBase == "accounts" {
|
|
// log.Warn("[history] History.cleanAfterFreeze: immediately delete", "name", out.decompressor.FileName())
|
|
// }
|
|
//} else {
|
|
// if h.filenameBase == "accounts" {
|
|
// log.Warn("[history] History.cleanAfterFreeze: mark as 'canDelete=true'", "name", out.decompressor.FileName())
|
|
// }
|
|
//}
|
|
|
|
// if it has no readers (invisible even for us) - it's safe to remove file right here
|
|
if out.refcount.Load() == 0 {
|
|
out.closeFilesAndRemove()
|
|
}
|
|
h.files.Delete(out)
|
|
}
|
|
h.InvertedIndex.cleanAfterFreeze(frozenTo)
|
|
}
|
|
|
|
// cleanAfterFreeze - mark all small files before `f` as `canDelete=true`
|
|
func (ii *InvertedIndex) cleanAfterFreeze(frozenTo uint64) {
|
|
if frozenTo == 0 {
|
|
return
|
|
}
|
|
var outs []*filesItem
|
|
// `kill -9` may leave some garbage
|
|
// but it may be useful for merges, until merge `frozen` file
|
|
ii.files.Walk(func(items []*filesItem) bool {
|
|
for _, item := range items {
|
|
if item.frozen || item.endTxNum > frozenTo {
|
|
continue
|
|
}
|
|
outs = append(outs, item)
|
|
}
|
|
return true
|
|
})
|
|
|
|
for _, out := range outs {
|
|
if out == nil {
|
|
panic("must not happen: " + ii.filenameBase)
|
|
}
|
|
out.canDelete.Store(true)
|
|
if out.refcount.Load() == 0 {
|
|
// if it has no readers (invisible even for us) - it's safe to remove file right here
|
|
out.closeFilesAndRemove()
|
|
}
|
|
ii.files.Delete(out)
|
|
}
|
|
}
|
|
|
|
// nolint
|
|
func (d *Domain) deleteGarbageFiles() {
|
|
for _, item := range d.garbageFiles {
|
|
// paranoic-mode: don't delete frozen files
|
|
steps := item.endTxNum/d.aggregationStep - item.startTxNum/d.aggregationStep
|
|
if steps%StepsInBiggestFile == 0 {
|
|
continue
|
|
}
|
|
f1 := fmt.Sprintf("%s.%d-%d.kv", d.filenameBase, item.startTxNum/d.aggregationStep, item.endTxNum/d.aggregationStep)
|
|
os.Remove(filepath.Join(d.dir, f1))
|
|
log.Debug("[snapshots] delete garbage", f1)
|
|
f2 := fmt.Sprintf("%s.%d-%d.kvi", d.filenameBase, item.startTxNum/d.aggregationStep, item.endTxNum/d.aggregationStep)
|
|
os.Remove(filepath.Join(d.dir, f2))
|
|
log.Debug("[snapshots] delete garbage", f2)
|
|
}
|
|
d.garbageFiles = nil
|
|
d.History.deleteGarbageFiles()
|
|
}
|
|
func (h *History) deleteGarbageFiles() {
|
|
for _, item := range h.garbageFiles {
|
|
// paranoic-mode: don't delete frozen files
|
|
if item.endTxNum/h.aggregationStep-item.startTxNum/h.aggregationStep == StepsInBiggestFile {
|
|
continue
|
|
}
|
|
f1 := fmt.Sprintf("%s.%d-%d.v", h.filenameBase, item.startTxNum/h.aggregationStep, item.endTxNum/h.aggregationStep)
|
|
os.Remove(filepath.Join(h.dir, f1))
|
|
log.Debug("[snapshots] delete garbage", f1)
|
|
f2 := fmt.Sprintf("%s.%d-%d.vi", h.filenameBase, item.startTxNum/h.aggregationStep, item.endTxNum/h.aggregationStep)
|
|
os.Remove(filepath.Join(h.dir, f2))
|
|
log.Debug("[snapshots] delete garbage", f2)
|
|
}
|
|
h.garbageFiles = nil
|
|
h.InvertedIndex.deleteGarbageFiles()
|
|
}
|
|
func (ii *InvertedIndex) deleteGarbageFiles() {
|
|
for _, item := range ii.garbageFiles {
|
|
// paranoic-mode: don't delete frozen files
|
|
if item.endTxNum/ii.aggregationStep-item.startTxNum/ii.aggregationStep == StepsInBiggestFile {
|
|
continue
|
|
}
|
|
f1 := fmt.Sprintf("%s.%d-%d.ef", ii.filenameBase, item.startTxNum/ii.aggregationStep, item.endTxNum/ii.aggregationStep)
|
|
os.Remove(filepath.Join(ii.dir, f1))
|
|
log.Debug("[snapshots] delete garbage", f1)
|
|
f2 := fmt.Sprintf("%s.%d-%d.efi", ii.filenameBase, item.startTxNum/ii.aggregationStep, item.endTxNum/ii.aggregationStep)
|
|
os.Remove(filepath.Join(ii.dir, f2))
|
|
log.Debug("[snapshots] delete garbage", f2)
|
|
}
|
|
ii.garbageFiles = nil
|
|
}
|