Mirror of https://gitlab.com/pulsechaincom/erigon-pulse.git (synced 2025-01-15 23:38:19 +00:00)

commit e649f7ea91
parent f5deff3ac9
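The commit message is not preserved on this page, but the diff below makes three kinds of changes across erigon-lib. In the decompressor, the Getter gains an fName field so the "likely .idx is invalid" panics can name the file being read. In etl, writeToDisk converts its reusable key/value pair to interface{} once, outside the encode loop. In recsplit, AddKey switches from per-call stack buffers to scratch fields on the RecSplit struct, and Build creates its temporary index file next to the final one instead of in tmpDir.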
@@ -116,6 +116,7 @@ type Getter struct {
 	mask      byte
 	uncovered []int // Buffer for uncovered portions of the word
 	word      []byte
+	fName     string
 }
 
 func (g *Getter) zero() bool {
@@ -212,7 +213,7 @@ func (d *Decompressor) Count() int { return int(d.count) }
 // Getter is not thread-safe, but there can be multiple getters used simultaneously and concurrently
 // for the same decompressor
 func (d *Decompressor) MakeGetter() *Getter {
-	return &Getter{patternDict: &d.dict, posDict: &d.posDict, data: d.data[d.wordsStart:], uncovered: make([]int, 0, 128)}
+	return &Getter{patternDict: &d.dict, posDict: &d.posDict, data: d.data[d.wordsStart:], uncovered: make([]int, 0, 128), fName: d.compressedFile}
 }
 
 func (g *Getter) Reset(offset uint64) {
@@ -247,7 +248,7 @@ func (g *Getter) Next(buf []byte) ([]byte, uint64) {
 		lastPos = intPos
 		pattern := g.nextPattern()
 		if len(g.word) < intPos {
-			panic("likely .idx is invalid")
+			panic(fmt.Sprintf("likely .idx is invalid: %s", g.fName))
 		}
 		copy(g.word[intPos:], pattern)
 		if intPos > lastUncovered {
@@ -284,7 +285,7 @@ func (g *Getter) Skip() uint64 {
 		intPos := lastPos + int(pos) - 1
 		lastPos = intPos
 		if wordLen < intPos {
-			panic("likely .idx is invalid")
+			panic(fmt.Sprintf("likely .idx is invalid: %s", g.fName))
 		}
 		if intPos > lastUncovered {
 			add += uint64(intPos - lastUncovered)
@@ -402,7 +403,7 @@ func (g *Getter) MatchPrefix(buf []byte) bool {
 		intPos := lastPos + int(pos) - 1
 		lastPos = intPos
 		if wordLen < intPos {
-			panic("likely .idx is invalid")
+			panic(fmt.Sprintf("likely .idx is invalid: %s", g.fName))
 		}
 		pattern = g.nextPattern()
 		if strings.HasPrefix(string(pattern), string(buf)) {
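Taken together, the hunks above thread the source file name through to the failure path: MakeGetter copies d.compressedFile into the new fName field, and each of the three panic sites in Next, Skip, and MatchPrefix formats it into the message. A minimal, self-contained sketch of the same pattern (the type and file names here are hypothetical, not the erigon-lib API):

package main

import "fmt"

// reader mimics the Getter change: it remembers which file its data came
// from so invariant violations can name the file instead of failing anonymously.
type reader struct {
	fName string // source file, recorded at construction time
	data  []byte
}

func newReader(fName string, data []byte) *reader {
	return &reader{fName: fName, data: data}
}

func (r *reader) byteAt(i int) byte {
	if i >= len(r.data) {
		// the file name turns a bare "invalid index" panic into an actionable report
		panic(fmt.Sprintf("likely index is invalid: %s", r.fName))
	}
	return r.data[i]
}

func main() {
	r := newReader("v1-000100-000200-bodies.seg", []byte{1, 2, 3})
	fmt.Println(r.byteAt(2)) // ok
	fmt.Println(r.byteAt(9)) // panics, naming the file
}

With many Getters open concurrently over different segment files, the file name is often the only clue to which file's .idx is corrupt.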
@@ -110,10 +110,11 @@ func (p *fileDataProvider) String() string {
 }
 
 func writeToDisk(encoder Encoder, entries []sortableBufferEntry) error {
-	pair := [2][]byte{}
+	pair := make([][]byte, 2)
+	pairInterface := interface{}(pair) // to avoid interface cast on each iteration
 	for i := range entries {
 		pair[0], pair[1] = entries[i].key, entries[i].value
-		if err := encoder.Encode(pair); err != nil {
+		if err := encoder.Encode(pairInterface); err != nil {
 			return fmt.Errorf("error writing entries to disk: %w", err)
 		}
 	}
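The writeToDisk change is a small allocation optimization. Passing the [2][]byte array to Encode boxes a fresh copy of the array into an interface{} on every call; a [][]byte slice converted to interface{} once can be reused, because the interface holds the slice header and later writes to pair[0] and pair[1] stay visible through it. A rough benchmark sketch of the two shapes, using encoding/gob as a stand-in for etl's Encoder (an assumption; the concrete encoder type isn't shown in this diff):

package pairenc

import (
	"encoding/gob"
	"io"
	"testing"
)

var key, val = []byte("key"), []byte("value")

// Array version: each Encode call converts a copy of the array into a new interface{}.
func BenchmarkEncodeArray(b *testing.B) {
	enc := gob.NewEncoder(io.Discard)
	pair := [2][]byte{}
	for i := 0; i < b.N; i++ {
		pair[0], pair[1] = key, val
		if err := enc.Encode(pair); err != nil {
			b.Fatal(err)
		}
	}
}

// Slice version: one conversion up front; the interface sees every mutation
// of pair because both share the same backing array.
func BenchmarkEncodePreconverted(b *testing.B) {
	enc := gob.NewEncoder(io.Discard)
	pair := make([][]byte, 2)
	pairInterface := interface{}(pair)
	for i := 0; i < b.N; i++ {
		pair[0], pair[1] = key, val
		if err := enc.Encode(pairInterface); err != nil {
			b.Fatal(err)
		}
	}
}

Save this as a _test.go file and run go test -bench . -benchmem to compare; the array version should show the extra per-call interface allocation, though the exact numbers depend on the Go version.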
@@ -24,9 +24,7 @@ import (
 	"math"
 	"math/bits"
 	"os"
-	"path/filepath"
 
-	"github.com/ledgerwatch/erigon-lib/common"
 	"github.com/ledgerwatch/erigon-lib/etl"
 	"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano16"
 	"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
@@ -97,6 +95,7 @@ type RecSplit struct {
 	indexW       *bufio.Writer
 	bytesPerRec  int
 	numBuf       [8]byte
+	bucketKeyBuf [16]byte
 	trace        bool
 	prevOffset   uint64 // Previously added offset (for calculating minDelta for Elias Fano encoding of "enum -> offset" index)
 	minDelta     uint64 // minDelta for Elias Fano encoding of "enum -> offset" index
@@ -281,11 +280,9 @@ func (rs *RecSplit) AddKey(key []byte, offset uint64) error {
 	rs.hasher.Reset()
 	rs.hasher.Write(key) //nolint:errcheck
 	hi, lo := rs.hasher.Sum128()
-	var bucketKey [16]byte
-	binary.BigEndian.PutUint64(bucketKey[:], remap(hi, rs.bucketCount))
-	binary.BigEndian.PutUint64(bucketKey[8:], lo)
-	var offsetVal [8]byte
-	binary.BigEndian.PutUint64(offsetVal[:], offset)
+	binary.BigEndian.PutUint64(rs.bucketKeyBuf[:], remap(hi, rs.bucketCount))
+	binary.BigEndian.PutUint64(rs.bucketKeyBuf[8:], lo)
+	binary.BigEndian.PutUint64(rs.numBuf[:], offset)
 	if offset > rs.maxOffset {
 		rs.maxOffset = offset
 	}
@@ -297,16 +294,15 @@ func (rs *RecSplit) AddKey(key []byte, offset uint64) error {
 	}
 
 	if rs.enums {
-		if err := rs.offsetCollector.Collect(offsetVal[:], nil); err != nil {
+		if err := rs.offsetCollector.Collect(rs.numBuf[:], nil); err != nil {
 			return err
 		}
-		var keyIdx [8]byte
-		binary.BigEndian.PutUint64(keyIdx[:], rs.keysAdded)
-		if err := rs.bucketCollector.Collect(bucketKey[:], keyIdx[:]); err != nil {
+		binary.BigEndian.PutUint64(rs.numBuf[:], rs.keysAdded)
+		if err := rs.bucketCollector.Collect(rs.bucketKeyBuf[:], rs.numBuf[:]); err != nil {
 			return err
 		}
 	} else {
-		if err := rs.bucketCollector.Collect(bucketKey[:], offsetVal[:]); err != nil {
+		if err := rs.bucketCollector.Collect(rs.bucketKeyBuf[:], rs.numBuf[:]); err != nil {
 			return err
 		}
 	}
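The AddKey hunks replace per-call stack arrays (bucketKey, offsetVal, keyIdx) with two scratch fields on RecSplit itself: the new bucketKeyBuf and the existing numBuf. Locals whose slices are passed to an interface method like Collect tend to escape to the heap, so hoisting them into the struct trades a per-key allocation for a little struct state. A minimal sketch of the pattern under hypothetical names (collector and indexBuilder are illustrations, not the etl API):

package main

import (
	"encoding/binary"
	"fmt"
)

// collector stands in for an etl-style collector: it must copy or consume
// the bytes before the caller reuses the buffer.
type collector interface {
	Collect(k, v []byte) error
}

type printCollector struct{}

func (printCollector) Collect(k, v []byte) error {
	fmt.Printf("k=%x v=%x\n", k, v)
	return nil
}

type indexBuilder struct {
	numBuf [8]byte // reusable scratch space, like RecSplit.numBuf
	c      collector
}

// addLocal may allocate: buf[:] flows into an interface call, so the
// compiler generally cannot keep buf on the stack.
func (b *indexBuilder) addLocal(offset uint64) error {
	var buf [8]byte
	binary.BigEndian.PutUint64(buf[:], offset)
	return b.c.Collect(buf[:], nil)
}

// addReused writes into the struct's scratch buffer instead; no per-call
// allocation, but not safe for concurrent use of the same builder.
func (b *indexBuilder) addReused(offset uint64) error {
	binary.BigEndian.PutUint64(b.numBuf[:], offset)
	return b.c.Collect(b.numBuf[:], nil)
}

func main() {
	b := &indexBuilder{c: printCollector{}}
	_ = b.addLocal(42)
	_ = b.addReused(42)
}

Note that within a single AddKey the diff reuses numBuf twice (offset first, then rs.keysAdded), which assumes offsetCollector.Collect has consumed or copied the bytes before the overwrite; the pattern is safe only because AddKey is single-threaded.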
@@ -495,9 +491,7 @@ func (rs *RecSplit) loadFuncOffset(k, _ []byte, _ etl.CurrentTableReader, _ etl.
 // Build has to be called after all the keys have been added, and it initiates the process
 // of building the perfect hash function and writing index into a file
 func (rs *RecSplit) Build() error {
-	_, fileName := filepath.Split(rs.indexFile)
-	tmpIdxFilePath := filepath.Join(rs.tmpDir, fileName)
-	common.MustExist(rs.tmpDir)
+	tmpIdxFilePath := rs.indexFile + ".tmp"
 
 	if rs.built {
 		return fmt.Errorf("already built")
@@ -622,8 +616,6 @@ func (rs *RecSplit) Build() error {
 	_ = rs.indexW.Flush()
 	_ = rs.indexF.Sync()
 	_ = rs.indexF.Close()
-	dir, _ := filepath.Split(rs.indexFile)
-	common.MustExist(dir)
 	if err := os.Rename(tmpIdxFilePath, rs.indexFile); err != nil {
 		return err
 	}
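The Build() hunks change where the temporary index file lives. Composing a path in rs.tmpDir risks placing the temp file on a different filesystem than the final index, in which case os.Rename fails (it does not copy across devices); writing to the target path plus a ".tmp" suffix guarantees the final rename is a same-directory operation. The dropped common.MustExist calls go with it, since the target directory must already exist for the temp file to be created there, and the path/filepath import becomes unused. A sketch of the write-then-rename pattern (the helper name is hypothetical):

package main

import (
	"fmt"
	"os"
)

// writeAtomically writes data to a sibling ".tmp" file and renames it into
// place. Because the temp file is in the same directory as the target,
// os.Rename never crosses filesystems; on POSIX systems the rename is
// atomic, so readers see either the old file or the complete new one.
func writeAtomically(path string, data []byte) error {
	tmp := path + ".tmp"
	if err := os.WriteFile(tmp, data, 0o644); err != nil {
		return err
	}
	return os.Rename(tmp, path)
}

func main() {
	if err := writeAtomically("example.idx", []byte("index payload")); err != nil {
		fmt.Println("write failed:", err)
	}
}

The diff's Build additionally calls Sync before Close and Rename; the sketch leans on os.WriteFile and omits that durability step for brevity.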