mirror of
https://gitlab.com/pulsechaincom/erigon-pulse.git
synced 2024-12-24 12:37:16 +00:00
add words count in .seg (breaking change in snapshot format) (#3140)
This commit is contained in:
parent
502e933029
commit
82753a6cda
@ -2171,19 +2171,19 @@ func reducedict(name string, segmentFileName string) error {
|
||||
wg.Add(1)
|
||||
go reduceDictWorker(ch, &wg, &pt, collector, inputSize, outputSize, posMap)
|
||||
}
|
||||
i := 0
|
||||
var wordsCount uint64
|
||||
if err := snapshotsync.ReadSimpleFile(name+".dat", func(v []byte) error {
|
||||
input := make([]byte, 8+int(len(v)))
|
||||
binary.BigEndian.PutUint64(input, uint64(i))
|
||||
binary.BigEndian.PutUint64(input, wordsCount)
|
||||
copy(input[8:], v)
|
||||
ch <- input
|
||||
i++
|
||||
wordsCount++
|
||||
select {
|
||||
default:
|
||||
case <-logEvery.C:
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
log.Info("Replacement preprocessing", "processed", fmt.Sprintf("%dK", i/1_000), "input", common.StorageSize(inputSize.Load()), "output", common.StorageSize(outputSize.Load()), "alloc", common.StorageSize(m.Alloc), "sys", common.StorageSize(m.Sys))
|
||||
log.Info("Replacement preprocessing", "processed", fmt.Sprintf("%dK", wordsCount/1_000), "input", common.StorageSize(inputSize.Load()), "output", common.StorageSize(outputSize.Load()), "alloc", common.StorageSize(m.Alloc), "sys", common.StorageSize(m.Sys))
|
||||
}
|
||||
return nil
|
||||
}); err != nil {
|
||||
@ -2191,6 +2191,7 @@ func reducedict(name string, segmentFileName string) error {
|
||||
}
|
||||
close(ch)
|
||||
wg.Wait()
|
||||
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
log.Info("Done", "input", common.StorageSize(inputSize.Load()), "output", common.StorageSize(outputSize.Load()), "alloc", common.StorageSize(m.Alloc), "sys", common.StorageSize(m.Sys))
|
||||
@ -2223,7 +2224,7 @@ func reducedict(name string, segmentFileName string) error {
|
||||
offset += uint64(n + len(p.w))
|
||||
}
|
||||
patternCutoff := offset // All offsets below this will be considered patterns
|
||||
i = 0
|
||||
i := 0
|
||||
log.Info("Effective dictionary", "size", patternList.Len())
|
||||
// Build Huffman tree for codes
|
||||
var codeHeap PatternHeap
|
||||
@ -2284,17 +2285,22 @@ func reducedict(name string, segmentFileName string) error {
|
||||
return err
|
||||
}
|
||||
cw := bufio.NewWriterSize(cf, etl.BufIOSize)
|
||||
// First, output dictionary
|
||||
// 1-st, output the number of words (wordsCount)
|
||||
binary.BigEndian.PutUint64(numBuf, wordsCount) // Dictionary size
|
||||
if _, err = cw.Write(numBuf[:8]); err != nil {
|
||||
return err
|
||||
}
|
||||
// 2-nd, output dictionary size
|
||||
binary.BigEndian.PutUint64(numBuf, offset) // Dictionary size
|
||||
if _, err = cw.Write(numBuf[:8]); err != nil {
|
||||
return err
|
||||
}
|
||||
// Secondly, output directory root
|
||||
// 3-rd, output directory root
|
||||
binary.BigEndian.PutUint64(numBuf, root.offset)
|
||||
if _, err = cw.Write(numBuf[:8]); err != nil {
|
||||
return err
|
||||
}
|
||||
// Thirdly, output pattern cutoff offset
|
||||
// 4-th, output pattern cutoff offset
|
||||
binary.BigEndian.PutUint64(numBuf, patternCutoff)
|
||||
if _, err = cw.Write(numBuf[:8]); err != nil {
|
||||
return err
|
||||
|
2
go.mod
2
go.mod
@ -37,7 +37,7 @@ require (
|
||||
github.com/json-iterator/go v1.1.12
|
||||
github.com/julienschmidt/httprouter v1.3.0
|
||||
github.com/kevinburke/go-bindata v3.21.0+incompatible
|
||||
github.com/ledgerwatch/erigon-lib v0.0.0-20211217093546-8d06531e4ed3
|
||||
github.com/ledgerwatch/erigon-lib v0.0.0-20211221034520-583c3f9b5b6f
|
||||
github.com/ledgerwatch/log/v3 v3.4.0
|
||||
github.com/ledgerwatch/secp256k1 v1.0.0
|
||||
github.com/logrusorgru/aurora/v3 v3.0.0
|
||||
|
4
go.sum
4
go.sum
@ -617,8 +617,8 @@ github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758 h1:0D5M2HQSGD3P
|
||||
github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
|
||||
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
|
||||
github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8=
|
||||
github.com/ledgerwatch/erigon-lib v0.0.0-20211217093546-8d06531e4ed3 h1:8qDZvisP+6pFiVFd20BTD2y8/rYAe4go//HdBnk6CX8=
|
||||
github.com/ledgerwatch/erigon-lib v0.0.0-20211217093546-8d06531e4ed3/go.mod h1:lyGP3i0x4CeabdKZ4beycD5xZfHWZwJsAX+70OfGj4Y=
|
||||
github.com/ledgerwatch/erigon-lib v0.0.0-20211221034520-583c3f9b5b6f h1:MCIljelbCsLcgMzNTsrRg2Nu5DFyNlLxf5ZSWdy3CiM=
|
||||
github.com/ledgerwatch/erigon-lib v0.0.0-20211221034520-583c3f9b5b6f/go.mod h1:lyGP3i0x4CeabdKZ4beycD5xZfHWZwJsAX+70OfGj4Y=
|
||||
github.com/ledgerwatch/log/v3 v3.4.0 h1:SEIOcv5a2zkG3PmoT5jeTU9m/0nEUv0BJS5bzsjwKCI=
|
||||
github.com/ledgerwatch/log/v3 v3.4.0/go.mod h1:VXcz6Ssn6XEeU92dCMc39/g1F0OYAjw1Mt+dGP5DjXY=
|
||||
github.com/ledgerwatch/secp256k1 v1.0.0 h1:Usvz87YoTG0uePIV8woOof5cQnLXGYa162rFf3YnwaQ=
|
||||
|
Loading…
Reference in New Issue
Block a user