recsplit: reduce ram pressure (#9218)

Reasons:
- indexing is done in the background (or in many workers)
- `recsplit` has 2 etl collectors
This commit is contained in:
Alex Sharov 2024-01-12 17:26:20 +07:00 committed by GitHub
parent 66cd4e71fa
commit 3bb1917e8a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 32 additions and 33 deletions

View File

@ -161,7 +161,11 @@ func NewRecSplit(args RecSplitArgs, logger log.Logger) (*RecSplit, error) {
rs.baseDataID = args.BaseDataID
rs.etlBufLimit = args.EtlBufLimit
if rs.etlBufLimit == 0 {
rs.etlBufLimit = etl.BufferOptimalSize
// reduce RAM pressure, because:
// - indexing is done in the background or in many workers (building many indices in parallel)
// - `recsplit` has 2 etl collectors
// - `recsplit` building is CPU-intensive, so the bottleneck is not in etl loading
rs.etlBufLimit = etl.BufferOptimalSize / 8
}
rs.bucketCollector = etl.NewCollector(RecSplitLogPrefix+" "+fname, rs.tmpDir, etl.NewSortableBuffer(rs.etlBufLimit), logger)
rs.bucketCollector.LogLvl(log.LvlDebug)

View File

@ -406,13 +406,12 @@ func iterateForVi(historyItem, iiItem *filesItem, p *background.Progress, compre
func buildVi(ctx context.Context, historyItem, iiItem *filesItem, historyIdxPath, tmpdir string, count int, p *background.Progress, compressVals bool, logger log.Logger) error {
rs, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{
KeyCount: count,
Enums: false,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpdir,
IndexFile: historyIdxPath,
EtlBufLimit: etl.BufferOptimalSize / 2,
KeyCount: count,
Enums: false,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpdir,
IndexFile: historyIdxPath,
}, logger)
if err != nil {
return fmt.Errorf("create recsplit: %w", err)

View File

@ -35,7 +35,6 @@ import (
"github.com/ledgerwatch/erigon-lib/compress"
"github.com/ledgerwatch/erigon-lib/diagnostics"
"github.com/ledgerwatch/erigon-lib/downloader/snaptype"
"github.com/ledgerwatch/erigon-lib/etl"
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon-lib/recsplit"
types2 "github.com/ledgerwatch/erigon-lib/types"
@ -2173,28 +2172,26 @@ func TransactionsIdx(ctx context.Context, chainConfig *chain.Config, version uin
}
txnHashIdx, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{
KeyCount: d.Count(),
Enums: true,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpDir,
IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.Transactions.String())),
BaseDataID: firstTxID,
EtlBufLimit: etl.BufferOptimalSize / 2,
KeyCount: d.Count(),
Enums: true,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpDir,
IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.Transactions.String())),
BaseDataID: firstTxID,
}, logger)
if err != nil {
return err
}
txnHash2BlockNumIdx, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{
KeyCount: d.Count(),
Enums: false,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpDir,
IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.Transactions2Block.String())),
BaseDataID: firstBlockNum,
EtlBufLimit: etl.BufferOptimalSize / 2,
KeyCount: d.Count(),
Enums: false,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpDir,
IndexFile: filepath.Join(snapDir, snaptype.IdxFileName(version, blockFrom, blockTo, snaptype.Transactions2Block.String())),
BaseDataID: firstBlockNum,
}, logger)
if err != nil {
return err
@ -2382,14 +2379,13 @@ func Idx(ctx context.Context, d *compress.Decompressor, firstDataID uint64, tmpD
var idxFilePath = segmentFileName[0:len(segmentFileName)-len(extension)] + ".idx"
rs, err := recsplit.NewRecSplit(recsplit.RecSplitArgs{
KeyCount: d.Count(),
Enums: true,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpDir,
IndexFile: idxFilePath,
BaseDataID: firstDataID,
EtlBufLimit: etl.BufferOptimalSize / 2,
KeyCount: d.Count(),
Enums: true,
BucketSize: 2000,
LeafSize: 8,
TmpDir: tmpDir,
IndexFile: idxFilePath,
BaseDataID: firstDataID,
}, logger)
if err != nil {
return err