ETL: use logPrefix as suffix of tmp files (#2921)

This commit is contained in:
Alex Sharov 2021-11-05 17:19:44 +07:00 committed by GitHub
parent a2d5b00496
commit a5bbe82a59
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 28 additions and 7 deletions

View File

@ -63,7 +63,7 @@ import (
"github.com/wcharczuk/go-chart/v2" "github.com/wcharczuk/go-chart/v2"
) )
const ASSERT = false const ASSERT = true
var ( var (
verbosity = flag.Uint("verbosity", 3, "Logging verbosity: 0=silent, 1=error, 2=warn, 3=info, 4=debug, 5=detail (default 3)") verbosity = flag.Uint("verbosity", 3, "Logging verbosity: 0=silent, 1=error, 2=warn, 3=info, 4=debug, 5=detail (default 3)")
@ -2379,7 +2379,7 @@ func reducedict(name string) error {
posMap[l] += c posMap[l] += c
} }
} }
fmt.Printf("posMap = %v\n", posMap) //fmt.Printf("posMap = %v\n", posMap)
var patternList PatternList var patternList PatternList
for _, p := range code2pattern { for _, p := range code2pattern {
if p.uses > 0 { if p.uses > 0 {
@ -2716,6 +2716,25 @@ func reducedict(name string) error {
} }
return nil return nil
} }
// recsplitWholeChain produces one compressed dump per fixed-size window of
// blocks, covering block numbers from 0 up to (but not including) 13_500_000
// in steps of 500_000.
//
// For every window it:
//  1. stores the window's file name in the package-level *name, using the
//     pattern "bodies<millions>-<hundred-thousands>m" (the window starting at
//     block 1_500_000 is named "bodies1-5m");
//  2. calls dumpTxs for the window and then compress1 with the same name;
//  3. removes "<name>.dat", deliberately ignoring the result of the removal.
//
// Side effects: the package-level blockTotal, block and name variables are
// overwritten. The first error returned by dumpTxs or compress1 stops the run
// and is returned unchanged.
func recsplitWholeChain(chaindata string) error {
	const chainEnd = 13_500_000 // exclusive upper bound of the block range

	windowSize := 500_000
	blockTotal = &windowSize

	for from := 0; from < chainEnd; from += *blockTotal {
		millions := from / 1_000_000
		hundredThousands := from % 1_000_000 / 100_000
		*name = fmt.Sprintf("bodies%d-%dm", millions, hundredThousands)
		log.Info("Creating", "file", *name)

		// Keep the package-level block pointer in sync with the current window.
		block = &from
		err := dumpTxs(chaindata, uint64(*block), *blockTotal, *name)
		if err == nil {
			err = compress1(chaindata, *name)
		}
		if err != nil {
			return err
		}

		// Best-effort cleanup; presumably the .dat file is an intermediate
		// artifact of the steps above — TODO confirm which step writes it.
		_ = os.Remove(*name + ".dat")
	}
	return nil
}
func recsplitLookup(chaindata, name string) error { func recsplitLookup(chaindata, name string) error {
database := mdbx.MustOpen(chaindata) database := mdbx.MustOpen(chaindata)
defer database.Close() defer database.Close()
@ -3588,7 +3607,7 @@ func fixState(chaindata string) error {
return tx.Commit() return tx.Commit()
} }
func dumpTxs(chaindata string, block uint64, totalBlocks int, name string) error { func dumpTxs(chaindata string, block uint64, blockTotal int, name string) error {
db := mdbx.MustOpen(chaindata) db := mdbx.MustOpen(chaindata)
defer db.Close() defer db.Close()
chainConfig := tool.ChainConfigFromDB(db) chainConfig := tool.ChainConfigFromDB(db)
@ -3625,7 +3644,7 @@ func dumpTxs(chaindata string, block uint64, totalBlocks int, name string) error
k, v, e := bodies.Seek(blockEncoded) k, v, e := bodies.Seek(blockEncoded)
for ; k != nil && e == nil; k, v, e = bodies.Next() { for ; k != nil && e == nil; k, v, e = bodies.Next() {
bodyNum := binary.BigEndian.Uint64(k) bodyNum := binary.BigEndian.Uint64(k)
if bodyNum >= block+uint64(*blockTotal) { if bodyNum >= block+uint64(blockTotal) {
break break
} }
var body types.BodyForStorage var body types.BodyForStorage
@ -4244,6 +4263,8 @@ func main() {
err = compress1(*chaindata, *name) err = compress1(*chaindata, *name)
case "createIdx": case "createIdx":
err = createIdx(*chaindata, *name) err = createIdx(*chaindata, *name)
case "recsplitWholeChain":
err = recsplitWholeChain(*chaindata)
case "recsplitLookup": case "recsplitLookup":
err = recsplitLookup(*chaindata, *name) err = recsplitLookup(*chaindata, *name)
case "decompress": case "decompress":

2
go.mod
View File

@ -36,7 +36,7 @@ require (
github.com/json-iterator/go v1.1.12 github.com/json-iterator/go v1.1.12
github.com/julienschmidt/httprouter v1.3.0 github.com/julienschmidt/httprouter v1.3.0
github.com/kevinburke/go-bindata v3.21.0+incompatible github.com/kevinburke/go-bindata v3.21.0+incompatible
github.com/ledgerwatch/erigon-lib v0.0.0-20211104110507-597d0fbb01ab github.com/ledgerwatch/erigon-lib v0.0.0-20211105094006-5feef983fd25
github.com/ledgerwatch/log/v3 v3.4.0 github.com/ledgerwatch/log/v3 v3.4.0
github.com/ledgerwatch/secp256k1 v1.0.0 github.com/ledgerwatch/secp256k1 v1.0.0
github.com/logrusorgru/aurora/v3 v3.0.0 github.com/logrusorgru/aurora/v3 v3.0.0

4
go.sum
View File

@ -497,8 +497,8 @@ github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758 h1:0D5M2HQSGD3P
github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k= github.com/kylelemons/godebug v0.0.0-20170224010052-a616ab194758/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8=
github.com/ledgerwatch/erigon-lib v0.0.0-20211104110507-597d0fbb01ab h1:CNIgX4Sw1uybwmLgLmWpAaNrm4ADo33BLpz4Zo3FnqI= github.com/ledgerwatch/erigon-lib v0.0.0-20211105094006-5feef983fd25 h1:7u6CL7dcWmaozQGhUNTJuMZdJOtT56Lo3YCaNzYSx6Y=
github.com/ledgerwatch/erigon-lib v0.0.0-20211104110507-597d0fbb01ab/go.mod h1:CuEZROm43MykZT5CjCj02jw0FOwaDl8Nh+PZkTEGopg= github.com/ledgerwatch/erigon-lib v0.0.0-20211105094006-5feef983fd25/go.mod h1:CuEZROm43MykZT5CjCj02jw0FOwaDl8Nh+PZkTEGopg=
github.com/ledgerwatch/log/v3 v3.4.0 h1:SEIOcv5a2zkG3PmoT5jeTU9m/0nEUv0BJS5bzsjwKCI= github.com/ledgerwatch/log/v3 v3.4.0 h1:SEIOcv5a2zkG3PmoT5jeTU9m/0nEUv0BJS5bzsjwKCI=
github.com/ledgerwatch/log/v3 v3.4.0/go.mod h1:VXcz6Ssn6XEeU92dCMc39/g1F0OYAjw1Mt+dGP5DjXY= github.com/ledgerwatch/log/v3 v3.4.0/go.mod h1:VXcz6Ssn6XEeU92dCMc39/g1F0OYAjw1Mt+dGP5DjXY=
github.com/ledgerwatch/secp256k1 v1.0.0 h1:Usvz87YoTG0uePIV8woOof5cQnLXGYa162rFf3YnwaQ= github.com/ledgerwatch/secp256k1 v1.0.0 h1:Usvz87YoTG0uePIV8woOof5cQnLXGYa162rFf3YnwaQ=