experiment with rewriting gas_limits in python+lmdb (just to see how it could look) (#654)

Alex Sharov 2020-06-12 15:25:40 +07:00 committed by GitHub
parent 351c188b02
commit 9961fef39e
4 changed files with 178 additions and 0 deletions

cmd/state/py/chain.py (new file, 20 lines)
@@ -0,0 +1,20 @@
import rlp
from eth.rlp.headers import BlockHeader, BlockHeaderAPI

import common
import dbutils


def lastBlockNumber(env):
    # HeadHeaderKey holds the hash of the head header; HeaderNumberPrefix maps hash -> block number
    b = env.open_db(dbutils.HeadHeaderKey, create=False)
    b1 = env.open_db(dbutils.HeaderNumberPrefix, create=False)
    with env.begin(write=False) as txn:
        blockHashData = txn.get(dbutils.HeadHeaderKey, db=b)
        assert len(blockHashData) == common.HashLength, "%d != %d" % (len(blockHashData), common.HashLength)
        blockNumberData = txn.get(blockHashData, db=b1)
        assert len(blockNumberData) == common.BlockNumberLength
        return common.bytesToUint64(blockNumberData)


def decode_block_header(header_rlp: bytes) -> BlockHeaderAPI:
    return rlp.decode(header_rlp, sedes=BlockHeader)
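For context, a minimal sketch (my addition, not part of the commit) of how these two helpers combine: find the head block number, then seek into the `h` bucket and decode that header. The "chaindata" path is a placeholder, and the key layout (8-byte big-endian number, optionally followed by a 32-byte hash) is assumed from the Go dbutils encoding.

import lmdb

import chain
import common
import dbutils

env = lmdb.open("chaindata", max_dbs=100, readonly=True, create=False)  # placeholder path
head = chain.lastBlockNumber(env)
headers = env.open_db(dbutils.HeaderPrefix, create=False)
with env.begin(write=False) as txn:
    with txn.cursor(headers) as curs:
        # seek to the first key starting with the head block number
        if curs.set_range(common.uint64ToBytes(head)):
            for k, v in curs.iternext():
                if k[:common.BlockNumberLength] != common.uint64ToBytes(head):
                    break  # moved past the head block's keys
                if dbutils.isHeaderKey(k):  # skip the 'n'/'t' suffixed keys
                    print(head, chain.decode_block_header(v).gas_limit)
                    break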

cmd/state/py/common.py (new file, 10 lines)
@@ -0,0 +1,10 @@
import struct

HashLength = 32
AddressLength = 20
BlockNumberLength = 8
IncarnationLength = 8


# Block numbers and other fixed-width integers are stored big-endian,
# so LMDB's lexicographic key order matches numeric order.
def bytesToUint64(val): return struct.unpack(">Q", val)[0]


def uint64ToBytes(val): return struct.pack(">Q", val)
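Worth spelling out (my addition): because the encoding is big-endian, byte-wise key comparison agrees with numeric comparison, which is what lets the cursor scans in main.py break out early. A quick round-trip check:

import common

assert common.uint64ToBytes(1_000_000) == bytes.fromhex("00000000000f4240")
assert common.bytesToUint64(common.uint64ToBytes(1_000_000)) == 1_000_000
# byte order matches numeric order; a little-endian encoding would flip this pair
assert common.uint64ToBytes(255) < common.uint64ToBytes(256)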

cmd/state/py/dbutils.py (new file, 69 lines)
@@ -0,0 +1,69 @@
# cat common/dbutils/bucket.go | grep '=' | grep byte | sed 's/\[\]byte(//' | sed 's/)//' | awk '{print $1 $2 $3".encode()"}' | grep -v '//'
import common

PlainStateBucket = "PLAIN-CST".encode()
PlainContractCodeBucket = "PLAIN-contractCode".encode()
PlainAccountChangeSetBucket = "PLAIN-ACS".encode()
PlainStorageChangeSetBucket = "PLAIN-SCS".encode()
CurrentStateBucket = "CST".encode()
AccountsHistoryBucket = "hAT".encode()
StorageHistoryBucket = "hST".encode()
CodeBucket = "CODE".encode()
ContractCodeBucket = "contractCode".encode()
IncarnationMapBucket = "incarnationMap".encode()
AccountChangeSetBucket = "ACS".encode()
StorageChangeSetBucket = "SCS".encode()
IntermediateTrieHashBucket = "iTh".encode()
IntermediateWitnessSizeBucket = "iws".encode()
DatabaseInfoBucket = "DBINFO".encode()
DatabaseVerisionKey = "DatabaseVersion".encode()
HeadHeaderKey = "LastHeader".encode()
HeadBlockKey = "LastBlock".encode()
HeadFastBlockKey = "LastFast".encode()
FastTrieProgressKey = "TrieSync".encode()
HeaderPrefix = "h".encode()
HeaderTDSuffix = "t".encode()
HeaderHashSuffix = "n".encode()
HeaderNumberPrefix = "H".encode()
BlockBodyPrefix = "b".encode()
BlockReceiptsPrefix = "r".encode()
TxLookupPrefix = "l".encode()
BloomBitsPrefix = "B".encode()
PreimagePrefix = "secure-key-".encode()
ConfigPrefix = "ethereum-config-".encode()
BloomBitsIndexPrefix = "iB".encode()
BloomBitsIndexPrefixShead = "iBshead".encode()
LastPrunedBlockKey = "LastPrunedBlock".encode()
LastAppliedMigration = "lastAppliedMigration".encode()
StorageModeHistory = "smHistory".encode()
StorageModeReceipts = "smReceipts".encode()
StorageModeTxIndex = "smTxIndex".encode()
StorageModePreImages = "smPreImages".encode()
StorageModeIntermediateTrieHash = "smIntermediateTrieHash".encode()
SyncStageProgress = "SSP".encode()
SyncStageUnwind = "SSU".encode()
CliqueBucket = "clique-".encode()

# cat common/dbutils/bucket.go | grep '=' | grep byte | sed 's/\[\]byte(//' | sed 's/)//' | awk '{print $3}' | grep -v '//' | grep -v '=' | tr '\n' ','
buckets = ["PLAIN-CST", "PLAIN-contractCode", "PLAIN-ACS", "PLAIN-SCS", "CST", "hAT", "hST", "CODE", "contractCode",
           "incarnationMap", "ACS", "SCS", "iTh", "iws", "DBINFO", "DatabaseVersion", "LastHeader", "LastBlock",
           "LastFast", "TrieSync", "h", "t", "n", "H", "b", "r", "l", "B", "secure-key-", "ethereum-config-", "iB",
           "iBshead", "LastPrunedBlock", "lastAppliedMigration", "smHistory", "smReceipts", "smTxIndex",
           "smPreImages", "smIntermediateTrieHash", "SSP", "SSU", "clique-"]
def isHeaderHashKey(k):
    # <8-byte big-endian block number> + 'n'
    l = common.BlockNumberLength + 1
    return len(k) == l and k[l - 1:] == HeaderHashSuffix


def isHeaderTDKey(k):
    # <8-byte big-endian block number> + <32-byte hash> + 't'
    l = common.BlockNumberLength + common.HashLength + 1
    return len(k) == l and k[l - 1:] == HeaderTDSuffix


def isHeaderKey(k):
    # <8-byte big-endian block number> + <32-byte hash>, no suffix
    l = common.BlockNumberLength + common.HashLength
    if len(k) != l:
        return False
    return (not isHeaderHashKey(k)) and (not isHeaderTDKey(k))
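To make the three key shapes concrete, a small self-check (my addition; the block number and hash are hypothetical values):

import dbutils

num = (123456).to_bytes(8, "big")  # 8-byte big-endian block number
h = b"\x11" * 32                   # hypothetical 32-byte header hash

assert dbutils.isHeaderHashKey(num + dbutils.HeaderHashSuffix)  # number + 'n': canonical hash
assert dbutils.isHeaderTDKey(num + h + dbutils.HeaderTDSuffix)  # number + hash + 't': total difficulty
assert dbutils.isHeaderKey(num + h)                             # number + hash: the header itself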

cmd/state/py/main.py (new file, 79 lines)
@@ -0,0 +1,79 @@
import csv
import sys

import lmdb
import pandas as pd
import plotly.express as px

import chain
import common
import dbutils

cmd = sys.argv[1]
chaindata = sys.argv[2]

env = lmdb.open(chaindata, max_dbs=100, readonly=True, subdir=True, map_size=32 * 1024 * 1024 * 1024, create=False)
analyticsEnv = lmdb.open("analytics", max_dbs=100, readonly=False, subdir=True, map_size=32 * 1024 * 1024 * 1024,
                         create=True)
env.reader_check()  # clear stale reader entries

if cmd == "stats":
    data = {"name": [], "size": []}
    for bucket in dbutils.buckets:
        b = env.open_db(bucket.encode(), create=False)
        with env.begin(write=False) as txn:
            stat = txn.stat(b)
            data["name"].append(bucket)
            # bucket size in bytes = page size * total pages of all kinds
            data["size"].append(stat['psize'] * (stat['branch_pages'] + stat['leaf_pages'] + stat['overflow_pages']))
    df = pd.DataFrame.from_dict(data)
    fig = px.pie(df, values='size', names='name', title='Buckets size')
    fig.show()
elif cmd == "gas_limits":
    StartedWhenBlockNumber = chain.lastBlockNumber(env)
    b = env.open_db(dbutils.HeaderPrefix, create=False)
    mainHashes = analyticsEnv.open_db("gl_main_hashes".encode(), create=True)

    def collect_main_hashes(readTx, writeTx):
        # remember every canonical (main-chain) header hash so forked headers can be skipped later
        with readTx.cursor(b) as curs:
            for k, v in curs.iternext():
                blockNumber = common.bytesToUint64(k[:common.BlockNumberLength])
                if blockNumber > StartedWhenBlockNumber:
                    break
                if not dbutils.isHeaderHashKey(k):
                    continue
                mainHash = bytes(v)
                writeTx.put(mainHash, common.uint64ToBytes(0), db=mainHashes)

    def gas_limits(readTx, writeTx, writer):
        blockNum = 0
        with readTx.cursor(b) as curs:
            for k, v in curs.iternext():
                blockNumber = common.bytesToUint64(k[:common.BlockNumberLength])
                if blockNumber > StartedWhenBlockNumber:
                    break
                if not dbutils.isHeaderKey(k):
                    continue
                if writeTx.get(k[common.BlockNumberLength:], db=mainHashes) is None:
                    continue  # not a canonical header
                header = chain.decode_block_header(v)
                writer.writerow([blockNum, header.gas_limit])
                blockNum += 1

    with env.begin(write=False) as txn:
        with analyticsEnv.begin(write=True) as writeTx:
            with open('gas_limits.csv', 'w', newline='') as csvfile:
                collect_main_hashes(txn, writeTx)
                print("Preloaded: %d" % writeTx.stat(mainHashes)["entries"])
                gas_limits(txn, writeTx, csv.writer(csvfile))
else:
    print("unknown command %s" % cmd)