erigon-pulse/migrations/migrations.go

273 lines
7.2 KiB
Go
Raw Normal View History

package migrations
import (
"bytes"
bitmap indices for logs (#1124) * save progress * try now * don't create bloom inside rlpDecode * don't create bloom inside ApplyTransaction * clean * clean * clean * clean * clean * clean * clean * clean * rename method * print timings * print timings * print timings * sort before flush * fix err lint * clean * move tests to transactions * compressed version * up bound * up bound * more tests * more tests * more tests * more tests * better removal * clean * better performance of get/put methods * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * optimize rpcdaemon * fix test * fix rpcdaemon * fix test * simplify * simplify * fix nil pointer * clean * revert some changes * add some logs * clean * try without optimize * clean * clean * clean * clean * try * move log_index to own stage * move log_index to own stage * integration add log_index stage * integration add log_index stage * clean * clean * print timing * remove duplicates at unwind * extract truncateBitmaps func * try detect * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * add blackList of topics * clean * clean * clean * clean * clean * clean * clean * clean * sharding 1 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 3 * sharded 3 * sharded 3 * speedup things by putCurrent and putReserve * clean * optimize trim * clean * remove blacklist * add more info to err * ? * clean * clean * clean * clean * clean * working version * switch to cgo version of roaring bitmaps * clean * clean * clean * clean * more docs * clean * clean * fix logs bloom field * Fix debug_getModifiedAccountsByNumber * Try to fix crash * fix problem with "absent block" * fix problem with "absent block" * remove optimize method call * remove roaring iterator * fix problem with rebuild indicess * remove debug prints * tests for eth_getLogs involving topics * add tests for new stage, speparate topics into 2 buckets * version up * remove debug logs * remove debug logs * remove bloom filter implementation * Optimisation * Optimisatin not required, make rpctest lenient to geth errors * Lenient to geth failures Co-authored-by: Alexey Akhunov <akhounov@gmail.com>
2020-09-28 17:18:36 +00:00
"context"
"errors"
"fmt"
"path"
"github.com/ledgerwatch/turbo-geth/common"
"github.com/ledgerwatch/turbo-geth/common/dbutils"
"github.com/ledgerwatch/turbo-geth/common/etl"
"github.com/ledgerwatch/turbo-geth/eth/stagedsync/stages"
"github.com/ledgerwatch/turbo-geth/ethdb"
"github.com/ledgerwatch/turbo-geth/log"
"github.com/ugorji/go/codec"
)
// migrations apply sequentially in order of this array, skips applied migrations
// it allows - don't worry about merge conflicts and use switch branches
// see also dbutils.Migrations - it stores context in which each transaction was exectured - useful for bug-reports
//
// Idempotency is expected
// Best practices to achieve Idempotency:
// - in dbutils/bucket.go add suffix for existing bucket variable, create new bucket with same variable name.
// Example:
// - SyncStageProgress = []byte("SSP1")
// + SyncStageProgressOld1 = []byte("SSP1")
// + SyncStageProgress = []byte("SSP2")
// - in the beginning of migration: check that old bucket exists, clear new bucket
// - in the end:drop old bucket (not in defer!).
// Example:
// Up: func(db ethdb.Database, tmpdir string, OnLoadCommit etl.LoadCommitHandler) error {
2020-09-08 19:39:43 +00:00
// if exists, err := db.(ethdb.BucketsMigrator).BucketExists(dbutils.SyncStageProgressOld1); err != nil {
// return err
// } else if !exists {
2020-08-10 09:16:14 +00:00
// return OnLoadCommit(db, nil, true)
// }
//
2020-09-08 19:39:43 +00:00
// if err := db.(ethdb.BucketsMigrator).ClearBuckets(dbutils.SyncStageProgress); err != nil {
// return err
// }
//
// extractFunc := func(k []byte, v []byte, next etl.ExtractNextFunc) error {
// ... // migration logic
// }
// if err := etl.Transform(...); err != nil {
// return err
// }
//
2020-09-08 19:39:43 +00:00
// if err := db.(ethdb.BucketsMigrator).DropBuckets(dbutils.SyncStageProgressOld1); err != nil { // clear old bucket
// return err
// }
// },
// - if you need migrate multiple buckets - create separate migration for each bucket
// - write test where apply migration twice
var migrations = []Migration{
stagesToUseNamedKeys,
unwindStagesToUseNamedKeys,
stagedsyncToUseStageBlockhashes,
unwindStagedsyncToUseStageBlockhashes,
dupSortHashState,
dupSortPlainState,
dupSortIH,
bitmap indices for logs (#1124) * save progress * try now * don't create bloom inside rlpDecode * don't create bloom inside ApplyTransaction * clean * clean * clean * clean * clean * clean * clean * clean * rename method * print timings * print timings * print timings * sort before flush * fix err lint * clean * move tests to transactions * compressed version * up bound * up bound * more tests * more tests * more tests * more tests * better removal * clean * better performance of get/put methods * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * optimize rpcdaemon * fix test * fix rpcdaemon * fix test * simplify * simplify * fix nil pointer * clean * revert some changes * add some logs * clean * try without optimize * clean * clean * clean * clean * try * move log_index to own stage * move log_index to own stage * integration add log_index stage * integration add log_index stage * clean * clean * print timing * remove duplicates at unwind * extract truncateBitmaps func * try detect * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * add blackList of topics * clean * clean * clean * clean * clean * clean * clean * clean * sharding 1 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 3 * sharded 3 * sharded 3 * speedup things by putCurrent and putReserve * clean * optimize trim * clean * remove blacklist * add more info to err * ? * clean * clean * clean * clean * clean * working version * switch to cgo version of roaring bitmaps * clean * clean * clean * clean * more docs * clean * clean * fix logs bloom field * Fix debug_getModifiedAccountsByNumber * Try to fix crash * fix problem with "absent block" * fix problem with "absent block" * remove optimize method call * remove roaring iterator * fix problem with rebuild indicess * remove debug prints * tests for eth_getLogs involving topics * add tests for new stage, speparate topics into 2 buckets * version up * remove debug logs * remove debug logs * remove bloom filter implementation * Optimisation * Optimisatin not required, make rpctest lenient to geth errors * Lenient to geth failures Co-authored-by: Alexey Akhunov <akhounov@gmail.com>
2020-09-28 17:18:36 +00:00
clearIndices,
resetIHBucketToRecoverDB,
receiptsCborEncode,
receiptsOnePerTx,
ChangeSets dupsort (#1342) * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * change_set_dup * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * working version * aa * aa * aa * aa * aa * aa * aa * aa * aa * aa * aa * aa * aa * aa * aa * squash * squash * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * fix * history_early_stop * history_early_stop * vmConfig with ReadOnly false * auto_increment * auto_increment * rebase master Co-authored-by: Alexey Akhunov <akhounov@gmail.com>
2020-11-16 12:08:28 +00:00
accChangeSetDupSort,
storageChangeSetDupSort,
transactionsTable,
historyAccBitmap,
historyStorageBitmap,
splitHashStateBucket,
splitIHBucket,
}
type Migration struct {
Name string
Up func(db ethdb.Database, tmpdir string, progress []byte, OnLoadCommitOnLoadCommit etl.LoadCommitHandler) error
}
var (
ErrMigrationNonUniqueName = fmt.Errorf("please provide unique migration name")
ErrMigrationCommitNotCalled = fmt.Errorf("migraion commit function was not called")
ErrMigrationETLFilesDeleted = fmt.Errorf("db migration progress was interrupted after extraction step and ETL files was deleted, please contact development team for help or re-sync from scratch")
)
func NewMigrator() *Migrator {
return &Migrator{
Migrations: migrations,
}
}
type Migrator struct {
Migrations []Migration
}
func AppliedMigrations(db ethdb.Database, withPayload bool) (map[string][]byte, error) {
applied := map[string][]byte{}
err := db.Walk(dbutils.Migrations, nil, 0, func(k []byte, v []byte) (bool, error) {
if bytes.HasPrefix(k, []byte("_progress_")) {
return true, nil
}
if withPayload {
applied[string(common.CopyBytes(k))] = common.CopyBytes(v)
} else {
applied[string(common.CopyBytes(k))] = []byte{}
}
return true, nil
})
return applied, err
}
func (m *Migrator) HasPendingMigrations(db ethdb.Database) (bool, error) {
pending, err := m.PendingMigrations(db)
if err != nil {
return false, err
}
return len(pending) > 0, nil
}
func (m *Migrator) PendingMigrations(db ethdb.Database) ([]Migration, error) {
applied, err := AppliedMigrations(db, false)
if err != nil {
return nil, err
}
counter := 0
for i := range m.Migrations {
v := m.Migrations[i]
if _, ok := applied[v.Name]; ok {
continue
}
counter++
}
pending := make([]Migration, 0, counter)
for i := range m.Migrations {
v := m.Migrations[i]
if _, ok := applied[v.Name]; ok {
continue
}
pending = append(pending, v)
}
return pending, nil
}
func (m *Migrator) Apply(db ethdb.Database, tmpdir string) error {
if len(m.Migrations) == 0 {
return nil
}
applied, err1 := AppliedMigrations(db, false)
if err1 != nil {
return err1
}
// migration names must be unique, protection against people's mistake
uniqueNameCheck := map[string]bool{}
for i := range m.Migrations {
_, ok := uniqueNameCheck[m.Migrations[i].Name]
if ok {
return fmt.Errorf("%w, duplicate: %s", ErrMigrationNonUniqueName, m.Migrations[i].Name)
}
uniqueNameCheck[m.Migrations[i].Name] = true
}
tx, err1 := db.Begin(context.Background(), ethdb.RW)
if err1 != nil {
return err1
2020-09-08 19:39:43 +00:00
}
defer tx.Rollback()
for i := range m.Migrations {
v := m.Migrations[i]
if _, ok := applied[v.Name]; ok {
continue
}
commitFuncCalled := false // commit function must be called if no error, protection against people's mistake
log.Info("Apply migration", "name", v.Name)
progress, err := tx.Get(dbutils.Migrations, []byte("_progress_"+v.Name))
if err != nil && !errors.Is(err, ethdb.ErrKeyNotFound) {
return err
}
if err = v.Up(tx, path.Join(tmpdir, "migrations", v.Name), progress, func(_ ethdb.Putter, key []byte, isDone bool) error {
if !isDone {
if key != nil {
err = tx.Put(dbutils.Migrations, []byte("_progress_"+v.Name), key)
if err != nil {
return err
}
}
// do commit, but don't save partial progress
if err := tx.CommitAndBegin(context.Background()); err != nil {
return err
}
return nil
}
commitFuncCalled = true
2020-09-08 19:39:43 +00:00
stagesProgress, err := MarshalMigrationPayload(tx)
if err != nil {
return err
}
err = tx.Put(dbutils.Migrations, []byte(v.Name), stagesProgress)
if err != nil {
return err
}
err = tx.Delete(dbutils.Migrations, []byte("_progress_"+v.Name), nil)
if err != nil {
return err
}
2020-09-08 19:39:43 +00:00
bitmap indices for logs (#1124) * save progress * try now * don't create bloom inside rlpDecode * don't create bloom inside ApplyTransaction * clean * clean * clean * clean * clean * clean * clean * clean * rename method * print timings * print timings * print timings * sort before flush * fix err lint * clean * move tests to transactions * compressed version * up bound * up bound * more tests * more tests * more tests * more tests * better removal * clean * better performance of get/put methods * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * optimize rpcdaemon * fix test * fix rpcdaemon * fix test * simplify * simplify * fix nil pointer * clean * revert some changes * add some logs * clean * try without optimize * clean * clean * clean * clean * try * move log_index to own stage * move log_index to own stage * integration add log_index stage * integration add log_index stage * clean * clean * print timing * remove duplicates at unwind * extract truncateBitmaps func * try detect * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * clean * add blackList of topics * clean * clean * clean * clean * clean * clean * clean * clean * sharding 1 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 2 * sharded 3 * sharded 3 * sharded 3 * speedup things by putCurrent and putReserve * clean * optimize trim * clean * remove blacklist * add more info to err * ? * clean * clean * clean * clean * clean * working version * switch to cgo version of roaring bitmaps * clean * clean * clean * clean * more docs * clean * clean * fix logs bloom field * Fix debug_getModifiedAccountsByNumber * Try to fix crash * fix problem with "absent block" * fix problem with "absent block" * remove optimize method call * remove roaring iterator * fix problem with rebuild indicess * remove debug prints * tests for eth_getLogs involving topics * add tests for new stage, speparate topics into 2 buckets * version up * remove debug logs * remove debug logs * remove bloom filter implementation * Optimisation * Optimisatin not required, make rpctest lenient to geth errors * Lenient to geth failures Co-authored-by: Alexey Akhunov <akhounov@gmail.com>
2020-09-28 17:18:36 +00:00
if err := tx.CommitAndBegin(context.Background()); err != nil {
2020-09-08 19:39:43 +00:00
return err
}
return nil
}); err != nil {
return err
}
if !commitFuncCalled {
2020-08-12 02:57:55 +00:00
return fmt.Errorf("%w: %s", ErrMigrationCommitNotCalled, v.Name)
}
log.Info("Applied migration", "name", v.Name)
}
return nil
}
2020-08-04 09:25:28 +00:00
func MarshalMigrationPayload(db ethdb.Getter) ([]byte, error) {
s := map[string][]byte{}
buf := bytes.NewBuffer(nil)
encoder := codec.NewEncoder(buf, &codec.CborHandle{})
for _, stage := range stages.AllStages {
v, err := db.Get(dbutils.SyncStageProgress, stage)
if err != nil && !errors.Is(err, ethdb.ErrKeyNotFound) {
return nil, err
2020-08-04 09:25:28 +00:00
}
if len(v) > 0 {
s[string(stage)] = common.CopyBytes(v)
}
v, err = db.Get(dbutils.SyncStageUnwind, stage)
if err != nil && !errors.Is(err, ethdb.ErrKeyNotFound) {
return nil, err
}
if len(v) > 0 {
s["unwind_"+string(stage)] = common.CopyBytes(v)
}
}
if err := encoder.Encode(s); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
func UnmarshalMigrationPayload(data []byte) (map[string][]byte, error) {
s := map[string][]byte{}
if err := codec.NewDecoder(bytes.NewReader(data), &codec.CborHandle{}).Decode(&s); err != nil {
return nil, err
}
return s, nil
}