mirror of
https://gitlab.com/pulsechaincom/erigon-pulse.git
synced 2024-12-22 03:30:37 +00:00
ethdb readme, db objects diagram (#1281)
* db objects diagram * db objects diagram * db objects diagram * db objects diagram * db objects diagram * db objects diagram * db objects diagram
This commit is contained in:
parent
dd5258d524
commit
0bc61c06ed
@ -1,232 +0,0 @@
|
||||
## Target:
|
||||
|
||||
To build 1 key-value abstraction on top of LMDB and RemoteKV (our own read-only TCP protocol for key-value databases).
|
||||
|
||||
## Design principles:
|
||||
- No internal copies/allocations. It means app must copy keys/values before put to database.
|
||||
- Low-level API: as close to original LMDB as possible.
|
||||
- Expose concept of transaction - app-level code can Begin/Commit/Rollback
|
||||
- If you are not familiar with the "DupSort" concept, please read [indices.md](./../docs/programmers_guide/indices.md) first.
|
||||
|
||||
## Result interface:
|
||||
|
||||
```
|
||||
// ethdb/kv_abstract.go
|
||||
|
||||
// KV low-level database interface - main target is - to provide common abstraction over top of LMDB and RemoteKV.
|
||||
//
|
||||
// Common pattern for short-living transactions:
|
||||
//
|
||||
// if err := db.View(ctx, func(tx ethdb.Tx) error {
|
||||
// ... code which uses database in transaction
|
||||
// }); err != nil {
|
||||
// return err
|
||||
// }
|
||||
//
|
||||
// Common pattern for long-living transactions:
|
||||
// tx, err := db.Begin(true)
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
// defer tx.Rollback()
|
||||
//
|
||||
// ... code which uses database in transaction
|
||||
//
|
||||
// err := tx.Commit()
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
//
|
||||
type KV interface {
|
||||
View(ctx context.Context, f func(tx Tx) error) error
|
||||
Update(ctx context.Context, f func(tx Tx) error) error
|
||||
Close()
|
||||
|
||||
// Begin - creates transaction
|
||||
// tx may be discarded by .Rollback() method
|
||||
//
|
||||
// A transaction and its cursors must only be used by a single
|
||||
// thread (not goroutine), and a thread may only have a single transaction at a time.
|
||||
// This happens automatically - because this method calls runtime.LockOSThread() inside (Rollback/Commit releases it)
|
||||
// For this reason application code must not call runtime.UnlockOSThread() - it leads to undefined behavior.
|
||||
//
|
||||
// If this `parent` is non-NULL, the new transaction
|
||||
// will be a nested transaction, with the transaction indicated by parent
|
||||
// as its parent. Transactions may be nested to any level. A parent
|
||||
// transaction and its cursors may not issue any other operations than
|
||||
// Commit and Rollback while it has active child transactions.
|
||||
Begin(ctx context.Context, parent Tx, writable bool) (Tx, error)
|
||||
AllBuckets() dbutils.BucketsCfg
|
||||
}
|
||||
|
||||
type Tx interface {
|
||||
// Cursor - creates cursor object on top of given bucket. Type of cursor - depends on bucket configuration.
|
||||
// If bucket was created with lmdb.DupSort flag, then cursor with interface CursorDupSort created
|
||||
// If bucket was created with lmdb.DupFixed flag, then cursor with interface CursorDupFixed created
|
||||
// Otherwise - object of interface Cursor created
|
||||
//
|
||||
// Cursor, also provides a grain of magic - it can use a declarative configuration - and automatically break
|
||||
// long keys into DupSort key/values. See docs for `bucket.go:BucketConfigItem`
|
||||
Cursor(bucket string) Cursor
|
||||
CursorDupSort(bucket string) CursorDupSort // CursorDupSort - can be used if bucket has lmdb.DupSort flag
|
||||
CursorDupFixed(bucket string) CursorDupFixed // CursorDupFixed - can be used if bucket has lmdb.DupFixed flag
|
||||
Get(bucket string, key []byte) (val []byte, err error)
|
||||
|
||||
Commit(ctx context.Context) error // Commit all the operations of a transaction into the database.
|
||||
Rollback() // Rollback - abandon all the operations of the transaction instead of saving them.
|
||||
|
||||
BucketSize(name string) (uint64, error)
|
||||
}
|
||||
|
||||
// Interface used for buckets migration, don't use it in usual app code
|
||||
type BucketMigrator interface {
|
||||
DropBucket(string) error
|
||||
CreateBucket(string) error
|
||||
ExistsBucket(string) bool
|
||||
ClearBucket(string) error
|
||||
ExistingBuckets() ([]string, error)
|
||||
}
|
||||
|
||||
// Cursor - class for navigating through a database
|
||||
// CursorDupSort and CursorDupFixed inherit from this class
|
||||
//
|
||||
// If methods (like First/Next/Seek) return error, then returned key SHOULD not be nil (can be []byte{} for example).
|
||||
// Then looping code will look as:
|
||||
// c := kv.Cursor(bucketName)
|
||||
// for k, v, err := c.First(); k != nil; k, v, err = c.Next() {
|
||||
// if err != nil {
|
||||
// return err
|
||||
// }
|
||||
// ... logic
|
||||
// }
|
||||
type Cursor interface {
|
||||
Prefix(v []byte) Cursor // Prefix returns only keys with given prefix, useful for RemoteKV - because filtering is done by the server
|
||||
Prefetch(v uint) Cursor // Prefetch enables data streaming - used only by RemoteKV
|
||||
|
||||
First() ([]byte, []byte, error) // First - position at first key/data item
|
||||
Seek(seek []byte) ([]byte, []byte, error) // Seek - position at first key greater than or equal to specified key
|
||||
SeekExact(key []byte) ([]byte, error) // SeekExact - position at the exact matching key, if it exists
|
||||
Next() ([]byte, []byte, error) // Next - position at next key/value (can iterate over DupSort key/values automatically)
|
||||
Prev() ([]byte, []byte, error) // Prev - position at previous key
|
||||
Last() ([]byte, []byte, error) // Last - position at last key and last possible value
|
||||
Current() ([]byte, []byte, error) // Current - return key/data at current cursor position
|
||||
|
||||
Put(k, v []byte) error // Put - based on order
|
||||
Append(k []byte, v []byte) error // Append - append the given key/data pair to the end of the database. This option allows fast bulk loading when keys are already known to be in the correct order.
|
||||
Delete(key []byte) error
|
||||
|
||||
// DeleteCurrent This function deletes the key/data pair to which the cursor refers.
|
||||
// This does not invalidate the cursor, so operations such as MDB_NEXT
|
||||
// can still be used on it.
|
||||
// Both MDB_NEXT and MDB_GET_CURRENT will return the same record after
|
||||
// this operation.
|
||||
DeleteCurrent() error
|
||||
|
||||
// PutNoOverwrite(key, value []byte) error
|
||||
// Reserve()
|
||||
|
||||
// PutCurrent - replace the item at the current cursor position.
|
||||
// The key parameter must still be provided, and must match it.
|
||||
// If using sorted duplicates (#MDB_DUPSORT) the data item must still
|
||||
// sort into the same place. This is intended to be used when the
|
||||
// new data is the same size as the old. Otherwise it will simply
|
||||
// perform a delete of the old record followed by an insert.
|
||||
PutCurrent(key, value []byte) error
|
||||
}
|
||||
|
||||
type CursorDupSort interface {
|
||||
Cursor
|
||||
|
||||
SeekBothExact(key, value []byte) ([]byte, []byte, error)
|
||||
SeekBothRange(key, value []byte) ([]byte, []byte, error)
|
||||
FirstDup() ([]byte, error) // FirstDup - position at first data item of current key
|
||||
NextDup() ([]byte, []byte, error) // NextDup - position at next data item of current key
|
||||
NextNoDup() ([]byte, []byte, error) // NextNoDup - position at first data item of next key
|
||||
LastDup() ([]byte, error) // LastDup - position at last data item of current key
|
||||
|
||||
CountDuplicates() (uint64, error) // CountDuplicates - number of duplicates for the current key
|
||||
DeleteCurrentDuplicates() error // DeleteCurrentDuplicates - deletes all of the data items for the current key
|
||||
AppendDup(key, value []byte) error // AppendDup - same as Append, but for sorted dup data
|
||||
|
||||
//PutIfNoDup() // Store the key-value pair only if key is not present
|
||||
}
|
||||
|
||||
// CursorDupFixed - has methods valid for buckets with lmdb.DupFixed flag
|
||||
// See also lmdb.WrapMulti
|
||||
type CursorDupFixed interface {
|
||||
CursorDupSort
|
||||
|
||||
// GetMulti - return up to a page of duplicate data items from current cursor position
|
||||
// After return - move cursor to prepare for #MDB_NEXT_MULTIPLE
|
||||
GetMulti() ([]byte, error)
|
||||
// NextMulti - return up to a page of duplicate data items from next cursor position
|
||||
// After return - move cursor to prepare for #MDB_NEXT_MULTIPLE
|
||||
NextMulti() ([]byte, []byte, error)
|
||||
// PutMulti store multiple contiguous data elements in a single request.
|
||||
// Panics if len(page) is not a multiple of stride.
|
||||
// The cursor's bucket must be DupFixed and DupSort.
|
||||
PutMulti(key []byte, page []byte, stride int) error
|
||||
// ReserveMulti()
|
||||
}
|
||||
|
||||
type HasStats interface {
|
||||
DiskSize(context.Context) (uint64, error) // db size
|
||||
}
|
||||
```
|
||||
|
||||
## Rationale and Features list:
|
||||
|
||||
#### Buckets concept:
|
||||
- Bucket is an interface, can’t be nil, can't return error
|
||||
|
||||
#### InMemory, ReadOnly, MultipleDatabases, Customization:
|
||||
- `NewLMDB().InMem().ReadOnly().Open()`
|
||||
- `NewLMDB().Path(path).WithBucketsConfig(config).Open()`
|
||||
|
||||
#### Context:
|
||||
- For transactions - yes
|
||||
- For .First() and .Next() methods - no
|
||||
|
||||
#### Cursor/Iterator:
|
||||
- Cursor is an interface, can’t be nil. `db.Cursor()` can't return error
|
||||
- `cursor.Prefix(prefix)` filtering keys by given prefix. RemoteKV - to support server side filtering.
|
||||
- `cursor.Prefetch(1000)` - useful for Remote
|
||||
- No Lazy values
|
||||
- Methods .First, .Next, .Seek - can return error.
|
||||
If err!=nil then key SHOULD be !=nil (can be []byte{} for example).
|
||||
Then looping code will look as:
|
||||
```go
|
||||
for k, v, err := c.First(); k != nil; k, v, err = c.Next() {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// logic
|
||||
}
|
||||
```
|
||||
|
||||
#### Managed/un-managed transactions
|
||||
- Tx is an interface
|
||||
- db.Update, db.View - yes
|
||||
- db.Batch - no
|
||||
- all keys and values returned by all methods are valid until the end of the transaction
|
||||
- transaction object can be used only within 1 goroutine
|
||||
- it's safe to call .Rollback() after .Commit(), multiple rollbacks are also safe. Common transaction pattern:
|
||||
```
|
||||
tx, err := db.Begin(true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback()
|
||||
|
||||
// ... code which uses database in transaction
|
||||
|
||||
err := tx.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
## Not covered by Abstractions:
|
||||
- TTL of keys
|
||||
- Nested Buckets
|
||||
- Backups
|
130
ethdb/Readme.md
Normal file
130
ethdb/Readme.md
Normal file
@ -0,0 +1,130 @@
|
||||
#### `Ethdb` package holds a bouquet of objects to access DB
|
||||
|
||||
Words "KV" and "DB" have special meaning here:
|
||||
- KV - key-value-style API to access data: let developer manage transactions, stateful cursors.
|
||||
- DB - object-oriented-style API to access data: Get/Put/Delete/WalkOverTable/MultiPut, managing transactions internally.
|
||||
|
||||
So, the DB abstraction fits 95% of the time and leads to more maintainable code - because it looks stateless.
|
||||
|
||||
About "key-value-style": Modern key-value databases don't provide Get/Put/Delete methods,
|
||||
because it's very hard-drive-unfriendly - it pushes developers to do random disk access, which is an [order of magnitude slower than sequential read](https://www.seagate.com/sg/en/tech-insights/lies-damn-lies-and-ssd-benchmark-master-ti/).
|
||||
To enforce sequential reads, stateful cursors/iterators were introduced - they intentionally look like a file API: open_cursor/seek/write_data_from_current_position/move_to_end/step_back/step_forward/delete_key_on_current_position/append.
|
||||
|
||||
## Class diagram:
|
||||
|
||||
```asciiflow.com
|
||||
// This is not call graph, just show classes from low-level to high-level.
|
||||
// And show which classes satisfy which interfaces.
|
||||
|
||||
+-----------------------------------+ +-----------------------------------+ +-----------------------------------+
|
||||
| github.com/ledgerwatch/lmdb-go | | github.com/torquem-ch/mdbx-go | | google.golang.org/grpc.ClientConn |
|
||||
| (app-agnostic LMDB go bindings) | | (app-agnostic MDBX go bindings) | | (app-agnostic RPC and streaming) |
|
||||
+-----------------------------------+ +-----------------------------------+ +-----------------------------------+
|
||||
| | |
|
||||
| | |
|
||||
v v v
|
||||
+-----------------------------------+ +-----------------------------------+ +-----------------------------------+
|
||||
| ethdb/kv_lmdb.go | | ethdb/kv_mdbx.go | | ethdb/kv_remote.go |
|
||||
| (tg-specific LMDB implementation) | | (tg-specific MDBX implementation) | | (tg-specific remote DB access) |
|
||||
+-----------------------------------+ +-----------------------------------+ +-----------------------------------+
|
||||
| | |
|
||||
| | |
|
||||
v v v
|
||||
+----------------------------------------------------------------------------------------------+
|
||||
| ethdb/kv_abstract.go |
|
||||
| (Common KV interface. DB-friendly, disk-friendly, cpu-cache-friendly. |
|
||||
| Same app code can work with local or remote database. |
|
||||
| Allows experiment with another database implementations. |
|
||||
| Supports context.Context for cancelation. Any operation can return error) |
|
||||
+----------------------------------------------------------------------------------------------+
|
||||
| | |
|
||||
| | |
|
||||
v v v
|
||||
+-----------------------------------+ +-----------------------------------+ +-----------------------------------+
|
||||
| ethdb/object_db.go | | ethdb/tx_db.go | | ethdb/remote/remotedbserver |
|
||||
| (thread-safe, stateless, | | (non-thread-safe, more performant | | (grpc server, using kv_abstract, |
|
||||
| opens/close short transactions | | than object_db, method Begin | | kv_remote call this server, 1 |
|
||||
| internally when need) | | DOESN'T create new TxDb object) | | transaction maps on 1 grpc stream |
|
||||
+-----------------------------------+ +-----------------------------------+ +-----------------------------------+
|
||||
| |
|
||||
| |
|
||||
v v
|
||||
+-----------------------------------------------------------------------------------------------+
|
||||
| ethdb/interface.go |
|
||||
| (Common DB interfaces. ethdb.Database and ethdb.DbWithPendingMutations are widely used) |
|
||||
+-----------------------------------------------------------------------------------------------+
|
||||
|
|
||||
|
|
||||
v
|
||||
+--------------------------------------------------+
|
||||
| ethdb/mutation.go |
|
||||
| (also known as "batch", recording all writes and |
|
||||
| then flush to DB in sorted way only when call |
|
||||
| .Commit(), use it to avoid random-writes. |
|
||||
| It use and satisfy ethdb.Database in same time |
|
||||
+--------------------------------------------------+
|
||||
|
||||
```
|
||||
|
||||
|
||||
## ethdb.AbstractKV design:
|
||||
|
||||
- InMemory, ReadOnly: `NewLMDB().InMem().ReadOnly().Open()`
|
||||
- MultipleDatabases, Customization: `NewLMDB().Path(path).WithBucketsConfig(config).Open()`
|
||||
|
||||
|
||||
- 1 Transaction object can be used only within 1 goroutine.
|
||||
- Only 1 write transaction can be active at a time (other will wait).
|
||||
- Unlimited read transactions can be active concurrently (not blocked by write transaction).
|
||||
|
||||
|
||||
- Methods db.Update, db.View - can be used to open and close short transaction.
|
||||
- Methods Begin/Commit/Rollback - for long transaction.
|
||||
- it's safe to call .Rollback() after .Commit(), multiple rollbacks are also safe. Common transaction pattern:
|
||||
```
|
||||
tx, err := db.Begin(true, nil, ethdb.RW)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer tx.Rollback() // important to avoid transactions leak at panic or early return
|
||||
|
||||
// ... code which uses database in transaction
|
||||
|
||||
err := tx.Commit()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
```
|
||||
|
||||
|
||||
- No internal copies/allocations. It means: 1. The app must copy keys/values before putting them into the database. 2. Data read from the db is valid only during the current transaction - copy it if you plan to use the data after transaction Commit/Rollback.
|
||||
- Methods .Bucket() and .Cursor(), can’t return nil, can't return error.
|
||||
- Bucket and Cursor - are interfaces - means different classes can satisfy it: for example `LmdbCursor`, `LmdbDupSortCursor`, `LmdbDupFixedCursor` classes satisfy it.
|
||||
If you are not familiar with the "DupSort" concept, please read [indices.md](./../docs/programmers_guide/indices.md) first.
|
||||
|
||||
|
||||
- If Cursor returns err!=nil then key SHOULD be != nil (can be []byte{} for example).
|
||||
Then traversal code looks like:
|
||||
```go
|
||||
for k, v, err := c.First(); k != nil; k, v, err = c.Next() {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// logic
|
||||
}
|
||||
```
|
||||
- Move cursor: `cursor.Seek(key)`
|
||||
|
||||
|
||||
|
||||
## ethdb.Database design:
|
||||
|
||||
- Allows passing multiple implementations
|
||||
- Allows traversing tables with `db.Walk` and `db.MultiWalk`
|
||||
|
||||
## ethdb.TxDb design:
|
||||
- holds inside 1 long-running transaction and 1 cursor per table
|
||||
- method Begin DOESN'T create new TxDb object, it means this object can be passed into other objects by pointer,
|
||||
and high-level app code can start/commit transactions when it needs without re-creating all objects which hold
|
||||
TxDb pointer.
|
||||
- This is the reason why the txDb.CommitAndBegin() method works: inside, it creates a new transaction object, while the pointer to TxDb stays valid.
|
Loading…
Reference in New Issue
Block a user