/* Copyright 2022 Erigon contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package kv import ( "context" "errors" "fmt" "unsafe" "github.com/VictoriaMetrics/metrics" "github.com/ledgerwatch/erigon-lib/kv/iter" "github.com/ledgerwatch/erigon-lib/kv/order" ) //Variables Naming: // tx - Database Transaction // txn - Ethereum Transaction (and TxNum - is also number of Ethereum Transaction) // blockNum - Ethereum block number - same across all nodes. blockID - auto-increment ID - which can be different across all nodes // txNum/txID - same // RoTx - Read-Only Database Transaction. RwTx - read-write // k, v - key, value // ts - TimeStamp. Usually it's Ethereum's TransactionNumber (auto-increment ID). Or BlockNumber. // Cursor - low-level mdbx-tide api to navigate over Table // Iter - high-level iterator-like api over Table/InvertedIndex/History/Domain. Has fewer features than Cursor. See package `iter`. //Methods Naming: // Prune: delete old data // Unwind: delete recent data // Get: exact match of criteria // Range: [from, to). from=nil means StartOfTable, to=nil means EndOfTable, rangeLimit=-1 means Unlimited // Range is analog of SQL's: SELECT * FROM Table WHERE k>=from AND k stream of server-side pushes --- // Range [from, to) // Range(from, nil) means [from, EndOfTable) // Range(nil, to) means [StartOfTable, to) Range(table string, fromPrefix, toPrefix []byte) (iter.KV, error) // Stream is like Range, but for requesting huge data (Example: full table scan). Client can't stop it. 
//Stream(table string, fromPrefix, toPrefix []byte) (iter.KV, error) // RangeAscend - like Range [from, to) but also allows passing a Limit parameter // Limit -1 means Unlimited RangeAscend(table string, fromPrefix, toPrefix []byte, limit int) (iter.KV, error) //StreamAscend(table string, fromPrefix, toPrefix []byte, limit int) (iter.KV, error) // RangeDescend - is like Range [from, to), but expecting `from`>`to` // example: RangeDescend("Table", "B", "A", -1) RangeDescend(table string, fromPrefix, toPrefix []byte, limit int) (iter.KV, error) //StreamDescend(table string, fromPrefix, toPrefix []byte, limit int) (iter.KV, error) // Prefix - is exactly Range(Table, prefix, kv.NextSubtree(prefix)) Prefix(table string, prefix []byte) (iter.KV, error) // RangeDupSort - like Range but for fixed single key and iterating over range of values RangeDupSort(table string, key []byte, fromPrefix, toPrefix []byte, asc order.By, limit int) (iter.KV, error) // --- High-Level methods: 1request -> 1page of values in response -> send next page request --- // Paginate(table string, fromPrefix, toPrefix []byte) (PairsStream, error) // --- High-Level deprecated methods --- ForEach(table string, fromPrefix []byte, walker func(k, v []byte) error) error ForPrefix(table string, prefix []byte, walker func(k, v []byte) error) error ForAmount(table string, prefix []byte, amount uint32, walker func(k, v []byte) error) error // Pointer to the underlying C transaction handle (e.g. *C.MDBX_txn) CHandle() unsafe.Pointer BucketSize(table string) (uint64, error) } // RwTx // // WARNING: // - RwTx is not threadsafe and may only be used in the goroutine that created it. 
// - ReadOnly transactions do not lock goroutine to thread, RwTx does // - User Can't call runtime.LockOSThread/runtime.UnlockOSThread in same goroutine until RwTx Commit/Rollback type RwTx interface { Tx StatelessWriteTx BucketMigrator RwCursor(table string) (RwCursor, error) RwCursorDupSort(table string) (RwCursorDupSort, error) // CollectMetrics - does collect all DB-related and Tx-related metrics // this method exists only in RwTx to avoid concurrency CollectMetrics() } type BucketMigratorRO interface { ListBuckets() ([]string, error) } // BucketMigrator used for buckets migration, don't use it in usual app code type BucketMigrator interface { BucketMigratorRO DropBucket(string) error CreateBucket(string) error ExistsBucket(string) (bool, error) ClearBucket(string) error } // Cursor - class for navigating through a database // CursorDupSort are inherit this class // // If methods (like First/Next/Seek) return error, then returned key SHOULD not be nil (can be []byte{} for example). // Then looping code will look as: // c := kv.Cursor(bucketName) // // for k, v, err := c.First(); k != nil; k, v, err = c.Next() { // if err != nil { // return err // } // ... logic // } type Cursor interface { First() ([]byte, []byte, error) // First - position at first key/data item Seek(seek []byte) ([]byte, []byte, error) // Seek - position at first key greater than or equal to specified key SeekExact(key []byte) ([]byte, []byte, error) // SeekExact - position at exact matching key if exists Next() ([]byte, []byte, error) // Next - position at next key/value (can iterate over DupSort key/values automatically) Prev() ([]byte, []byte, error) // Prev - position at previous key Last() ([]byte, []byte, error) // Last - position at last key and last possible value Current() ([]byte, []byte, error) // Current - return key/data at current cursor position Count() (uint64, error) // Count - fast way to calculate amount of keys in bucket. It counts all keys even if Prefix was set. 
Close() } type RwCursor interface { Cursor Put(k, v []byte) error // Put - based on order Append(k []byte, v []byte) error // Append - append the given key/data pair to the end of the database. This option allows fast bulk loading when keys are already known to be in the correct order. Delete(k []byte) error // Delete - short version of SeekExact+DeleteCurrent or SeekBothExact+DeleteCurrent // DeleteCurrent This function deletes the key/data pair to which the cursor refers. // This does not invalidate the cursor, so operations such as MDB_NEXT // can still be used on it. // Both MDB_NEXT and MDB_GET_CURRENT will return the same record after // this operation. DeleteCurrent() error } // CursorDupSort // // Example: // // for k, v, err = cursor.First(); k != nil; k, v, err = cursor.NextNoDup() { // if err != nil { // return err // } // for ; v != nil; _, v, err = cursor.NextDup() { // if err != nil { // return err // } // // } // } type CursorDupSort interface { Cursor // SeekBothExact - // second parameter can be nil only if searched key has no duplicates, or return error SeekBothExact(key, value []byte) ([]byte, []byte, error) SeekBothRange(key, value []byte) ([]byte, error) // SeekBothRange - exact match of the key, but range match of the value FirstDup() ([]byte, error) // FirstDup - position at first data item of current key NextDup() ([]byte, []byte, error) // NextDup - position at next data item of current key NextNoDup() ([]byte, []byte, error) // NextNoDup - position at first data item of next key PrevDup() ([]byte, []byte, error) PrevNoDup() ([]byte, []byte, error) LastDup() ([]byte, error) // LastDup - position at last data item of current key CountDuplicates() (uint64, error) // CountDuplicates - number of duplicates for the current key } type RwCursorDupSort interface { CursorDupSort RwCursor PutNoDupData(key, value []byte) error // PutNoDupData - inserts key without dupsort DeleteCurrentDuplicates() error // DeleteCurrentDuplicates - deletes all of the 
data items for the current key DeleteExact(k1, k2 []byte) error // DeleteExact - delete 1 value from given key AppendDup(key, value []byte) error // AppendDup - same as Append, but for sorted dup data } // ---- Temporal part type ( Domain string History string InvertedIdx string ) type TemporalTx interface { Tx DomainGet(name Domain, k, k2 []byte) (v []byte, ok bool, err error) DomainGetAsOf(name Domain, k, k2 []byte, ts uint64) (v []byte, ok bool, err error) HistoryGet(name History, k []byte, ts uint64) (v []byte, ok bool, err error) // IndexRange - return iterator over range of inverted index for given key `k` // Asc semantic: [from, to) AND from > to // Desc semantic: [from, to) AND from < to // Limit -1 means Unlimited // from -1, to -1 means unbounded (StartOfTable, EndOfTable) // Example: IndexRange("IndexName", 10, 5, order.Desc, -1) // Example: IndexRange("IndexName", -1, -1, order.Asc, 10) IndexRange(name InvertedIdx, k []byte, fromTs, toTs int, asc order.By, limit int) (timestamps iter.U64, err error) HistoryRange(name History, fromTs, toTs int, asc order.By, limit int) (it iter.KV, err error) DomainRange(name Domain, fromKey, toKey []byte, ts uint64, asc order.By, limit int) (it iter.KV, err error) }