erigon-pulse/state/domain_test.go

/*
   Copyright 2022 Erigon contributors

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/
package state

import (
	"context"
	"encoding/binary"
	"fmt"
	"strings"
	"testing"

	"github.com/ledgerwatch/erigon-lib/kv"
	"github.com/ledgerwatch/erigon-lib/kv/mdbx"
	"github.com/ledgerwatch/erigon-lib/recsplit"
	"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
	"github.com/ledgerwatch/log/v3"
	"github.com/stretchr/testify/require"
)
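
// testDbAndDomain creates a temporary MDBX database with the six tables a
// Domain expects (keys, values, history keys/values, settings, inverted index)
// and returns it together with a Domain using an aggregation step of 16 txs.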
func testDbAndDomain(t *testing.T, prefixLen int) (kv.RwDB, *Domain) {
	t.Helper()
	path := t.TempDir()
	logger := log.New()
	keysTable := "Keys"
	valsTable := "Vals"
	historyKeysTable := "HistoryKeys"
	historyValsTable := "HistoryVals"
	settingsTable := "Settings"
	indexTable := "Index"
	db := mdbx.NewMDBX(logger).Path(path).WithTablessCfg(func(defaultBuckets kv.TableCfg) kv.TableCfg {
		return kv.TableCfg{
			keysTable:        kv.TableCfgItem{Flags: kv.DupSort},
			valsTable:        kv.TableCfgItem{},
			historyKeysTable: kv.TableCfgItem{Flags: kv.DupSort},
			historyValsTable: kv.TableCfgItem{},
			settingsTable:    kv.TableCfgItem{},
			indexTable:       kv.TableCfgItem{Flags: kv.DupSort},
		}
	}).MustOpen()
	d, err := NewDomain(path, 16 /* aggregationStep */, "base" /* filenameBase */, keysTable, valsTable, historyKeysTable, historyValsTable, settingsTable, indexTable, prefixLen)
	require.NoError(t, err)
	return db, d
}
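
// TestCollationBuild writes three values at txNums 2, 3 and 6, collates the
// first aggregation step over txNums [0..7), and checks the resulting values
// and history files together with their recsplit indices.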
func TestCollationBuild(t *testing.T) {
	db, d := testDbAndDomain(t, 0 /* prefixLen */)
	defer db.Close()
	defer d.Close()
	tx, err := db.BeginRw(context.Background())
	require.NoError(t, err)
	defer tx.Rollback()
	d.SetTx(tx)

	d.SetTxNum(2)
	err = d.Put([]byte("key1"), []byte("value1.1"))
	require.NoError(t, err)
	d.SetTxNum(3)
	err = d.Put([]byte("key2"), []byte("value2.1"))
	require.NoError(t, err)
	d.SetTxNum(6)
	err = d.Put([]byte("key1"), []byte("value1.2"))
	require.NoError(t, err)
	err = tx.Commit()
	require.NoError(t, err)

	roTx, err := db.BeginRo(context.Background())
	require.NoError(t, err)
	defer roTx.Rollback()

	c, err := d.collate(0, 0, 7, roTx)
	require.NoError(t, err)
	require.True(t, strings.HasSuffix(c.valuesPath, "base-values.0-16.dat"))
	require.Equal(t, 2, c.valuesCount)
	require.True(t, strings.HasSuffix(c.historyPath, "base-history.0-16.dat"))
	require.Equal(t, 3, c.historyCount)
	require.Equal(t, 2, len(c.indexBitmaps))
	require.Equal(t, []uint64{3}, c.indexBitmaps["key2"].ToArray())
	require.Equal(t, []uint64{2, 6}, c.indexBitmaps["key1"].ToArray())

	sf, err := d.buildFiles(0, c)
	require.NoError(t, err)
	defer sf.Close()

	g := sf.valuesDecomp.MakeGetter()
	g.Reset(0)
	var words []string
	for g.HasNext() {
		w, _ := g.Next(nil)
		words = append(words, string(w))
	}
	require.Equal(t, []string{"key1", "value1.2", "key2", "value2.1"}, words)
	// Check index
	require.Equal(t, 2, int(sf.valuesIdx.KeyCount()))
	r := recsplit.NewIndexReader(sf.valuesIdx)
	for i := 0; i < len(words); i += 2 {
		offset := r.Lookup([]byte(words[i]))
		g.Reset(offset)
		w, _ := g.Next(nil)
		require.Equal(t, words[i], string(w))
		w, _ = g.Next(nil)
		require.Equal(t, words[i+1], string(w))
	}

	g = sf.historyDecomp.MakeGetter()
	g.Reset(0)
	words = words[:0]
	for g.HasNext() {
		w, _ := g.Next(nil)
		words = append(words, string(w))
	}
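	// History entries are keyed by the 8-byte big-endian txNum followed by the
	// domain key; the stored value is the key's previous value, which is empty
	// when the key is first created at that txNum (as for key1@2 and key2@3).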
require.Equal(t, []string{"\x00\x00\x00\x00\x00\x00\x00\x02key1", "", "\x00\x00\x00\x00\x00\x00\x00\x03key2", "", "\x00\x00\x00\x00\x00\x00\x00\x06key1", "value1.1"}, words)
require.Equal(t, 3, int(sf.historyIdx.KeyCount()))
r = recsplit.NewIndexReader(sf.historyIdx)
for i := 0; i < len(words); i += 2 {
offset := r.Lookup([]byte(words[i]))
g.Reset(offset)
w, _ := g.Next(nil)
require.Equal(t, words[i+1], string(w))
}
g = sf.efHistoryDecomp.MakeGetter()
g.Reset(0)
words = words[:0]
var intArrs [][]uint64
for g.HasNext() {
w, _ := g.Next(nil)
words = append(words, string(w))
w, _ = g.Next(w[:0])
ef, _ := eliasfano32.ReadEliasFano(w)
var ints []uint64
it := ef.Iterator()
for it.HasNext() {
ints = append(ints, it.Next())
}
intArrs = append(intArrs, ints)
}
require.Equal(t, []string{"key1", "key2"}, words)
require.Equal(t, [][]uint64{{2, 6}, {3}}, intArrs)
r = recsplit.NewIndexReader(sf.efHistoryIdx)
for i := 0; i < len(words); i++ {
offset := r.Lookup([]byte(words[i]))
g.Reset(offset)
w, _ := g.Next(nil)
require.Equal(t, words[i], string(w))
}
}
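
// TestIterationBasic checks that IteratePrefix visits exactly the keys that
// share the given 5-byte prefix, in lexicographic order.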
func TestIterationBasic(t *testing.T) {
	db, d := testDbAndDomain(t, 5 /* prefixLen */)
	defer db.Close()
	defer d.Close()
	tx, err := db.BeginRw(context.Background())
	require.NoError(t, err)
	defer tx.Rollback()
	d.SetTx(tx)

	d.SetTxNum(2)
	err = d.Put([]byte("addr1loc1"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr1loc2"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr1loc3"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr2loc1"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr2loc2"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr3loc1"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr3loc2"), []byte("value1"))
	require.NoError(t, err)

	var keys, vals []string
	err = d.IteratePrefix([]byte("addr2"), func(k, v []byte) {
		keys = append(keys, string(k))
		vals = append(vals, string(v))
	})
	require.NoError(t, err)
	require.Equal(t, []string{"addr2loc1", "addr2loc2"}, keys)
	require.Equal(t, []string{"value1", "value1"}, vals)
}
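
// TestAfterPrune builds static files for the first aggregation step, prunes
// the corresponding records from the DB, and verifies that the DB tables are
// empty while the values stay readable.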
func TestAfterPrune(t *testing.T) {
	db, d := testDbAndDomain(t, 0 /* prefixLen */)
	defer db.Close()
	defer d.Close()
	tx, err := db.BeginRw(context.Background())
	require.NoError(t, err)
	defer func() {
		if tx != nil {
			tx.Rollback()
		}
	}()
	d.SetTx(tx)

	d.SetTxNum(2)
	err = d.Put([]byte("key1"), []byte("value1.1"))
	require.NoError(t, err)
	d.SetTxNum(3)
	err = d.Put([]byte("key2"), []byte("value2.1"))
	require.NoError(t, err)
	d.SetTxNum(6)
	err = d.Put([]byte("key1"), []byte("value1.2"))
	require.NoError(t, err)
	err = tx.Commit()
	require.NoError(t, err)

	roTx, err := db.BeginRo(context.Background())
	require.NoError(t, err)
	defer roTx.Rollback()
	c, err := d.collate(0, 0, 16, roTx)
	require.NoError(t, err)
	sf, err := d.buildFiles(0, c)
	require.NoError(t, err)
	defer sf.Close()

	tx, err = db.BeginRw(context.Background())
	require.NoError(t, err)
	d.SetTx(tx)
	d.integrateFiles(sf, 0, 16)

	var v []byte
	v, err = d.Get([]byte("key1"), tx)
	require.NoError(t, err)
	require.Equal(t, []byte("value1.2"), v)
	v, err = d.Get([]byte("key2"), tx)
	require.NoError(t, err)
	require.Equal(t, []byte("value2.1"), v)

	err = d.prune(0, 0, 16)
	require.NoError(t, err)
	err = tx.Commit()
	require.NoError(t, err)

	tx, err = db.BeginRw(context.Background())
	require.NoError(t, err)
	d.SetTx(tx)
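	// After pruning, every table should be empty: the data now lives only in
	// the static files integrated above, yet Get must still find the values.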
	for _, table := range []string{d.keysTable, d.valsTable, d.historyKeysTable, d.historyValsTable, d.indexTable} {
		var cur kv.Cursor
		cur, err = tx.Cursor(table)
		require.NoError(t, err)
		defer cur.Close()
		var k []byte
		k, _, err = cur.First()
		require.NoError(t, err)
		require.Nil(t, k, table)
	}
	v, err = d.Get([]byte("key1"), tx)
	require.NoError(t, err)
	require.Equal(t, []byte("value1.2"), v)
	v, err = d.Get([]byte("key2"), tx)
	require.NoError(t, err)
	require.Equal(t, []byte("value2.1"), v)
}
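
// TestHistory generates 1000 txs where key k changes on every txNum that is a
// multiple of k, collates and prunes all but the last two aggregation steps,
// and then replays every (txNum, key) pair against GetAfterTxNum.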
func TestHistory(t *testing.T) {
	db, d := testDbAndDomain(t, 0 /* prefixLen */)
	defer db.Close()
	defer d.Close()
	tx, err := db.BeginRw(context.Background())
	require.NoError(t, err)
	defer func() {
		if tx != nil {
			tx.Rollback()
		}
	}()
	d.SetTx(tx)

	txs := uint64(1000)
	// Keys are encodings of the numbers 1..31.
	// Each key changes its value on every txNum that is a multiple of the key.
	for txNum := uint64(1); txNum <= txs; txNum++ {
		d.SetTxNum(txNum)
		for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ {
			if txNum%keyNum == 0 {
				valNum := txNum / keyNum
				var k [8]byte
				var v [8]byte
				binary.BigEndian.PutUint64(k[:], keyNum)
				binary.BigEndian.PutUint64(v[:], valNum)
				err = d.Put(k[:], v[:])
				require.NoError(t, err)
			}
		}
		if txNum%10 == 0 {
			err = tx.Commit()
			require.NoError(t, err)
			tx, err = db.BeginRw(context.Background())
			require.NoError(t, err)
			d.SetTx(tx)
		}
	}
	err = tx.Commit()
	require.NoError(t, err)

	// Leave the last 2 aggregation steps un-collated
	for step := uint64(0); step < txs/d.aggregationStep-1; step++ {
		func() {
			roTx, err := db.BeginRo(context.Background())
			require.NoError(t, err)
			c, err := d.collate(step, step*d.aggregationStep, (step+1)*d.aggregationStep, roTx)
			roTx.Rollback()
			require.NoError(t, err)
			sf, err := d.buildFiles(step, c)
			require.NoError(t, err)
			d.integrateFiles(sf, step*d.aggregationStep, (step+1)*d.aggregationStep)
			tx, err = db.BeginRw(context.Background())
			require.NoError(t, err)
			d.SetTx(tx)
			err = d.prune(step, step*d.aggregationStep, (step+1)*d.aggregationStep)
			require.NoError(t, err)
			err = tx.Commit()
			require.NoError(t, err)
			tx = nil
		}()
	}

	// Check the history
	var roTx kv.Tx
	for txNum := uint64(0); txNum <= txs; txNum++ {
		if txNum == 976 {
			// Create roTx only for the last several txNums: all history before
			// that point should be readable without any DB access.
			roTx, err = db.BeginRo(context.Background())
			require.NoError(t, err)
			defer roTx.Rollback()
		}
		for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ {
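			// The most recent change of key k at or before txNum happened at
			// floor(txNum/k)*k, so the integer division txNum/keyNum yields the
			// expected value; before the first change (txNum < k) there is none.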
			valNum := txNum / keyNum
			var k [8]byte
			var v [8]byte
			label := fmt.Sprintf("txNum=%d, keyNum=%d", txNum, keyNum)
			binary.BigEndian.PutUint64(k[:], keyNum)
			binary.BigEndian.PutUint64(v[:], valNum)
			val, err := d.GetAfterTxNum(k[:], txNum, roTx)
			require.NoError(t, err, label)
			if txNum >= keyNum {
				require.Equal(t, v[:], val, label)
			} else {
				require.Nil(t, val, label)
			}
		}
	}
}
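
// TestIterationMultistep spreads writes and a delete over three aggregation
// steps, collates and prunes all of them, and checks that IteratePrefix merges
// the static files into a single consistent view of the "addr2" prefix.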
func TestIterationMultistep(t *testing.T) {
	db, d := testDbAndDomain(t, 5 /* prefixLen */)
	defer db.Close()
	defer d.Close()
	tx, err := db.BeginRw(context.Background())
	require.NoError(t, err)
	defer func() {
		if tx != nil {
			tx.Rollback()
		}
	}()
	d.SetTx(tx)

	d.SetTxNum(2)
	err = d.Put([]byte("addr1loc1"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr1loc2"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr1loc3"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr2loc1"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr2loc2"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr3loc1"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr3loc2"), []byte("value1"))
	require.NoError(t, err)

	d.SetTxNum(2 + 16)
	err = d.Put([]byte("addr2loc1"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr2loc2"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr2loc3"), []byte("value1"))
	require.NoError(t, err)
	err = d.Put([]byte("addr2loc4"), []byte("value1"))
	require.NoError(t, err)

	d.SetTxNum(2 + 16 + 16)
	err = d.Delete([]byte("addr2loc1"))
	require.NoError(t, err)
	err = tx.Commit()
	require.NoError(t, err)

	for step := uint64(0); step <= 2; step++ {
		func() {
			roTx, err := db.BeginRo(context.Background())
			require.NoError(t, err)
			c, err := d.collate(step, step*d.aggregationStep, (step+1)*d.aggregationStep, roTx)
			roTx.Rollback()
			require.NoError(t, err)
			sf, err := d.buildFiles(step, c)
			require.NoError(t, err)
			d.integrateFiles(sf, step*d.aggregationStep, (step+1)*d.aggregationStep)
			tx, err = db.BeginRw(context.Background())
			require.NoError(t, err)
			d.SetTx(tx)
			err = d.prune(step, step*d.aggregationStep, (step+1)*d.aggregationStep)
			require.NoError(t, err)
			err = tx.Commit()
			require.NoError(t, err)
			tx = nil
		}()
	}

	tx, err = db.BeginRw(context.Background())
	require.NoError(t, err)
	d.SetTx(tx)

	var keys []string
	var vals []string
	err = d.IteratePrefix([]byte("addr2"), func(k, v []byte) {
		keys = append(keys, string(k))
		vals = append(vals, string(v))
	})
	require.NoError(t, err)
	require.Equal(t, []string{"addr2loc2", "addr2loc3", "addr2loc4"}, keys)
	require.Equal(t, []string{"value1", "value1", "value1"}, vals)
}
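
// TestMergeFiles repeats the TestHistory workload but additionally merges the
// static files produced by each step (up to a span of 16*16 txs) and verifies
// that history reads still return the correct values after merging.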
func TestMergeFiles(t *testing.T) {
	db, d := testDbAndDomain(t, 0 /* prefixLen */)
	defer db.Close()
	defer d.Close()
	tx, err := db.BeginRw(context.Background())
	require.NoError(t, err)
	defer func() {
		if tx != nil {
			tx.Rollback()
		}
	}()
	d.SetTx(tx)

	txs := uint64(1000)
	// Keys are encodings of the numbers 1..31.
	// Each key changes its value on every txNum that is a multiple of the key.
	for txNum := uint64(1); txNum <= txs; txNum++ {
		d.SetTxNum(txNum)
		for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ {
			if txNum%keyNum == 0 {
				valNum := txNum / keyNum
				var k [8]byte
				var v [8]byte
				binary.BigEndian.PutUint64(k[:], keyNum)
				binary.BigEndian.PutUint64(v[:], valNum)
				err = d.Put(k[:], v[:])
				require.NoError(t, err)
			}
		}
		if txNum%10 == 0 {
			err = tx.Commit()
			require.NoError(t, err)
			tx, err = db.BeginRw(context.Background())
			require.NoError(t, err)
			d.SetTx(tx)
		}
	}
	err = tx.Commit()
	require.NoError(t, err)
	tx = nil

	// Leave the last 2 aggregation steps un-collated
	for step := uint64(0); step < txs/d.aggregationStep-1; step++ {
		func() {
			roTx, err := db.BeginRo(context.Background())
			require.NoError(t, err)
			c, err := d.collate(step, step*d.aggregationStep, (step+1)*d.aggregationStep, roTx)
			roTx.Rollback()
			require.NoError(t, err)
			sf, err := d.buildFiles(step, c)
			require.NoError(t, err)
			d.integrateFiles(sf, step*d.aggregationStep, (step+1)*d.aggregationStep)
			tx, err = db.BeginRw(context.Background())
			require.NoError(t, err)
			d.SetTx(tx)
			err = d.prune(step, step*d.aggregationStep, (step+1)*d.aggregationStep)
			require.NoError(t, err)
			err = tx.Commit()
			require.NoError(t, err)
			tx = nil
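			// Keep merging while findMergeRange reports a mergeable
			// [startTxNum, endTxNum) range: collect the source files, merge
			// them into one, and swap the merged file in place of its inputs.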
			var found bool
			var startTxNum, endTxNum uint64
			maxEndTxNum := d.endTxNumMinimax()
			maxSpan := uint64(16 * 16)
			for found, startTxNum, endTxNum = d.findMergeRange(maxEndTxNum, maxSpan); found; found, startTxNum, endTxNum = d.findMergeRange(maxEndTxNum, maxSpan) {
				outs, _ := d.staticFilesInRange(startTxNum, endTxNum)
				in, err := d.mergeFiles(outs, startTxNum, endTxNum, maxSpan)
				require.NoError(t, err)
				d.integrateMergedFiles(outs, in)
			}
		}()
	}

	// Check the history
	var roTx kv.Tx
	for txNum := uint64(1); txNum <= txs; txNum++ {
		if txNum == 976 {
			// Create roTx only for the last several txNums: all history before
			// that point should be readable without any DB access.
			roTx, err = db.BeginRo(context.Background())
			require.NoError(t, err)
			defer roTx.Rollback()
		}
		for keyNum := uint64(1); keyNum <= uint64(31); keyNum++ {
			valNum := txNum / keyNum
			var k [8]byte
			var v [8]byte
			label := fmt.Sprintf("txNum=%d, keyNum=%d", txNum, keyNum)
			binary.BigEndian.PutUint64(k[:], keyNum)
			binary.BigEndian.PutUint64(v[:], valNum)
			val, err := d.GetAfterTxNum(k[:], txNum, roTx)
			require.NoError(t, err, label)
			if txNum >= keyNum {
				require.Equal(t, v[:], val, label)
			} else {
				require.Nil(t, val, label)
			}
		}
	}
}