2021-10-16 09:43:41 +00:00
|
|
|
/*
|
2022-06-17 11:39:49 +00:00
|
|
|
Copyright 2022 Erigon contributors
|
2021-10-16 09:43:41 +00:00
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package recsplit
|
|
|
|
|
|
|
|
import (
|
2022-06-17 11:39:49 +00:00
|
|
|
"bufio"
|
2021-10-16 09:43:41 +00:00
|
|
|
"encoding/binary"
|
2022-06-17 11:39:49 +00:00
|
|
|
"fmt"
|
2021-10-16 09:43:41 +00:00
|
|
|
"math"
|
2022-06-17 11:39:49 +00:00
|
|
|
"math/bits"
|
2021-10-16 09:43:41 +00:00
|
|
|
"os"
|
2022-10-12 03:18:51 +00:00
|
|
|
"path/filepath"
|
2021-10-16 09:43:41 +00:00
|
|
|
"unsafe"
|
|
|
|
|
|
|
|
"github.com/ledgerwatch/erigon-lib/mmap"
|
2021-11-05 10:04:17 +00:00
|
|
|
"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano16"
|
|
|
|
"github.com/ledgerwatch/erigon-lib/recsplit/eliasfano32"
|
2021-10-16 09:43:41 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// Index implements index lookup from the file created by the RecSplit
|
|
|
|
type Index struct {
|
|
|
|
indexFile string
|
|
|
|
f *os.File
|
|
|
|
mmapHandle1 []byte // mmap handle for unix (this is used to close mmap)
|
|
|
|
mmapHandle2 *[mmap.MaxMapSize]byte // mmap handle for windows (this is used to close mmap)
|
|
|
|
data []byte // slice of correct size for the index to work with
|
|
|
|
keyCount uint64
|
|
|
|
bytesPerRec int
|
|
|
|
recMask uint64
|
|
|
|
grData []uint64
|
2021-11-05 10:04:17 +00:00
|
|
|
ef eliasfano16.DoubleEliasFano
|
2021-10-16 09:43:41 +00:00
|
|
|
enums bool
|
2021-11-05 10:04:17 +00:00
|
|
|
offsetEf *eliasfano32.EliasFano
|
2021-11-21 14:52:23 +00:00
|
|
|
baseDataID uint64
|
2022-01-24 20:39:04 +00:00
|
|
|
bucketCount uint64 // Number of buckets
|
2021-10-16 09:43:41 +00:00
|
|
|
bucketSize int
|
|
|
|
leafSize uint16 // Leaf size for recursive split algorithms
|
|
|
|
primaryAggrBound uint16 // The lower bound for primary key aggregation (computed from leafSize)
|
|
|
|
secondaryAggrBound uint16 // The lower bound for secondary key aggregation (computed from leadSize)
|
|
|
|
salt uint32
|
|
|
|
startSeed []uint64
|
|
|
|
golombRice []uint32
|
2022-01-31 22:32:00 +00:00
|
|
|
size int64
|
2021-10-16 09:43:41 +00:00
|
|
|
}
|
|
|
|
|
2021-10-31 02:38:10 +00:00
|
|
|
func MustOpen(indexFile string) *Index {
|
2021-11-15 14:19:56 +00:00
|
|
|
idx, err := OpenIndex(indexFile)
|
2021-10-31 02:38:10 +00:00
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
return idx
|
|
|
|
}
|
|
|
|
|
2021-11-15 14:19:56 +00:00
|
|
|
func OpenIndex(indexFile string) (*Index, error) {
|
2021-10-16 09:43:41 +00:00
|
|
|
idx := &Index{
|
|
|
|
indexFile: indexFile,
|
|
|
|
}
|
|
|
|
var err error
|
|
|
|
idx.f, err = os.Open(indexFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
var stat os.FileInfo
|
|
|
|
if stat, err = idx.f.Stat(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-01-31 22:32:00 +00:00
|
|
|
idx.size = stat.Size()
|
|
|
|
if idx.mmapHandle1, idx.mmapHandle2, err = mmap.Mmap(idx.f, int(idx.size)); err != nil {
|
2021-10-16 09:43:41 +00:00
|
|
|
return nil, err
|
|
|
|
}
|
2022-01-31 22:32:00 +00:00
|
|
|
idx.data = idx.mmapHandle1[:idx.size]
|
2021-10-16 09:43:41 +00:00
|
|
|
// Read number of keys and bytes per record
|
2021-11-21 14:52:23 +00:00
|
|
|
idx.baseDataID = binary.BigEndian.Uint64(idx.data[:8])
|
|
|
|
idx.keyCount = binary.BigEndian.Uint64(idx.data[8:16])
|
|
|
|
idx.bytesPerRec = int(idx.data[16])
|
2021-10-16 09:43:41 +00:00
|
|
|
idx.recMask = (uint64(1) << (8 * idx.bytesPerRec)) - 1
|
2021-11-21 14:52:23 +00:00
|
|
|
offset := 16 + 1 + int(idx.keyCount)*idx.bytesPerRec
|
|
|
|
|
2022-09-07 07:40:27 +00:00
|
|
|
if offset < 0 {
|
|
|
|
return nil, fmt.Errorf("offset is: %d which is below zero, the file: %s is broken", offset, indexFile)
|
|
|
|
}
|
|
|
|
|
2021-10-16 09:43:41 +00:00
|
|
|
// Bucket count, bucketSize, leafSize
|
|
|
|
idx.bucketCount = binary.BigEndian.Uint64(idx.data[offset:])
|
|
|
|
offset += 8
|
|
|
|
idx.bucketSize = int(binary.BigEndian.Uint16(idx.data[offset:]))
|
|
|
|
offset += 2
|
|
|
|
idx.leafSize = binary.BigEndian.Uint16(idx.data[offset:])
|
|
|
|
offset += 2
|
|
|
|
idx.primaryAggrBound = idx.leafSize * uint16(math.Max(2, math.Ceil(0.35*float64(idx.leafSize)+1./2.)))
|
|
|
|
if idx.leafSize < 7 {
|
|
|
|
idx.secondaryAggrBound = idx.primaryAggrBound * 2
|
|
|
|
} else {
|
|
|
|
idx.secondaryAggrBound = idx.primaryAggrBound * uint16(math.Ceil(0.21*float64(idx.leafSize)+9./10.))
|
|
|
|
}
|
|
|
|
// Salt
|
|
|
|
idx.salt = binary.BigEndian.Uint32(idx.data[offset:])
|
|
|
|
offset += 4
|
|
|
|
// Start seed
|
|
|
|
startSeedLen := int(idx.data[offset])
|
|
|
|
offset++
|
|
|
|
idx.startSeed = make([]uint64, startSeedLen)
|
|
|
|
for i := 0; i < startSeedLen; i++ {
|
|
|
|
idx.startSeed[i] = binary.BigEndian.Uint64(idx.data[offset:])
|
|
|
|
offset += 8
|
|
|
|
}
|
|
|
|
idx.enums = idx.data[offset] != 0
|
|
|
|
offset++
|
|
|
|
if idx.enums {
|
|
|
|
var size int
|
2021-11-05 10:04:17 +00:00
|
|
|
idx.offsetEf, size = eliasfano32.ReadEliasFano(idx.data[offset:])
|
2021-10-16 09:43:41 +00:00
|
|
|
offset += size
|
|
|
|
}
|
|
|
|
// Size of golomb rice params
|
|
|
|
golombParamSize := binary.BigEndian.Uint16(idx.data[offset:])
|
|
|
|
offset += 4
|
|
|
|
idx.golombRice = make([]uint32, golombParamSize)
|
|
|
|
for i := uint16(0); i < golombParamSize; i++ {
|
|
|
|
if i == 0 {
|
|
|
|
idx.golombRice[i] = (bijMemo[i] << 27) | bijMemo[i]
|
|
|
|
} else if i <= idx.leafSize {
|
|
|
|
idx.golombRice[i] = (bijMemo[i] << 27) | (uint32(1) << 16) | bijMemo[i]
|
|
|
|
} else {
|
|
|
|
computeGolombRice(i, idx.golombRice, idx.leafSize, idx.primaryAggrBound, idx.secondaryAggrBound)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
l := binary.BigEndian.Uint64(idx.data[offset:])
|
|
|
|
offset += 8
|
|
|
|
p := (*[maxDataSize / 8]uint64)(unsafe.Pointer(&idx.data[offset]))
|
|
|
|
idx.grData = p[:l]
|
|
|
|
offset += 8 * int(l)
|
|
|
|
idx.ef.Read(idx.data[offset:])
|
|
|
|
return idx, nil
|
|
|
|
}
|
|
|
|
|
2022-01-31 22:32:00 +00:00
|
|
|
func (idx *Index) Size() int64 {
|
|
|
|
return idx.size
|
|
|
|
}
|
|
|
|
|
2021-11-21 14:52:23 +00:00
|
|
|
// BaseDataID returns the base data ID decoded from the first 8 bytes of the
// index file.
func (idx *Index) BaseDataID() uint64 {
	return idx.baseDataID
}
|
|
|
|
|
2021-10-16 09:43:41 +00:00
|
|
|
func (idx *Index) Close() error {
|
|
|
|
if err := mmap.Munmap(idx.mmapHandle1, idx.mmapHandle2); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := idx.f.Close(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (idx *Index) skipBits(m uint16) int {
|
|
|
|
return int(idx.golombRice[m] & 0xffff)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (idx *Index) skipNodes(m uint16) int {
|
|
|
|
return int(idx.golombRice[m]>>16) & 0x7FF
|
|
|
|
}
|
|
|
|
|
|
|
|
// golombParam returns the optimal Golomb parameter to use for encoding
|
|
|
|
// salt for the part of the hash function separating m elements. It is based on
|
|
|
|
// calculations with assumptions that we draw hash functions at random
|
|
|
|
func (idx *Index) golombParam(m uint16) int {
|
|
|
|
return int(idx.golombRice[m] >> 27)
|
|
|
|
}
|
|
|
|
|
2022-01-15 22:09:06 +00:00
|
|
|
func (idx *Index) Empty() bool {
|
2021-11-13 12:12:29 +00:00
|
|
|
return idx.keyCount == 0
|
|
|
|
}
|
|
|
|
|
2022-02-20 22:14:06 +00:00
|
|
|
func (idx *Index) KeyCount() uint64 {
|
|
|
|
return idx.keyCount
|
|
|
|
}
|
|
|
|
|
2022-01-15 22:09:06 +00:00
|
|
|
// Lookup is not thread-safe because it used id.hasher
|
2022-01-24 20:39:04 +00:00
|
|
|
func (idx *Index) Lookup(bucketHash, fingerprint uint64) uint64 {
|
2021-11-13 12:12:29 +00:00
|
|
|
if idx.keyCount == 0 {
|
2022-10-12 03:18:51 +00:00
|
|
|
_, fName := filepath.Split(idx.indexFile)
|
|
|
|
panic("no Lookup should be done when keyCount==0, please use Empty function to guard " + fName)
|
2021-11-13 12:12:29 +00:00
|
|
|
}
|
|
|
|
if idx.keyCount == 1 {
|
|
|
|
return 0
|
|
|
|
}
|
2021-10-16 09:43:41 +00:00
|
|
|
var gr GolombRiceReader
|
|
|
|
gr.data = idx.grData
|
2022-01-24 20:39:04 +00:00
|
|
|
|
2021-10-16 09:43:41 +00:00
|
|
|
bucket := remap(bucketHash, idx.bucketCount)
|
|
|
|
cumKeys, cumKeysNext, bitPos := idx.ef.Get3(bucket)
|
|
|
|
m := uint16(cumKeysNext - cumKeys) // Number of keys in this bucket
|
|
|
|
gr.ReadReset(int(bitPos), idx.skipBits(m))
|
|
|
|
var level int
|
|
|
|
for m > idx.secondaryAggrBound { // fanout = 2
|
|
|
|
d := gr.ReadNext(idx.golombParam(m))
|
|
|
|
hmod := remap16(remix(fingerprint+idx.startSeed[level]+d), m)
|
|
|
|
split := (((m+1)/2 + idx.secondaryAggrBound - 1) / idx.secondaryAggrBound) * idx.secondaryAggrBound
|
|
|
|
if hmod < split {
|
|
|
|
m = split
|
|
|
|
} else {
|
|
|
|
gr.SkipSubtree(idx.skipNodes(split), idx.skipBits(split))
|
|
|
|
m -= split
|
|
|
|
cumKeys += uint64(split)
|
|
|
|
}
|
|
|
|
level++
|
|
|
|
}
|
|
|
|
if m > idx.primaryAggrBound {
|
|
|
|
d := gr.ReadNext(idx.golombParam(m))
|
|
|
|
hmod := remap16(remix(fingerprint+idx.startSeed[level]+d), m)
|
|
|
|
part := hmod / idx.primaryAggrBound
|
|
|
|
if idx.primaryAggrBound < m-part*idx.primaryAggrBound {
|
|
|
|
m = idx.primaryAggrBound
|
|
|
|
} else {
|
|
|
|
m = m - part*idx.primaryAggrBound
|
|
|
|
}
|
|
|
|
cumKeys += uint64(idx.primaryAggrBound * part)
|
|
|
|
if part != 0 {
|
|
|
|
gr.SkipSubtree(idx.skipNodes(idx.primaryAggrBound)*int(part), idx.skipBits(idx.primaryAggrBound)*int(part))
|
|
|
|
}
|
|
|
|
level++
|
|
|
|
}
|
|
|
|
if m > idx.leafSize {
|
|
|
|
d := gr.ReadNext(idx.golombParam(m))
|
|
|
|
hmod := remap16(remix(fingerprint+idx.startSeed[level]+d), m)
|
|
|
|
part := hmod / idx.leafSize
|
|
|
|
if idx.leafSize < m-part*idx.leafSize {
|
|
|
|
m = idx.leafSize
|
|
|
|
} else {
|
|
|
|
m = m - part*idx.leafSize
|
|
|
|
}
|
|
|
|
cumKeys += uint64(idx.leafSize * part)
|
|
|
|
if part != 0 {
|
|
|
|
gr.SkipSubtree(int(part), idx.skipBits(idx.leafSize)*int(part))
|
|
|
|
}
|
|
|
|
level++
|
|
|
|
}
|
|
|
|
b := gr.ReadNext(idx.golombParam(m))
|
|
|
|
rec := int(cumKeys) + int(remap16(remix(fingerprint+idx.startSeed[level]+b), m))
|
2021-11-21 14:52:23 +00:00
|
|
|
return binary.BigEndian.Uint64(idx.data[1+8+idx.bytesPerRec*(rec+1):]) & idx.recMask
|
2021-10-16 09:43:41 +00:00
|
|
|
}
|
|
|
|
|
2022-06-17 11:39:49 +00:00
|
|
|
// OrdinalLookup returns the offset of i-th element in the index
|
|
|
|
// Perfect hash table lookup is not performed, only access to the
|
|
|
|
// Elias-Fano structure containing all offsets.
|
|
|
|
func (idx *Index) OrdinalLookup(i uint64) uint64 {
|
2021-10-16 09:43:41 +00:00
|
|
|
return idx.offsetEf.Get(i)
|
|
|
|
}
|
2022-06-17 11:39:49 +00:00
|
|
|
|
|
|
|
func (idx *Index) ExtractOffsets() map[uint64]uint64 {
|
|
|
|
m := map[uint64]uint64{}
|
|
|
|
pos := 1 + 8 + idx.bytesPerRec
|
|
|
|
for rec := uint64(0); rec < idx.keyCount; rec++ {
|
|
|
|
offset := binary.BigEndian.Uint64(idx.data[pos:]) & idx.recMask
|
|
|
|
m[offset] = 0
|
|
|
|
pos += idx.bytesPerRec
|
|
|
|
}
|
|
|
|
return m
|
|
|
|
}
|
|
|
|
|
|
|
|
func (idx *Index) RewriteWithOffsets(w *bufio.Writer, m map[uint64]uint64) error {
|
|
|
|
// New max offset
|
|
|
|
var maxOffset uint64
|
|
|
|
for _, offset := range m {
|
|
|
|
if offset > maxOffset {
|
|
|
|
maxOffset = offset
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bytesPerRec := (bits.Len64(maxOffset) + 7) / 8
|
|
|
|
var numBuf [8]byte
|
|
|
|
// Write baseDataID
|
|
|
|
binary.BigEndian.PutUint64(numBuf[:], idx.baseDataID)
|
|
|
|
if _, err := w.Write(numBuf[:]); err != nil {
|
|
|
|
return fmt.Errorf("write number of keys: %w", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Write number of keys
|
|
|
|
binary.BigEndian.PutUint64(numBuf[:], idx.keyCount)
|
|
|
|
if _, err := w.Write(numBuf[:]); err != nil {
|
|
|
|
return fmt.Errorf("write number of keys: %w", err)
|
|
|
|
}
|
|
|
|
// Write number of bytes per index record
|
|
|
|
if err := w.WriteByte(byte(bytesPerRec)); err != nil {
|
|
|
|
return fmt.Errorf("write bytes per record: %w", err)
|
|
|
|
}
|
|
|
|
pos := 1 + 8 + idx.bytesPerRec
|
|
|
|
for rec := uint64(0); rec < idx.keyCount; rec++ {
|
|
|
|
offset := binary.BigEndian.Uint64(idx.data[pos:]) & idx.recMask
|
|
|
|
pos += idx.bytesPerRec
|
|
|
|
binary.BigEndian.PutUint64(numBuf[:], m[offset])
|
|
|
|
if _, err := w.Write(numBuf[8-bytesPerRec:]); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Write the rest as it is (TODO - wrong for indices with enums)
|
|
|
|
if _, err := w.Write(idx.data[16+1+int(idx.keyCount)*idx.bytesPerRec:]); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
2022-10-04 09:51:51 +00:00
|
|
|
|
|
|
|
// DisableReadAhead - usage: `defer d.EnableReadAhead().DisableReadAhead()`. Please don't use this funcs without `defer` to avoid leak.
|
|
|
|
func (idx *Index) DisableReadAhead() { _ = mmap.MadviseRandom(idx.mmapHandle1) }
|
|
|
|
func (idx *Index) EnableReadAhead() *Index {
|
|
|
|
_ = mmap.MadviseSequential(idx.mmapHandle1)
|
|
|
|
return idx
|
|
|
|
}
|
|
|
|
func (idx *Index) EnableMadvNormal() *Index {
|
|
|
|
_ = mmap.MadviseNormal(idx.mmapHandle1)
|
|
|
|
return idx
|
|
|
|
}
|
|
|
|
func (idx *Index) EnableWillNeed() *Index {
|
|
|
|
_ = mmap.MadviseWillNeed(idx.mmapHandle1)
|
|
|
|
return idx
|
|
|
|
}
|