2021-09-11 15:11:41 +00:00
|
|
|
/*
|
|
|
|
Copyright 2021 Erigon contributors
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package etl
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bufio"
|
2022-02-20 22:14:06 +00:00
|
|
|
"encoding/binary"
|
2021-09-11 15:11:41 +00:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"io/ioutil"
|
|
|
|
"os"
|
|
|
|
"runtime"
|
|
|
|
|
|
|
|
"github.com/ledgerwatch/erigon-lib/common"
|
|
|
|
"github.com/ledgerwatch/log/v3"
|
|
|
|
)
|
|
|
|
|
|
|
|
// dataProvider is a sequential source of key/value pairs.
type dataProvider interface {
	// Next takes two caller-supplied buffers and returns a key, a value and
	// an error.
	Next(keyBuf, valBuf []byte) ([]byte, []byte, error)

	// Dispose is safe to call repeatedly and returns no error, which makes
	// it convenient to use with defer.
	Dispose() uint64
}
|
|
|
|
|
|
|
|
// fileDataProvider is a dataProvider whose entries live in a file on disk.
type fileDataProvider struct {
	// file is the backing file the entries are read from.
	file *os.File

	// reader and byteReader are two interfaces onto the same object: one for
	// bulk reads, the other for byte-at-a-time reads.
	reader     io.Reader
	byteReader io.ByteReader
}
|
|
|
|
|
2022-02-10 07:40:24 +00:00
|
|
|
// FlushToDisk - `doFsync` is true only for 'critical' collectors (which should not loose).
|
2022-03-21 04:22:17 +00:00
|
|
|
func FlushToDisk(b Buffer, tmpdir string, doFsync bool, lvl log.Lvl) (dataProvider, error) {
|
2021-09-11 15:11:41 +00:00
|
|
|
if b.Len() == 0 {
|
|
|
|
return nil, nil
|
|
|
|
}
|
|
|
|
// if we are going to create files in the system temp dir, we don't need any
|
|
|
|
// subfolders.
|
|
|
|
if tmpdir != "" {
|
|
|
|
if err := os.MkdirAll(tmpdir, 0755); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-06 10:32:37 +00:00
|
|
|
bufferFile, err := ioutil.TempFile(tmpdir, "erigon-sortable-buf-")
|
2021-09-11 15:11:41 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2022-02-10 07:40:24 +00:00
|
|
|
if doFsync {
|
|
|
|
defer bufferFile.Sync() //nolint:errcheck
|
|
|
|
}
|
2021-09-11 15:11:41 +00:00
|
|
|
|
|
|
|
w := bufio.NewWriterSize(bufferFile, BufIOSize)
|
|
|
|
defer w.Flush() //nolint:errcheck
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
b.Reset() // run it after buf.flush and file.sync
|
2022-03-21 04:22:17 +00:00
|
|
|
var m runtime.MemStats
|
2022-05-19 04:46:55 +00:00
|
|
|
if lvl >= log.LvlInfo {
|
|
|
|
common.ReadMemStats(&m)
|
|
|
|
}
|
2022-03-21 04:22:17 +00:00
|
|
|
log.Log(lvl,
|
|
|
|
"Flushed buffer file",
|
|
|
|
"name", bufferFile.Name(),
|
|
|
|
"alloc", common.ByteCount(m.Alloc), "sys", common.ByteCount(m.Sys))
|
2021-09-11 15:11:41 +00:00
|
|
|
}()
|
|
|
|
|
2022-02-20 22:14:06 +00:00
|
|
|
if err = b.Write(w); err != nil {
|
2022-01-22 05:21:04 +00:00
|
|
|
return nil, fmt.Errorf("error writing entries to disk: %w", err)
|
2021-09-11 15:11:41 +00:00
|
|
|
}
|
|
|
|
|
2022-02-20 22:14:06 +00:00
|
|
|
return &fileDataProvider{file: bufferFile, reader: nil}, nil
|
2021-09-11 15:11:41 +00:00
|
|
|
}
|
|
|
|
|
2022-02-20 22:14:06 +00:00
|
|
|
func (p *fileDataProvider) Next(keyBuf, valBuf []byte) ([]byte, []byte, error) {
|
2021-09-11 15:11:41 +00:00
|
|
|
if p.reader == nil {
|
|
|
|
_, err := p.file.Seek(0, 0)
|
|
|
|
if err != nil {
|
|
|
|
return nil, nil, err
|
|
|
|
}
|
2022-02-20 22:14:06 +00:00
|
|
|
r := bufio.NewReaderSize(p.file, BufIOSize)
|
|
|
|
p.reader = r
|
|
|
|
p.byteReader = r
|
|
|
|
|
2021-09-11 15:11:41 +00:00
|
|
|
}
|
2022-02-20 22:14:06 +00:00
|
|
|
return readElementFromDisk(p.reader, p.byteReader, keyBuf, valBuf)
|
2021-09-11 15:11:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p *fileDataProvider) Dispose() uint64 {
|
|
|
|
info, _ := os.Stat(p.file.Name())
|
|
|
|
_ = p.file.Close()
|
|
|
|
_ = os.Remove(p.file.Name())
|
|
|
|
if info == nil {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
return uint64(info.Size())
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *fileDataProvider) String() string {
|
|
|
|
return fmt.Sprintf("%T(file: %s)", p, p.file.Name())
|
|
|
|
}
|
|
|
|
|
2022-02-20 22:14:06 +00:00
|
|
|
// readElementFromDisk decodes one key/value record from the stream, appending
// the key bytes to keyBuf and the value bytes to valBuf.
//
// A record is laid out as: uvarint key length, key bytes, uvarint value
// length, value bytes. r and br are expected to be two views of the same
// underlying stream so that their read positions advance together.
//
// It returns the extended buffers; a buffer is reallocated when its capacity
// is too small and returned unchanged when the encoded length is zero.
// io.EOF is returned only when the stream ends before the first byte of a
// record. A stream that ends part-way through a record yields
// io.ErrUnexpectedEOF, so callers cannot mistake a truncated file for a normal
// end of data. On any error both returned slices are nil.
func readElementFromDisk(r io.Reader, br io.ByteReader, keyBuf, valBuf []byte) ([]byte, []byte, error) {
	const maxInt = int(^uint(0) >> 1)

	// readChunk grows dst by n bytes and fills the new tail from r.
	readChunk := func(dst []byte, n uint64) ([]byte, error) {
		if n == 0 {
			return dst, nil
		}
		// A length that does not fit in an int would wrap to a negative value
		// in the conversions below and cause slice-bounds panics.
		if n > uint64(maxInt-len(dst)) {
			return nil, fmt.Errorf("invalid element length %d", n)
		}
		start := len(dst)
		end := start + int(n)
		if end > cap(dst) {
			// Not enough capacity: reallocate and keep the existing prefix.
			grown := make([]byte, end)
			copy(grown, dst)
			dst = grown
		} else {
			dst = dst[:end]
		}
		if _, err := io.ReadFull(r, dst[start:]); err != nil {
			// ReadFull reports io.EOF when it read nothing at all, but at this
			// point a length prefix has already promised n more bytes.
			if err == io.EOF {
				err = io.ErrUnexpectedEOF
			}
			return nil, err
		}
		return dst, nil
	}

	keyLen, err := binary.ReadUvarint(br)
	if err != nil {
		// io.EOF here means the stream ended on a record boundary.
		return nil, nil, err
	}
	if keyBuf, err = readChunk(keyBuf, keyLen); err != nil {
		return nil, nil, err
	}

	valLen, err := binary.ReadUvarint(br)
	if err != nil {
		// The key was read, so running out of data here is a truncation.
		if err == io.EOF {
			err = io.ErrUnexpectedEOF
		}
		return nil, nil, err
	}
	if valBuf, err = readChunk(valBuf, valLen); err != nil {
		return nil, nil, err
	}

	return keyBuf, valBuf, nil
}
|
|
|
|
|
|
|
|
type memoryDataProvider struct {
|
|
|
|
buffer Buffer
|
|
|
|
currentIndex int
|
|
|
|
}
|
|
|
|
|
|
|
|
func KeepInRAM(buffer Buffer) dataProvider {
|
|
|
|
return &memoryDataProvider{buffer, 0}
|
|
|
|
}
|
|
|
|
|
2022-02-20 22:14:06 +00:00
|
|
|
func (p *memoryDataProvider) Next(keyBuf, valBuf []byte) ([]byte, []byte, error) {
|
2021-09-11 15:11:41 +00:00
|
|
|
if p.currentIndex >= p.buffer.Len() {
|
|
|
|
return nil, nil, io.EOF
|
|
|
|
}
|
2022-02-20 22:14:06 +00:00
|
|
|
key, value := p.buffer.Get(p.currentIndex, keyBuf, valBuf)
|
2021-09-11 15:11:41 +00:00
|
|
|
p.currentIndex++
|
2022-02-20 22:14:06 +00:00
|
|
|
return key, value, nil
|
2021-09-11 15:11:41 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (p *memoryDataProvider) Dispose() uint64 {
|
|
|
|
return 0 /* doesn't take space on disk */
|
|
|
|
}
|
|
|
|
|
|
|
|
func (p *memoryDataProvider) String() string {
|
|
|
|
return fmt.Sprintf("%T(buffer.Len: %d)", p, p.buffer.Len())
|
|
|
|
}
|