mirror of
https://gitlab.com/pulsechaincom/erigon-pulse.git
synced 2025-01-10 13:01:21 +00:00
db7322ef87
* dirty working equal dictionaries * slow but working decompression * much cleaner implementation with LRU words in dictionary with configurable condensity * fixed comment * removed tabs in comment line to fix lint Co-authored-by: alex.sharov <AskAlexSharov@gmail.com>
237 lines
5.4 KiB
Go
237 lines
5.4 KiB
Go
/*
|
|
Copyright 2021 Erigon contributors
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
you may not use this file except in compliance with the License.
|
|
You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
*/
|
|
|
|
package compress
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
|
|
"github.com/ledgerwatch/log/v3"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func prepareLoremDict(t *testing.T) *Decompressor {
|
|
t.Helper()
|
|
tmpDir := t.TempDir()
|
|
file := filepath.Join(tmpDir, "compressed")
|
|
t.Name()
|
|
c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer c.Close()
|
|
for k, w := range loremStrings {
|
|
if err = c.AddWord([]byte(fmt.Sprintf("%s %d", w, k))); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
if err = c.Compress(); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
var d *Decompressor
|
|
if d, err = NewDecompressor(file); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
return d
|
|
}
|
|
|
|
func TestDecompressSkip(t *testing.T) {
|
|
d := prepareLoremDict(t)
|
|
defer d.Close()
|
|
g := d.MakeGetter()
|
|
i := 0
|
|
for g.HasNext() {
|
|
w := loremStrings[i]
|
|
if i%2 == 0 {
|
|
g.Skip()
|
|
} else {
|
|
word, _ := g.Next(nil)
|
|
expected := fmt.Sprintf("%s %d", w, i)
|
|
if string(word) != expected {
|
|
t.Errorf("expected %s, got (hex) %s", expected, word)
|
|
}
|
|
}
|
|
i++
|
|
}
|
|
}
|
|
|
|
func TestDecompressMatchOK(t *testing.T) {
|
|
d := prepareLoremDict(t)
|
|
defer d.Close()
|
|
g := d.MakeGetter()
|
|
i := 0
|
|
for g.HasNext() {
|
|
w := loremStrings[i]
|
|
if i%2 != 0 {
|
|
expected := fmt.Sprintf("%s %d", w, i)
|
|
ok, _ := g.Match([]byte(expected))
|
|
if !ok {
|
|
t.Errorf("expexted match with %s", expected)
|
|
}
|
|
} else {
|
|
word, _ := g.Next(nil)
|
|
expected := fmt.Sprintf("%s %d", w, i)
|
|
if string(word) != expected {
|
|
t.Errorf("expected %s, got (hex) %s", expected, word)
|
|
}
|
|
}
|
|
i++
|
|
}
|
|
}
|
|
|
|
func TestDecompressMatchNotOK(t *testing.T) {
|
|
d := prepareLoremDict(t)
|
|
defer d.Close()
|
|
g := d.MakeGetter()
|
|
i := 0
|
|
skipCount := 0
|
|
for g.HasNext() {
|
|
w := loremStrings[i]
|
|
expected := fmt.Sprintf("%s %d", w, i+1)
|
|
|
|
ok, _ := g.Match([]byte(expected))
|
|
if ok {
|
|
t.Errorf("not expexted match with %s", expected)
|
|
} else {
|
|
g.Skip()
|
|
skipCount++
|
|
}
|
|
i++
|
|
}
|
|
if skipCount != i {
|
|
t.Errorf("something wrong with match logic")
|
|
}
|
|
}
|
|
|
|
func TestDecompressMatchPrefix(t *testing.T) {
|
|
d := prepareLoremDict(t)
|
|
defer d.Close()
|
|
g := d.MakeGetter()
|
|
i := 0
|
|
skipCount := 0
|
|
for g.HasNext() {
|
|
w := loremStrings[i]
|
|
expected := []byte(fmt.Sprintf("%s %d", w, i+1))
|
|
expected = expected[:len(expected)/2]
|
|
if !g.MatchPrefix(expected) {
|
|
t.Errorf("expexted match with %s", expected)
|
|
}
|
|
g.Skip()
|
|
skipCount++
|
|
i++
|
|
}
|
|
if skipCount != i {
|
|
t.Errorf("something wrong with match logic")
|
|
}
|
|
g.Reset(0)
|
|
skipCount = 0
|
|
i = 0
|
|
for g.HasNext() {
|
|
w := loremStrings[i]
|
|
expected := []byte(fmt.Sprintf("%s %d", w, i+1))
|
|
expected = expected[:len(expected)/2]
|
|
if len(expected) > 0 {
|
|
expected[len(expected)-1]++
|
|
if g.MatchPrefix(expected) {
|
|
t.Errorf("not expexted match with %s", expected)
|
|
}
|
|
}
|
|
g.Skip()
|
|
skipCount++
|
|
i++
|
|
}
|
|
}
|
|
|
|
func prepareLoremDictUncompressed(t *testing.T) *Decompressor {
|
|
t.Helper()
|
|
tmpDir := t.TempDir()
|
|
file := filepath.Join(tmpDir, "compressed")
|
|
t.Name()
|
|
c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
defer c.Close()
|
|
for k, w := range loremStrings {
|
|
if err = c.AddUncompressedWord([]byte(fmt.Sprintf("%s %d", w, k))); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
if err = c.Compress(); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
var d *Decompressor
|
|
if d, err = NewDecompressor(file); err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
return d
|
|
}
|
|
|
|
func TestUncompressed(t *testing.T) {
|
|
d := prepareLoremDictUncompressed(t)
|
|
defer d.Close()
|
|
g := d.MakeGetter()
|
|
i := 0
|
|
for g.HasNext() {
|
|
w := loremStrings[i]
|
|
expected := []byte(fmt.Sprintf("%s %d", w, i+1))
|
|
expected = expected[:len(expected)/2]
|
|
actual, _ := g.NextUncompressed()
|
|
if bytes.Equal(expected, actual) {
|
|
t.Errorf("expected %s, actual %s", expected, actual)
|
|
}
|
|
i++
|
|
}
|
|
}
|
|
|
|
// lorem is the placeholder text that serves as the word corpus for the tests
// in this file. It is a raw string literal spanning several lines, so the line
// breaks are part of its value.
const lorem = `Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et
|
|
dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
|
|
consequat Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur
|
|
Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum`
|
|
|
|
// loremStrings is lorem split on single spaces. Line breaks inside the literal
// are not separators, so an entry that spans a line break keeps the break in
// its text. The dictionary helpers store each entry as "<word> <index>".
var loremStrings = strings.Split(lorem, " ")
|
|
|
|
func TestDecompressTorrent(t *testing.T) {
|
|
t.Skip()
|
|
|
|
fpath := "/mnt/data/chains/mainnet/snapshots/v1-014000-014500-transactions.seg"
|
|
st, err := os.Stat(fpath)
|
|
require.NoError(t, err)
|
|
fmt.Printf("file: %v, size: %d\n", st.Name(), st.Size())
|
|
|
|
condensePatternTableBitThreshold = 9
|
|
fmt.Printf("bit threshold: %d\n", condensePatternTableBitThreshold)
|
|
d, err := NewDecompressor(fpath)
|
|
|
|
require.NoError(t, err)
|
|
defer d.Close()
|
|
|
|
getter := d.MakeGetter()
|
|
_ = getter
|
|
|
|
for getter.HasNext() {
|
|
_, sz := getter.Next(nil)
|
|
// fmt.Printf("%x\n", buf)
|
|
require.NotZero(t, sz)
|
|
}
|
|
}
|