2021-10-16 09:43:41 +00:00
|
|
|
/*
|
|
|
|
Copyright 2021 Erigon contributors
|
|
|
|
|
|
|
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
you may not use this file except in compliance with the License.
|
|
|
|
You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package compress
|
|
|
|
|
|
|
|
import (
|
2022-01-18 05:55:20 +00:00
|
|
|
"context"
|
2021-10-16 09:43:41 +00:00
|
|
|
"fmt"
|
2022-01-27 10:13:26 +00:00
|
|
|
"hash/crc32"
|
|
|
|
"io"
|
|
|
|
"os"
|
2022-01-18 05:55:20 +00:00
|
|
|
"path/filepath"
|
2021-10-16 09:43:41 +00:00
|
|
|
"testing"
|
2022-04-01 03:44:25 +00:00
|
|
|
|
|
|
|
"github.com/ledgerwatch/log/v3"
|
2022-05-16 19:59:29 +00:00
|
|
|
"github.com/stretchr/testify/require"
|
2021-10-16 09:43:41 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestCompressEmptyDict(t *testing.T) {
|
|
|
|
tmpDir := t.TempDir()
|
2022-01-18 05:55:20 +00:00
|
|
|
file := filepath.Join(tmpDir, "compressed")
|
2022-04-01 03:44:25 +00:00
|
|
|
c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 100, 1, log.LvlDebug)
|
2021-10-16 09:43:41 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2022-01-18 05:55:20 +00:00
|
|
|
defer c.Close()
|
|
|
|
|
2021-10-16 09:43:41 +00:00
|
|
|
if err = c.AddWord([]byte("word")); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if err = c.Compress(); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
var d *Decompressor
|
|
|
|
if d, err = NewDecompressor(file); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2022-01-18 05:55:20 +00:00
|
|
|
defer d.Close()
|
2021-10-16 09:43:41 +00:00
|
|
|
g := d.MakeGetter()
|
|
|
|
if !g.HasNext() {
|
|
|
|
t.Fatalf("expected a word")
|
|
|
|
}
|
|
|
|
word, _ := g.Next(nil)
|
|
|
|
if string(word) != "word" {
|
|
|
|
t.Fatalf("expeced word, got (hex) %x", word)
|
|
|
|
}
|
|
|
|
if g.HasNext() {
|
|
|
|
t.Fatalf("not expecting anything else")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-27 10:13:26 +00:00
|
|
|
// checksum returns the CRC-32 (IEEE polynomial) of the full contents of the
// file at the given path. It panics if the file cannot be opened or read.
//
//nolint
func checksum(file string) uint32 {
	src, openErr := os.Open(file)
	if openErr != nil {
		panic(openErr)
	}
	defer src.Close()

	// Stream the file through the hash instead of loading it into memory.
	digest := crc32.New(crc32.IEEETable)
	_, copyErr := io.Copy(digest, src)
	if copyErr != nil {
		panic(copyErr)
	}
	return digest.Sum32()
}
|
|
|
|
|
2022-01-24 09:18:08 +00:00
|
|
|
func prepareDict(t *testing.T) *Decompressor {
|
2022-03-19 04:38:37 +00:00
|
|
|
t.Helper()
|
2021-10-16 09:43:41 +00:00
|
|
|
tmpDir := t.TempDir()
|
2022-02-12 13:11:30 +00:00
|
|
|
file := filepath.Join(tmpDir, "compressed")
|
2021-10-25 02:12:00 +00:00
|
|
|
t.Name()
|
2022-04-01 03:44:25 +00:00
|
|
|
c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2, log.LvlDebug)
|
2021-10-16 09:43:41 +00:00
|
|
|
if err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2022-01-18 05:55:20 +00:00
|
|
|
defer c.Close()
|
2021-10-16 09:43:41 +00:00
|
|
|
for i := 0; i < 100; i++ {
|
2022-05-16 19:59:29 +00:00
|
|
|
if err = c.AddWord(nil); err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
2022-05-18 03:29:19 +00:00
|
|
|
if err = c.AddWord([]byte("long")); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
if err = c.AddWord([]byte("word")); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2022-05-18 07:36:01 +00:00
|
|
|
if err = c.AddWord([]byte(fmt.Sprintf("%d longlongword %d", i, i))); err != nil {
|
2021-10-16 09:43:41 +00:00
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err = c.Compress(); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
|
|
|
var d *Decompressor
|
|
|
|
if d, err = NewDecompressor(file); err != nil {
|
|
|
|
t.Fatal(err)
|
|
|
|
}
|
2022-01-24 09:18:08 +00:00
|
|
|
return d
|
|
|
|
}
|
|
|
|
|
|
|
|
func TestCompressDict1(t *testing.T) {
|
|
|
|
d := prepareDict(t)
|
2022-01-18 05:55:20 +00:00
|
|
|
defer d.Close()
|
2021-10-16 09:43:41 +00:00
|
|
|
g := d.MakeGetter()
|
|
|
|
i := 0
|
2022-05-16 19:59:29 +00:00
|
|
|
g.Reset(0)
|
2021-10-16 09:43:41 +00:00
|
|
|
for g.HasNext() {
|
2022-05-18 03:29:19 +00:00
|
|
|
// next word is `nil`
|
2022-05-16 19:59:29 +00:00
|
|
|
require.False(t, g.MatchPrefix([]byte("long")))
|
2022-05-18 03:29:19 +00:00
|
|
|
require.True(t, g.MatchPrefix([]byte("")))
|
2022-05-16 19:59:29 +00:00
|
|
|
require.True(t, g.MatchPrefix([]byte{}))
|
2021-10-16 09:43:41 +00:00
|
|
|
word, _ := g.Next(nil)
|
2022-05-16 19:59:29 +00:00
|
|
|
require.Nil(t, word)
|
|
|
|
|
2022-05-18 03:29:19 +00:00
|
|
|
// next word is `long`
|
|
|
|
require.True(t, g.MatchPrefix([]byte("long")))
|
|
|
|
require.False(t, g.MatchPrefix([]byte("longlong")))
|
|
|
|
require.False(t, g.MatchPrefix([]byte("wordnotmatch")))
|
|
|
|
require.False(t, g.MatchPrefix([]byte("longnotmatch")))
|
|
|
|
require.True(t, g.MatchPrefix([]byte{}))
|
|
|
|
_, _ = g.Next(nil)
|
|
|
|
|
|
|
|
// next word is `word`
|
|
|
|
require.False(t, g.MatchPrefix([]byte("long")))
|
|
|
|
require.False(t, g.MatchPrefix([]byte("longlong")))
|
|
|
|
require.True(t, g.MatchPrefix([]byte("word")))
|
|
|
|
require.True(t, g.MatchPrefix([]byte("")))
|
|
|
|
require.True(t, g.MatchPrefix(nil))
|
|
|
|
require.False(t, g.MatchPrefix([]byte("wordnotmatch")))
|
|
|
|
require.False(t, g.MatchPrefix([]byte("longnotmatch")))
|
|
|
|
_, _ = g.Next(nil)
|
|
|
|
|
|
|
|
// next word is `longlongword %d`
|
2022-05-18 07:36:01 +00:00
|
|
|
expectPrefix := fmt.Sprintf("%d long", i)
|
|
|
|
|
|
|
|
require.True(t, g.MatchPrefix([]byte(fmt.Sprintf("%d", i))))
|
|
|
|
require.True(t, g.MatchPrefix([]byte(expectPrefix)))
|
|
|
|
require.True(t, g.MatchPrefix([]byte(expectPrefix+"long")))
|
|
|
|
require.True(t, g.MatchPrefix([]byte(expectPrefix+"longword ")))
|
2022-05-18 03:29:19 +00:00
|
|
|
require.False(t, g.MatchPrefix([]byte("wordnotmatch")))
|
|
|
|
require.False(t, g.MatchPrefix([]byte("longnotmatch")))
|
2022-05-16 19:59:29 +00:00
|
|
|
require.True(t, g.MatchPrefix([]byte{}))
|
|
|
|
word, _ = g.Next(nil)
|
2022-05-18 07:36:01 +00:00
|
|
|
expected := fmt.Sprintf("%d longlongword %d", i, i)
|
2021-10-16 09:43:41 +00:00
|
|
|
if string(word) != expected {
|
2022-03-18 09:10:18 +00:00
|
|
|
t.Errorf("expected %s, got (hex) [%s]", expected, word)
|
2021-10-16 09:43:41 +00:00
|
|
|
}
|
|
|
|
i++
|
|
|
|
}
|
2022-01-27 10:13:26 +00:00
|
|
|
|
2022-05-18 07:36:01 +00:00
|
|
|
if cs := checksum(d.compressedFile); cs != 3153486123 {
|
2022-01-27 10:13:26 +00:00
|
|
|
// it's ok if hash changed, but need re-generate all existing snapshot hashes
|
|
|
|
// in https://github.com/ledgerwatch/erigon-snapshot
|
2022-03-09 17:25:22 +00:00
|
|
|
t.Errorf("result file hash changed, %d", cs)
|
2022-01-27 10:13:26 +00:00
|
|
|
}
|
2021-10-16 09:43:41 +00:00
|
|
|
}
|