erigon-pulse/compress/decompress_test.go
ledgerwatch 75b52ac25e
[compress] Allow uncompressed words (#350)
* Intermediate work

* Allow uncompressed words

* Fix

* Fix tests

* Add NextUncompressed, remove g.word buffer

* Code simplifications, no goroutines when workers == 1

* Fix lint

* Add test for MatchPrefix

* Work on patricia

* Beginning of new matcher

* Fuzz test for new longest match

* No skip

* Fixes

* Fixes

* More tracing

* Fixes

* Fixes

* Change back to old FindLongestMatches

* Switch to old match finder

* Print mismatches

* Fix

* After fix

* After fix

* After fix

* Print pointers

* Fixes and tests

* Print

* Print

* Print

* More tests

* Intermediate

* Fix

* Fix

* Prints

* Fix

* Fix

* Initialise matchStack

* Compute only once

* Compute only once

* Switch back

* Switch to old Find

* Introduce sais

* Switch patricia to sais

* Use sais in compressor

* Use sais in compressor

* Remove unused code

Co-authored-by: Alexey Sharp <alexeysharp@Alexeys-iMac.local>
Co-authored-by: Alex Sharp <alexsharp@Alexs-MacBook-Pro.local>
2022-03-09 17:25:22 +00:00

164 lines
3.8 KiB
Go

/*
Copyright 2021 Erigon contributors
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package compress
import (
"context"
"fmt"
"path/filepath"
"strings"
"testing"
)
// prepareLoremDict builds the fixture shared by the decompressor tests.
//
// It compresses every entry of loremStrings, formatted as "<word> <index>",
// into a file named "compressed" inside a per-test temporary directory and
// returns a Decompressor opened on that file. Any error aborts the calling
// test through t.Fatal. The returned Decompressor is not closed here; the
// tests in this file close it themselves.
func prepareLoremDict(t *testing.T) *Decompressor {
	// Attribute failures to the calling test's line, not to this helper.
	t.Helper()

	tmpDir := t.TempDir()
	file := filepath.Join(tmpDir, "compressed")
	c, err := NewCompressor(context.Background(), t.Name(), file, tmpDir, 1, 2)
	if err != nil {
		t.Fatal(err)
	}
	defer c.Close()

	// Suffix each word with its index so that repeated words still produce
	// distinct entries the tests can tell apart.
	for k, w := range loremStrings {
		if err = c.AddWord([]byte(fmt.Sprintf("%s %d", w, k))); err != nil {
			t.Fatal(err)
		}
	}
	if err = c.Compress(); err != nil {
		t.Fatal(err)
	}

	d, err := NewDecompressor(file)
	if err != nil {
		t.Fatal(err)
	}
	return d
}
// TestDecompressSkip walks the lorem fixture, calling Skip on every
// even-indexed word and Next on every odd-indexed one, and checks that each
// word read equals the "<word> <index>" string that was compressed. It also
// verifies that the getter yields exactly len(loremStrings) words.
func TestDecompressSkip(t *testing.T) {
	d := prepareLoremDict(t)
	defer d.Close()
	g := d.MakeGetter()

	i := 0
	for ; g.HasNext(); i++ {
		// Fail with a clear message instead of an index-out-of-range panic.
		if i >= len(loremStrings) {
			t.Fatalf("getter yielded more than the %d words that were compressed", len(loremStrings))
		}
		if i%2 == 0 {
			g.Skip()
			continue
		}
		word, _ := g.Next(nil) // the second result is not needed by this test
		expected := fmt.Sprintf("%s %d", loremStrings[i], i)
		if string(word) != expected {
			// %x so the output really is hex, as the message states.
			t.Errorf("expected %s, got (hex) %x", expected, word)
		}
	}
	// Without this check the test would pass even if the getter yielded nothing.
	if i != len(loremStrings) {
		t.Errorf("expected %d words, got %d", len(loremStrings), i)
	}
}
// TestDecompressMatchOK walks the lorem fixture, calling Match with the exact
// expected "<word> <index>" bytes on every odd-indexed word and Next on every
// even-indexed one. Match must report success, and Next must return the
// expected bytes. It also verifies that the getter yields exactly
// len(loremStrings) words.
func TestDecompressMatchOK(t *testing.T) {
	d := prepareLoremDict(t)
	defer d.Close()
	g := d.MakeGetter()

	i := 0
	for ; g.HasNext(); i++ {
		// Fail with a clear message instead of an index-out-of-range panic.
		if i >= len(loremStrings) {
			t.Fatalf("getter yielded more than the %d words that were compressed", len(loremStrings))
		}
		expected := fmt.Sprintf("%s %d", loremStrings[i], i)
		if i%2 != 0 {
			ok, _ := g.Match([]byte(expected)) // the second result is not needed by this test
			if !ok {
				t.Errorf("expected match with %s", expected)
			}
			continue
		}
		word, _ := g.Next(nil) // the second result is not needed by this test
		if string(word) != expected {
			// %x so the output really is hex, as the message states.
			t.Errorf("expected %s, got (hex) %x", expected, word)
		}
	}
	// Without this check the test would pass even if the getter yielded nothing.
	if i != len(loremStrings) {
		t.Errorf("expected %d words, got %d", len(loremStrings), i)
	}
}
// TestDecompressMatchNotOK walks the lorem fixture and calls Match with a
// deliberately wrong candidate: the current word paired with index i+1, where
// the compressed entry uses index i. Match must report failure for every
// word; after each failed match the word is passed over with Skip.
func TestDecompressMatchNotOK(t *testing.T) {
	d := prepareLoremDict(t)
	defer d.Close()
	g := d.MakeGetter()

	i := 0
	skipCount := 0
	for ; g.HasNext(); i++ {
		// Fail with a clear message instead of an index-out-of-range panic.
		if i >= len(loremStrings) {
			t.Fatalf("getter yielded more than the %d words that were compressed", len(loremStrings))
		}
		// i+1 makes the numeric suffix differ from the stored "<word> <i>".
		expected := fmt.Sprintf("%s %d", loremStrings[i], i+1)
		ok, _ := g.Match([]byte(expected)) // the second result is not needed by this test
		if ok {
			t.Errorf("not expected match with %s", expected)
			continue
		}
		g.Skip()
		skipCount++
	}
	// Every word must have been rejected by Match and then skipped.
	if skipCount != i {
		t.Errorf("something wrong with match logic: skipped %d of %d words", skipCount, i)
	}
}
// TestDecompressMatchPrefix exercises MatchPrefix in two passes over the
// lorem fixture.
//
// Pass 1: the candidate is the first half of "<word> <i+1>". For the lorem
// data that half appears to lie within the word itself, so it should also be
// a prefix of the stored "<word> <i>", and MatchPrefix must return true.
//
// Pass 2 (after Reset(0)): the same candidate with its last byte incremented,
// so it differs from the stored entry at that position, and MatchPrefix must
// return false.
//
// Each word is passed over with Skip after it is checked, and both passes
// verify that the getter yields exactly len(loremStrings) words.
func TestDecompressMatchPrefix(t *testing.T) {
	d := prepareLoremDict(t)
	defer d.Close()
	g := d.MakeGetter()

	// Pass 1: a genuine prefix must match.
	i := 0
	for ; g.HasNext(); i++ {
		// Fail with a clear message instead of an index-out-of-range panic.
		if i >= len(loremStrings) {
			t.Fatalf("getter yielded more than the %d words that were compressed", len(loremStrings))
		}
		expected := []byte(fmt.Sprintf("%s %d", loremStrings[i], i+1))
		expected = expected[:len(expected)/2]
		if !g.MatchPrefix(expected) {
			t.Errorf("expected match with %s", expected)
		}
		g.Skip()
	}
	if i != len(loremStrings) {
		t.Errorf("first pass: expected %d words, got %d", len(loremStrings), i)
	}

	// Pass 2: rewind and check that a corrupted prefix does not match.
	g.Reset(0)
	i = 0
	for ; g.HasNext(); i++ {
		if i >= len(loremStrings) {
			t.Fatalf("getter yielded more than the %d words that were compressed", len(loremStrings))
		}
		expected := []byte(fmt.Sprintf("%s %d", loremStrings[i], i+1))
		expected = expected[:len(expected)/2]
		if len(expected) > 0 {
			// Change the final byte so the candidate is no longer a prefix.
			expected[len(expected)-1]++
			if g.MatchPrefix(expected) {
				t.Errorf("not expected match with %s", expected)
			}
		}
		g.Skip()
	}
	if i != len(loremStrings) {
		t.Errorf("second pass: expected %d words, got %d", len(loremStrings), i)
	}
}
// lorem is the sample text used as the source of test words. It is a raw
// string literal, so the line breaks inside it are part of the value.
const lorem = `Lorem ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et
dolore magna aliqua Ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo
consequat Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur
Excepteur sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est laborum`

// loremStrings is lorem split on single spaces. Line breaks are not treated as
// separators, so they stay embedded in the elements next to them.
var loremStrings = strings.Split(lorem, " ")