gsa print

This commit is contained in:
alex.sharov 2022-03-27 11:36:30 +07:00
parent 2c61236c58
commit 3de8cfedd0
2 changed files with 113 additions and 19 deletions

View File

@ -1,33 +1,20 @@
package gsa
import (
"fmt"
"testing"
"github.com/ledgerwatch/erigon-lib/sais"
"github.com/stretchr/testify/assert"
)
func ExampleGSA() {
func TestExampleGSA(t *testing.T) {
R := [][]byte{[]byte("hihihi")}
str, n := ConcatAll(R)
sa2 := make([]uint, SaSize(n))
sa := make([]uint, SaSize(n))
lcp := make([]int, LcpSize(n))
_ = GSA(str, sa2, lcp, nil)
for i := 0; i < n; i++ {
j := sa2[i]
for ; int(j) < n; j++ {
if str[j] == 1 {
fmt.Printf("$")
break
} else if str[j] == 0 {
fmt.Printf("#")
} else {
fmt.Printf("%c", str[j]-1)
}
}
fmt.Printf("\n")
}
fmt.Printf("%d\n", sa2)
da := make([]int32, DaSize(n))
_ = GSA(str, sa, lcp, da)
PrintArrays(str, sa, lcp, da, n)
}
func TestGSA(t *testing.T) {
@ -38,3 +25,43 @@ func TestGSA(t *testing.T) {
_ = GSA(str, sa, lcp, nil)
assert.Equal(t, []uint{10, 9, 6, 3, 0, 7, 4, 1, 8, 5, 2}, sa[:n])
}
// N is the number of "hihihi" inputs the benchmarks in this file generate.
const N = 100_000
// BenchmarkName measures sais.Sais over a superstring assembled from N copies
// of "hihihi".
//
// Every input byte is written as the pair (1, byte) and every word is closed
// with the pair (0, 0), so each word contributes 2*len(word)+2 bytes.
func BenchmarkName(b *testing.B) {
	words := make([][]byte, 0, N)
	total := 0
	for i := 0; i < N; i++ {
		w := []byte("hihihi")
		words = append(words, w)
		total += 2*len(w) + 2
	}

	// Reserve the exact final size up front instead of growing from 1024 bytes.
	superstring := make([]byte, 0, total)
	for _, word := range words {
		// The element is not named b: that would shadow the *testing.B parameter.
		for _, c := range word {
			superstring = append(superstring, 1, c)
		}
		superstring = append(superstring, 0, 0)
	}
	sa := make([]int32, len(superstring))

	// Input construction above must not be part of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		if err := sais.Sais(superstring, sa); err != nil {
			// Report through the benchmark instead of panicking, so the failure
			// is attributed to this benchmark and the test binary keeps running.
			b.Fatal(err)
		}
	}
}
// BenchmarkName2 measures GSA, with the sa, lcp and da outputs all requested,
// over the text ConcatAll builds from N copies of "hihihi".
func BenchmarkName2(b *testing.B) {
	words := make([][]byte, 0, N)
	for i := 0; i < N; i++ {
		words = append(words, []byte("hihihi"))
	}

	str, n := ConcatAll(words)
	sa := make([]uint, SaSize(n))
	lcp := make([]int, LcpSize(n))
	da := make([]int32, DaSize(n))

	// Input construction and buffer allocation must not be part of the measurement.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		// A failed call would make the timing meaningless, so surface the error
		// instead of discarding it.
		if err := GSA(str, sa, lcp, da); err != nil {
			b.Fatal(err)
		}
	}
}

View File

@ -5,10 +5,13 @@ package gsa
*/
import "C"
import (
"fmt"
"unsafe"
)
// Implementation from https://github.com/felipelouza/gsufsort
// see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-020-00177-y.pdf
// see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0117-9.pdf
func SaSize(l int) int {
var a uint
@ -18,6 +21,70 @@ func LcpSize(l int) int {
var a uint
return l * int(unsafe.Sizeof(a))
}
// DaSize returns l multiplied by the size in bytes of a single C.int_da value.
//
// NOTE(review): the result is a byte count; confirm that callers which pass it
// to make() as an element count intend the resulting over-allocation.
func DaSize(l int) int {
	var elem C.int_da
	elemSize := int(unsafe.Sizeof(elem))
	return elemSize * l
}
// PrintArrays writes a tab-separated debug table of the first n entries of the
// given arrays to standard output, one row per index i.
//
// Columns, in order:
//   - i: the row index.
//   - lcp: lcp[i]; omitted when lcp is nil.
//   - (da gsa): the pair (da[i], value) when da is non-nil, where value is sa[i]
//     if da[i] == 0 and sa[i]-sa[da[i]]-1 otherwise. When da is nil the column
//     is labelled "gsa" and holds the raw sa[i].
//   - suffixes: the bytes of str from sa[i] onward. A byte equal to 1 prints "$"
//     and ends the suffix, a byte equal to 0 prints "#", and any other byte b
//     prints the character b-1.
//
// sa must hold at least n entries, as must lcp and da when they are non-nil.
//
// The layout follows the print routine of the gsufsort reference
// implementation; its optional BWT column has not been ported.
func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) {
	// Header: labels are emitted in the same order as the row cells below, so
	// that every label sits above the data it names.
	fmt.Printf("i\t")
	if lcp != nil {
		fmt.Printf("lcp\t")
	}
	if da != nil {
		fmt.Printf("(da gsa)\t")
	} else {
		fmt.Printf("gsa\t")
	}
	fmt.Printf("suffixes\t")
	fmt.Printf("\n")

	for i := 0; i < n; i++ {
		fmt.Printf("%d\t", i)
		if lcp != nil {
			fmt.Printf("%d\t", lcp[i])
		}

		if da != nil {
			// Reference C code: value = (da == 0) ? SA[i] : SA[i] - pos - 1,
			// with pos = SA[da].
			// NOTE(review): this presumably turns the global position into an
			// offset inside document da[i]; confirm against ConcatAll's layout.
			value := sa[i]
			if da[i] != 0 {
				value = sa[i] - sa[da[i]] - 1
			}
			fmt.Printf("(%d %d)\t", da[i], value)
		} else {
			fmt.Printf("%d\t", sa[i])
		}

		// Print the suffix that starts at sa[i].
		for j := sa[i]; int(j) < n; j++ {
			c := str[j]
			if c == 1 {
				fmt.Printf("$")
				break
			}
			if c == 0 {
				fmt.Printf("#")
				continue
			}
			fmt.Printf("%c", c-1)
		}
		fmt.Printf("\n")
	}
}
func GSA(data []byte, sa []uint, lcp []int, da []int32) error {
tPtr := unsafe.Pointer(&data[0]) // source "text"
var lcpPtr, saPtr, daPtr unsafe.Pointer