diff --git a/sais/gsa/gsa_test.go b/sais/gsa/gsa_test.go index a72c2805d..096d0d957 100644 --- a/sais/gsa/gsa_test.go +++ b/sais/gsa/gsa_test.go @@ -9,16 +9,19 @@ import ( func TestExampleGSA(t *testing.T) { R := [][]byte{ - []byte("hihihi"), - []byte("hihihi"), - []byte("hihihi"), + []byte("hihi"), + []byte("alexhihialex"), + []byte("alex"), } str, n := ConcatAll(R) sa := make([]uint, n) lcp := make([]int, n) da := make([]int32, n) _ = GSA(str, sa, lcp, da) - PrintArrays(str, sa, lcp, da, n) + + PrintArrays(str, sa, lcp, da) + gsa := SA2GSA(sa, da) + _ = gsa } func TestGSA(t *testing.T) { diff --git a/sais/gsa/gsaca.go b/sais/gsa/gsaca.go index 84908c413..e15a89b72 100644 --- a/sais/gsa/gsaca.go +++ b/sais/gsa/gsaca.go @@ -13,13 +13,12 @@ import ( // Implementation from https://github.com/felipelouza/gsufsort // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-020-00177-y.pdf // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0117-9.pdf -func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) { +func PrintArrays(str []byte, sa []uint, lcp []int, da []int32) { // remove terminator + n := len(sa) - 1 sa = sa[1:] lcp = lcp[1:] - terminatorDa := da[0] da = da[1:] - n = n - 1 fmt.Printf("i\t") fmt.Printf("sa\t") @@ -39,13 +38,9 @@ func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) { } if da != nil { // gsa - pos := uint(terminatorDa) - if da[i] > 0 { - pos = sa[da[i]-1] - } value := sa[i] if da[i] != 0 { - value = sa[i] - pos - 1 + value = sa[i] - sa[da[i]-1] - 1 } fmt.Printf("(%d %d)\t", da[i], value) } @@ -67,6 +62,26 @@ func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) { fmt.Printf("\n") } } + +//nolint +// SA2GSA - example func to convert SA+DA to GSA +func SA2GSA(sa []uint, da []int32) []uint { + gsa := make([]uint, len(sa)) + copy(gsa, sa) + + // remove terminator + sa = sa[1:] + da = da[1:] + n := len(sa) - 1 + + for i := 0; i < n; i++ { + if da[i] != 0 { + gsa[i] = sa[i] - sa[da[i]-1] - 1 + } + } + return gsa +} + func GSA(data []byte, sa []uint, lcp []int, da []int32) error { tPtr := unsafe.Pointer(&data[0]) // source "text" var lcpPtr, saPtr, daPtr unsafe.Pointer