From 8846b406ebc48e97bd7e653215960ad999531c1e Mon Sep 17 00:00:00 2001 From: Alex Sharov Date: Sun, 27 Mar 2022 13:41:16 +0700 Subject: [PATCH] gsa: remove terminator (make it closer to SAIS) #400 --- sais/gsa/gsa_test.go | 23 +++++++++------- sais/gsa/gsaca.go | 62 ++++++++++++++++---------------------------- 2 files changed, 36 insertions(+), 49 deletions(-) diff --git a/sais/gsa/gsa_test.go b/sais/gsa/gsa_test.go index 9d76b616b..a72c2805d 100644 --- a/sais/gsa/gsa_test.go +++ b/sais/gsa/gsa_test.go @@ -8,11 +8,15 @@ import ( ) func TestExampleGSA(t *testing.T) { - R := [][]byte{[]byte("hihihi")} + R := [][]byte{ + []byte("hihihi"), + []byte("hihihi"), + []byte("hihihi"), + } str, n := ConcatAll(R) - sa := make([]uint, SaSize(n)) - lcp := make([]int, LcpSize(n)) - da := make([]int32, DaSize(n)) + sa := make([]uint, n) + lcp := make([]int, n) + da := make([]int32, n) _ = GSA(str, sa, lcp, da) PrintArrays(str, sa, lcp, da, n) } @@ -20,9 +24,10 @@ func TestExampleGSA(t *testing.T) { func TestGSA(t *testing.T) { R := [][]byte{{4, 5, 6, 4, 5, 6, 4, 5, 6}} str, n := ConcatAll(R) - sa := make([]uint, SaSize(n)) + sa := make([]uint, n) lcp := make([]int, n) - _ = GSA(str, sa, lcp, nil) + da := make([]int32, n) + _ = GSA(str, sa, lcp, da) assert.Equal(t, []uint{10, 9, 6, 3, 0, 7, 4, 1, 8, 5, 2}, sa[:n]) } @@ -57,9 +62,9 @@ func BenchmarkName2(b *testing.B) { R = append(R, []byte("hihihi")) } str, n := ConcatAll(R) - sa := make([]uint, SaSize(n)) - lcp := make([]int, LcpSize(n)) - da := make([]int32, DaSize(n)) + sa := make([]uint, n) + lcp := make([]int, n) + da := make([]int32, n) b.ResetTimer() for i := 0; i < b.N; i++ { _ = GSA(str, sa, lcp, da) diff --git a/sais/gsa/gsaca.go b/sais/gsa/gsaca.go index 8b1e3f184..84908c413 100644 --- a/sais/gsa/gsaca.go +++ b/sais/gsa/gsaca.go @@ -2,6 +2,7 @@ package gsa /* #include "gsacak.h" +#cgo CFLAGS: -DTERMINATOR=0 -DM64=1 -Dm64=1 */ import "C" import ( @@ -12,66 +13,47 @@ import ( // Implementation from https://github.com/felipelouza/gsufsort // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-020-00177-y.pdf // see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0117-9.pdf - -func SaSize(l int) int { - var a uint - return l * int(unsafe.Sizeof(a)) -} -func LcpSize(l int) int { - var a uint - return l * int(unsafe.Sizeof(a)) -} -func DaSize(l int) int { - var a C.int_da - return l * int(unsafe.Sizeof(a)) -} func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) { + // remove terminator + sa = sa[1:] + lcp = lcp[1:] + terminatorDa := da[0] + da = da[1:] + n = n - 1 + fmt.Printf("i\t") - if da != nil { - fmt.Printf("da\t") - } + fmt.Printf("sa\t") if lcp != nil { fmt.Printf("lcp\t") } - fmt.Printf("gsa\t") + if da != nil { + fmt.Printf("gsa\t") + } fmt.Printf("suffixes\t") fmt.Printf("\n") for i := 0; i < n; i++ { fmt.Printf("%d\t", i) + fmt.Printf("%d\t", sa[i]) if lcp != nil { fmt.Printf("%d\t", lcp[i]) } - if da != nil { - pos := sa[da[i]] + if da != nil { // gsa + pos := uint(terminatorDa) + if da[i] > 0 { + pos = sa[da[i]-1] + } value := sa[i] if da[i] != 0 { value = sa[i] - pos - 1 } fmt.Printf("(%d %d)\t", da[i], value) - } else { - fmt.Printf("%d\t", sa[i]) } - /* - if(gsa){ - da_value = (light)?rankbv_rank1(rbv,SA[i]):DA[i]; - printf("(%" PRIdA ", ", da_value); + //bwt + // char c = (SA[i])? T[SA[i]-1]-1:terminal; + // if(c==0) c = '$'; + // printf("%c\t",c); - int_t pos; - if(last_end) pos=SA[da_value]; - else pos=SA[da_value-1]; - - int_t value = (da_value==0)?SA[i]:SA[i]-pos-1; - printf("%" PRIdN ") \t", value); - } - */ - /* - if(bwt){ - char c = (SA[i])? T[SA[i]-1]-1:terminal; - if(c==0) c = '$'; - printf("%c\t",c); - } - */ for j := sa[i]; int(j) < n; j++ { if str[j] == 1 { fmt.Printf("$")