GSA simplify and more examples (#402)

This commit is contained in:
Alex Sharov 2022-03-28 09:02:01 +07:00 committed by GitHub
parent 8846b406eb
commit dff0f93abb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 30 additions and 12 deletions

View File

@ -9,16 +9,19 @@ import (
// TestExampleGSA is an example-style test: it builds a small set of byte
// strings, concatenates them with ConcatAll, runs GSA over the result and
// prints the produced arrays. It makes no assertions.
func TestExampleGSA(t *testing.T) {
// Input strings for the example.
R := [][]byte{
[]byte("hihihi"),
[]byte("hihihi"),
[]byte("hihihi"),
[]byte("hihi"),
[]byte("alexhihialex"),
[]byte("alex"),
}
// ConcatAll returns the concatenated text and its length n; the three
// output arrays below are allocated with n entries each.
str, n := ConcatAll(R)
sa := make([]uint, n)
lcp := make([]int, n)
da := make([]int32, n)
// The error returned by GSA is deliberately discarded here.
_ = GSA(str, sa, lcp, da)
// NOTE(review): the next two lines call PrintArrays with different argument
// counts; this looks like the removed and the added line of a diff rendered
// back to back - confirm which one applies before compiling.
PrintArrays(str, sa, lcp, da, n)
PrintArrays(str, sa, lcp, da)
// Convert SA+DA to GSA; the result is only computed, not checked.
gsa := SA2GSA(sa, da)
_ = gsa
}
func TestGSA(t *testing.T) {

View File

@ -13,13 +13,12 @@ import (
// Implementation from https://github.com/felipelouza/gsufsort
// see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-020-00177-y.pdf
// see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0117-9.pdf
func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) {
func PrintArrays(str []byte, sa []uint, lcp []int, da []int32) {
// remove terminator
n := len(sa) - 1
sa = sa[1:]
lcp = lcp[1:]
terminatorDa := da[0]
da = da[1:]
n = n - 1
fmt.Printf("i\t")
fmt.Printf("sa\t")
@ -39,13 +38,9 @@ func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) {
}
if da != nil { // gsa
pos := uint(terminatorDa)
if da[i] > 0 {
pos = sa[da[i]-1]
}
value := sa[i]
if da[i] != 0 {
value = sa[i] - pos - 1
value = sa[i] - sa[da[i]-1] - 1
}
fmt.Printf("(%d %d)\t", da[i], value)
}
@ -67,6 +62,26 @@ func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) {
fmt.Printf("\n")
}
}
// SA2GSA is an example func that converts SA+DA (as filled in by GSA) to GSA.
//
// The returned slice has the same length and the same indexing as sa, and the
// inputs are not modified. Entry 0 (the terminator entry) and every entry
// whose da value is 0 are copied through unchanged. Every other entry i is
// rewritten to sa[i] - sa[da[i]] - 1.
//
// An empty sa yields an empty result. da must have at least len(sa) entries.
//
//nolint
func SA2GSA(sa []uint, da []int32) []uint {
	gsa := make([]uint, len(sa))
	copy(gsa, sa)

	// Start at 1 to skip the terminator entry. Indexing the un-sliced arrays
	// keeps gsa[i], sa[i] and da[i] aligned; the previous version sliced sa and
	// da by one but not gsa, so each result landed one slot too early and the
	// final entry was never converted.
	for i := 1; i < len(sa); i++ {
		if da[i] == 0 {
			continue
		}
		// sa[da[i]] is the same element the terminator-stripped form
		// sa[1:][da[i]-1] refers to - presumably the position of the separator
		// that precedes the string owning this suffix; verify against the
		// layout GSA produces.
		gsa[i] = sa[i] - sa[da[i]] - 1
	}
	return gsa
}
func GSA(data []byte, sa []uint, lcp []int, da []int32) error {
tPtr := unsafe.Pointer(&data[0]) // source "text"
var lcpPtr, saPtr, daPtr unsafe.Pointer