erigon-pulse/sais/gsa/gsaca.go

126 lines
2.3 KiB
Go
Raw Normal View History

package gsa
/*
#include "gsacak.h"
#cgo CFLAGS: -DTERMINATOR=0 -DM64=1 -Dm64=1
*/
import "C"
import (
2022-03-27 04:36:30 +00:00
"fmt"
"unsafe"
)
// Implementation from https://github.com/felipelouza/gsufsort
// see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-020-00177-y.pdf
// see also: https://almob.biomedcentral.com/track/pdf/10.1186/s13015-017-0117-9.pdf

// PrintArrays writes a tab-separated table of the suffix array to standard
// output, one row per suffix. It is a debugging aid.
//
// str is the concatenated text in the encoding ConcatAll produces (0 is the
// terminator, 1 a separator, every other byte is the original byte plus one).
// sa is the suffix array and n the number of entries in it. lcp and da are
// optional: when nil or empty their columns ("lcp" and "gsa") are omitted.
//
// The first entry of every array is skipped, so n-1 rows are printed.
func PrintArrays(str []byte, sa []uint, lcp []int, da []int32, n int) {
	// Decide which optional columns exist before slicing: re-slicing a nil
	// slice with [1:] panics, so the check cannot be done afterwards.
	hasLCP, hasDA := len(lcp) > 0, len(da) > 0

	// remove terminator
	if len(sa) > 0 {
		sa = sa[1:]
	}
	if hasLCP {
		lcp = lcp[1:]
	}
	var terminatorDa int32
	if hasDA {
		terminatorDa = da[0]
		da = da[1:]
	}
	n--

	// Header row.
	fmt.Print("i\tsa\t")
	if hasLCP {
		fmt.Print("lcp\t")
	}
	if hasDA {
		fmt.Print("gsa\t")
	}
	fmt.Print("suffixes\t\n")

	for i := 0; i < n; i++ {
		fmt.Printf("%d\t%d\t", i, sa[i])
		if hasLCP {
			fmt.Printf("%d\t", lcp[i])
		}
		if hasDA { // gsa
			// NOTE(review): for da[i] > 0, sa[da[i]-1] is presumably the
			// position of the separator that ends the previous document, so
			// value becomes the offset inside document da[i] — confirm.
			pos := uint(terminatorDa)
			if da[i] > 0 {
				pos = sa[da[i]-1]
			}
			value := sa[i]
			if da[i] != 0 {
				value = sa[i] - pos - 1
			}
			fmt.Printf("(%d %d)\t", da[i], value)
		}

		// bwt column of the C reference, not ported:
		// char c = (SA[i])? T[SA[i]-1]-1:terminal;
		// if(c==0) c = '$';
		// printf("%c\t",c);

		// Print the suffix itself, stopping at its first separator.
	suffix:
		for j := sa[i]; int(j) < n; j++ {
			switch c := str[j]; c {
			case 1:
				fmt.Print("$")
				break suffix
			case 0:
				fmt.Print("#")
			default:
				// Undo the +1 shift applied when the text was built.
				fmt.Printf("%c", c-1)
			}
		}
		fmt.Print("\n")
	}
}
// GSA runs the C routine gsacak (declared in gsacak.h) over data and lets it
// fill the caller-provided output slices in place.
//
// data is the source text and must not be empty. sa, lcp and da are output
// arrays; a nil or empty slice is handed to C as a NULL pointer.
//
// NOTE(review): gsacak receives len(data) as its element count, so it
// presumably writes that many entries into every non-NULL array. Non-empty
// slices shorter than len(data) are therefore rejected here instead of
// risking an out-of-bounds write from C — confirm against gsacak.h.
//
// It returns an error for empty data or an undersized output slice, and nil
// otherwise. The value returned by gsacak is discarded.
func GSA(data []byte, sa []uint, lcp []int, da []int32) error {
	n := len(data)
	if n == 0 {
		// Taking &data[0] of an empty slice would panic.
		return fmt.Errorf("gsa: empty input data")
	}

	outputs := [...]struct {
		name string
		size int
	}{
		{"sa", len(sa)},
		{"lcp", len(lcp)},
		{"da", len(da)},
	}
	for _, out := range outputs {
		if out.size != 0 && out.size < n {
			return fmt.Errorf("gsa: %s has %d elements, need at least %d", out.name, out.size, n)
		}
	}

	tPtr := unsafe.Pointer(&data[0]) // source "text"

	// Length checks (not nil checks) so that non-nil empty slices do not
	// panic on &s[0]; they are passed as NULL like nil slices.
	var saPtr, lcpPtr, daPtr unsafe.Pointer
	if len(sa) != 0 {
		saPtr = unsafe.Pointer(&sa[0])
	}
	if len(lcp) != 0 {
		lcpPtr = unsafe.Pointer(&lcp[0])
	}
	if len(da) != 0 {
		daPtr = unsafe.Pointer(&da[0])
	}

	// The result (called "depth" by the original code) is intentionally
	// unused; nothing in this package consumes it.
	_ = C.gsacak(
		(*C.uchar)(tPtr),
		(*C.uint_t)(saPtr),
		(*C.int_t)(lcpPtr),
		(*C.int_da)(daPtr),
		C.uint_t(n),
	)
	return nil
}
// ConcatAll flattens the byte strings in R into a single text.
//
// Every kept input byte b is stored as b+1, which leaves the values 0 and 1
// free for markers: a 1 is written as separator after each entry that
// contributed at least one byte, and a single 0 terminates the text.
// Input bytes 0, 1 and 255 are dropped (255+1 would wrap around to 0, and
// 0+1 would collide with the separator). Entries that are empty or consist
// only of dropped bytes add nothing, not even a separator.
//
// It returns the text and its length n; the slice is trimmed so that
// len(str) == n.
func ConcatAll(R [][]byte) (str []byte, n int) {
	// Upper bound on the output size: every byte kept, one separator per
	// entry, plus the final terminator.
	size := 1
	for _, r := range R {
		size += len(r) + 1
	}
	str = make([]byte, 0, size)

	for _, r := range R {
		start := len(str)
		for _, b := range r {
			if b > 1 && b < 255 {
				str = append(str, b+1)
			}
		}
		// Add 1 as separator, but only if this entry actually wrote bytes.
		// Comparing lengths avoids peeking at str[len-1], which does not
		// exist while the output is still empty.
		if len(str) > start {
			str = append(str, 1)
		}
	}

	str = append(str, 0) // terminator
	return str, len(str)
}