sa

https://www.cnblogs.com/lykkk/p/10520070.html

 

 

 

#include "stdint.h"
#include "memory.h"

/*
 * Return the number of significant bits in `a`, i.e. the 1-based position of
 * its highest set bit, or 0 when `a` is 0.
 *
 * Implemented with a plain shift loop so it is well defined for any width of
 * size_t (shifting a 32-bit size_t by 32 would be undefined behaviour) and
 * does not depend on compiler builtins.
 */
static inline int bits(size_t a)
{
    int width = 0;

    while (a != 0) {
        width++;
        a >>= 1;
    }
    return width;
}



/*
 * Return 1 when positions `a` and `b` carry the same pair of ranks
 * (r[pos], r[pos + h]), otherwise 0.
 *
 * A position whose second component would fall at or beyond `n` never
 * compares equal; the bounds are checked before r[a + h] / r[b + h] are read.
 */
static inline int cmp2(size_t *r, size_t a, size_t b, size_t h, size_t n)
{
    if (r[a] != r[b]) {
        return 0;
    }
    if (!(a + h < n)) {
        return 0;
    }
    if (!(b + h < n)) {
        return 0;
    }
    return r[a + h] == r[b + h] ? 1 : 0;
}

/* Exchange two size_t* variables. Arguments must be modifiable lvalues. */
#define swap(x, y) do { size_t *t = (x); (x) = (y); (y) = t; } while (0)

/* Convert per-bucket counts into starting offsets (exclusive prefix sum). */
static void counts_to_offsets(size_t *buckets, size_t count)
{
    size_t running = 0;

    for (size_t i = 0; i < count; i++) {
        size_t c = buckets[i];
        buckets[i] = running;
        running += c;
    }
}

/*
 * Build the suffix array of the byte string a[0..n) by prefix doubling,
 * using a byte-wise LSD radix sort in every round.
 *
 * sa, x, y  three caller-owned work buffers of n elements each; their
 *           previous contents are ignored and all three are overwritten.
 * a         input bytes.
 * n         number of input bytes.
 *
 * Returns a pointer to the buffer holding the sorted suffix start positions.
 * The three buffers are exchanged with each other while sorting, so the
 * returned pointer can be any of sa, x or y -- always use the return value,
 * never assume the result is in the buffer passed as `sa`.
 * For n == 0 nothing is read or written and `sa` is returned.
 */
size_t * sufsort(size_t *sa/*[n]*/, size_t *x/*[n]*/, size_t *y/*[n]*/, const uint8_t *a, size_t n)
{
    enum { RADIX = 256 };

    /* Without this guard sa[0] would be read and x[sa[0]] written below. */
    if (n == 0) {
        return sa;
    }

    /* Counting sort of all positions by their first byte into sa. */
    size_t buckets[RADIX] = {0};
    for (size_t i = 0; i < n; i++) {
        buckets[a[i]]++;
    }
    counts_to_offsets(buckets, RADIX);
    for (size_t i = 0; i < n; i++) {
        sa[buckets[a[i]]++] = i;
    }

    /* Initial ranks: positions with the same first byte share a rank. */
    size_t p = 0;
    x[sa[0]] = p++;
    for (size_t i = 1; i < n; i++) {
        if (a[sa[i - 1]] != a[sa[i]]) {
            p++;
        }
        x[sa[i]] = p - 1;
    }

    /* Ranks are < n, so bits(n) rounded up to whole bytes covers every key. */
    const size_t key_bits = ((size_t)bits(n) + 7) & ~(size_t)7;

    /* p is the number of distinct ranks; stop once every suffix has its own. */
    for (size_t h = 1; p < n; h += h) {
        /*
         * Fill y with all positions ordered by the rank of position + h.
         * Positions in [n - h, n) have no such partner and come first.
         */
        p = 0;
        for (size_t i = n - h; i < n; i++) {
            y[p++] = i;
        }
        /* The rest follow in the current sa order of their partner. */
        for (size_t i = 0; i < n; i++) {
            if (sa[i] >= h) {
                y[p++] = sa[i] - h;
            }
        }

        /*
         * Stable LSD radix sort of y by x[position], one byte per pass.
         * Each pass scatters from y into sa, then the two are exchanged so
         * the next pass again reads from y.
         */
        for (size_t d = 0; d < key_bits; d += 8) {
            memset(buckets, 0, sizeof(buckets));
            for (size_t i = 0; i < n; i++) {
                buckets[(x[y[i]] >> d) & 255]++;
            }
            counts_to_offsets(buckets, RADIX);
            for (size_t i = 0; i < n; i++) {
                sa[buckets[(x[y[i]] >> d) & 255]++] = y[i];
            }
            swap(sa, y);
        }
        /* The last pass left its output in y; make sa name it again. */
        swap(sa, y);

        /* Re-rank into y using the pair (x[pos], x[pos + h]), then adopt it. */
        p = 0;
        y[sa[0]] = p++;
        for (size_t i = 1; i < n; i++) {
            if (!cmp2(x, sa[i - 1], sa[i], h, n)) {
                p++;
            }
            y[sa[i]] = p - 1;
        }
        swap(x, y);
    }
    return sa;
}

#include "stdio.h"
#include "time.h"
/*
 * Small stopwatch built on clock().
 * Timing starts at construction; every Elapsed*() call takes a fresh end
 * sample, so successive calls report increasing values.
 */
struct Clock
{
public:
    Clock()
    {
        Start();
    }

    /* Record the start sample. */
    void Start()
    {
        beg = clock();
    }

    /* Record the end sample. */
    void Finished()
    {
        end = clock();
    }

    /* Take an end sample and return the tick difference since Start(). */
    clock_t Elapsed()
    {
        Finished();
        return end - beg;
    }

    /* Elapsed ticks converted to seconds. */
    double ElapsedToSec()
    {
        const clock_t ticks = Elapsed();
        return (double)ticks / CLOCKS_PER_SEC;
    }

    /* Elapsed time in minutes. */
    double ElapsedToMin()
    {
        return ElapsedToSec() / 60;
    }

    /* Elapsed time in hours. */
    double ElapsedToHor()
    {
        return ElapsedToMin() / 60;
    }

private:
    clock_t beg, end;
};

/*
 * Benchmark driver: reads "1.txt", builds its suffix array with sufsort()
 * and with the externally defined qsufsort(), prints both timings in
 * seconds, and compares the two results.
 *
 * Exit status: 0 when the results match, 1 when they differ, 2 when the
 * input file cannot be read or is empty.
 */
int main(){
    /* The final memcmp treats ssize_t and size_t arrays as the same layout. */
    static_assert(sizeof(ssize_t) == sizeof(size_t), "ssize_t and size_t must have the same size");

    /* Extra untimed-result repetitions inside each timed region (disabled). */
    const int kExtraRuns = 0x0;

    FILE *fp = fopen("1.txt", "rb");
    if (!fp) {
        perror("1.txt");
        return 2;
    }

    long len = -1;
    if (fseek(fp, 0, SEEK_END) == 0) {
        len = ftell(fp);
    }
    if (len <= 0 || fseek(fp, 0, SEEK_SET) != 0) {
        fprintf(stderr, "1.txt: empty file or size could not be determined\n");
        fclose(fp);
        return 2;
    }

    const size_t N = (size_t)len;
    uint8_t *old = new uint8_t[N];
    const size_t got = fread(old, 1, N, fp);
    fclose(fp);
    if (got != N) {
        fprintf(stderr, "1.txt: short read\n");
        delete[] old;
        return 2;
    }

    size_t *a = new size_t[N], *b = new size_t[N], *c = new size_t[N];

    Clock t1;
    for (int i = 0; i < kExtraRuns; i++)
        sufsort(a, b, c, old, N);
    /* The result lives in whichever buffer sufsort returns, not always `a`. */
    size_t *d = sufsort(a, b, c, old, N);
    auto e1 = t1.ElapsedToSec();

    void qsufsort(ssize_t *I,ssize_t *V,uint8_t *old,ssize_t oldsize);
    ssize_t *I = new ssize_t[N+1], *V = new ssize_t[N+1];
    Clock t2;
    for (int i = 0; i < kExtraRuns; i++)
        qsufsort(I, V, old, (ssize_t)N);
    qsufsort(I, V, old, (ssize_t)N);
    auto e2 = t2.ElapsedToSec();
    printf("%lf\n", e1);
    printf("%lf\n", e2);

    /*
     * Compare all N entries (sizeof(a) would only be the size of a pointer).
     * NOTE(review): I + 1 presumably skips an extra leading entry produced by
     * qsufsort -- confirm against its definition.
     */
    const int r = memcmp(I + 1, d, sizeof(*d) * N);

    delete[] V;
    delete[] I;
    delete[] c;
    delete[] b;
    delete[] a;
    delete[] old;

    /* Normalise so a mismatch can never be truncated to exit status 0. */
    return r != 0 ? 1 : 0;
}


/*
 * Fixed-input self check: sorts a 400-byte buffer (four 3s followed by
 * zeros) with sufsort() and with the externally defined qsufsort(), and
 * returns 1 when the two results are identical, 0 otherwise.
 */
int cmain(){
    /* Local constant instead of a #define so the name does not leak past this function. */
    enum { N = 400 };

    unsigned char old[N] = {3, 3, 3, 3};
    size_t a[N], b[N], c[N];
    /* sufsort exchanges its buffers internally; only the returned pointer is
       guaranteed to address the result. */
    auto d = sufsort(a, b, c, old, N);

    void qsufsort(ssize_t *I,ssize_t *V,uint8_t *old,ssize_t oldsize);
    ssize_t I[N+1], V[N+1];
    qsufsort(I, V, old, N);

    /* sizeof(a) is the full array size here (a is a real array, N elements).
       NOTE(review): I + 1 presumably skips an extra leading entry written by
       qsufsort -- confirm against its definition. */
    bool r = memcmp(I + 1, d, sizeof(a)) == 0;
    return r;
}
posted @ zJanly  阅读(498)  评论(0)  编辑  收藏  举报
编辑推荐:
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
历史上的今天:
2020-07-15 f2m
点击右上角即可分享
微信分享提示