后缀数组(笔记)
dalao的博客,讲得很详细
倍增法:设当前已知各后缀前 \(2^k\) 个字符的相对大小关系(即排名数组 \(rank\)),那么只要以 \((rank[i], rank[i + 2^k])\) 为双关键字进行排序,就可以得到各后缀前 \(2^{k+1}\) 个字符的相对大小关系(当 \(i + 2^k\) 超出字符串长度时,第二关键字视为最小)。
跟没有也差不多的upd:
注意 \(tp[i]\) 的意义:把所有后缀 \(j\) 按第二关键字 \(rank[j + 2^k]\) 排序后,排在第 \(i\) 位的后缀的起始位置。由于基数排序是稳定的,再按第一关键字 \(rank[j]\) 排序一次,就相当于完成了双关键字排序。
代码调了一个小时才知道原来还有指针引用这种玩意
#include<cstdio>
#include<cstring>
using namespace std;
// Capacity of every working array below. getSA indexes string positions
// from 1, so the longest string that fits has MAXN - 2 characters once the
// terminating NUL in str is accounted for.
const int MAXN = 1000010;
// t1, t2: scratch buffers that getSA aliases as rak/tp and swaps every round.
// sa:     result of getSA; sa[i] is the start position of the suffix placed
//         i-th by the sort.
// tax:    bucket counters for the counting sort.
int t1[MAXN], t2[MAXN], sa[MAXN], tax[MAXN];
// Input buffer; main reads the string starting at str[1].
char str[MAXN];
// Exchanges two int pointers (not the ints they point to). Both parameters
// are references to pointers, so the caller's own pointer variables end up
// pointing at each other's former target.
inline void Swap(int* &a, int* &b){
    int* const formerA = a;
    a = b;
    b = formerA;
}
void getSA(char s[], int len){
int p = 0, crd;
int *rak = t1, *tp = t2;
crd = 122;
for (int i = 1; i <= len; ++i) rak[i] = s[i], tp[i] = i;
for (int i = 0; i <= crd; ++i) tax[i] = 0;
for (int i = 1; i <= len; ++i) ++tax[rak[i]];
for (int i = 1; i <= crd; ++i) tax[i] += tax[i - 1];
for (int i = len; i >= 1; --i) sa[tax[rak[tp[i]]]--] = tp[i];
for (int w = 1; p != len; w <<= 1, crd = p){
p = 0;
for (int i = len - w + 1; i <= len; ++i)
tp[++p] = i;
for (int i = 1; i <= len; ++i)
if (sa[i] > w)
tp[++p] = sa[i] - w;
for (int i = 0; i <= crd; ++i) tax[i] = 0;
for (int i = 1; i <= len; ++i) ++tax[rak[i]];
for (int i = 1; i <= crd; ++i) tax[i] += tax[i - 1];
for (int i = len; i >= 1; --i) sa[tax[rak[tp[i]]]--] = tp[i];
Swap(rak, tp);
rak[sa[1]] = p = 1;
for (int i = 2; i <= len; ++i)
rak[sa[i]] = (tp[sa[i]] == tp[sa[i - 1]] && tp[sa[i] + w] == tp[sa[i - 1] + w]) ? p : ++p;
}
}
int main(){
scanf("%s", str + 1);
int slen = strlen(str + 1);
getSA(str, slen);
for (int i = 1; i <= slen; ++i)
printf("%d ", sa[i]);
return 0;
}