SPOJ Distinct Substrings(后缀数组求不同子串个数,好题)
DISUBSTR - Distinct Substrings
Given a string, we need to find the total number of its distinct substrings.
Input
T- number of test cases. T<=20;
Each test case consists of one string, whose length is <= 1000
Output
For each test case output one number saying the number of distinct substrings.
Example
Sample Input:
2
CCCCC
ABABA
Sample Output:
5
9
Explanation for the testcase with string ABABA:
len=1 : A,B
len=2 : AB,BA
len=3 : ABA,BAB
len=4 : ABAB,BABA
len=5 : ABABA
Thus, total number of distinct substrings is 9.
题目链接:SPOJ DISUBSTR
一开始想用字典树,结果静态建树的Trie超时了(懒的写动态指针版……)真相是用后缀数组做的,因为每一个后缀的贡献原本为其长度,原本总贡献为$(len + 1) * len / 2$,但由于一些串重复,我们要减掉,再想一想,这些重复的是后缀的前缀,也就是$Suffix(x)$和$Suffix(y)$的公共前缀$LCP(x,y)$,但是x与y如何确定才能准确不遗漏地算出这些重复的串呢?按字典序排,然后height数组就是基于字典序排序的后缀,因此把所有height值减掉就好了。不过似乎有人用指针写的Trie过了,果然指针除了爆内存的风险,速度确实快啊。
想了一下用后缀数组只要$O(Nlog_{2}N)$,而字典树至少$O(N*N)$,果然不是一个档次……
代码:
#include <stdio.h>
#include <iostream>
#include <algorithm>
#include <cstdlib>
#include <cstring>
#include <bitset>
#include <string>
#include <stack>
#include <cmath>
#include <queue>
#include <set>
#include <map>
using namespace std;

// Contest-template helpers; none of them is used by the code below.
#define INF 0x3f3f3f3f
#define LC(x) (x<<1)
#define RC(x) ((x<<1)+1)
#define MID(x,y) ((x+y)>>1)
#define fin(name) freopen(name,"r",stdin)
#define fout(name) freopen(name,"w",stdout)
#define CLR(arr,val) memset(arr,val,sizeof(arr))
#define FAST_IO ios::sync_with_stdio(false);cin.tie(0);
typedef pair<int, int> pii;
typedef long long LL;
const double PI = acos(-1.0);

// Capacity of every working array, including the input buffer `s`
// (so at most N - 1 characters plus the terminating '\0').
const int N = 1010;

int wa[N], wb[N];        // rank buffers swapped between doubling rounds
int cnt[N];              // counting-sort buckets
int sa[N];               // sa[i] = start index of the i-th smallest suffix
int ran[N], height[N];   // ran[p] = position of suffix p in sa; height[] = adjacent LCPs
char s[N];               // current input string, NUL-terminated

// Returns non-zero when positions a and b carry the same (r[x], r[x + d]) key pair.
// r[a + d] / r[b + d] are only read when r[a] == r[b] (short-circuit &&); DA relies on
// the unique terminating '\0' so that equal first keys imply both reads stay below n.
inline int cmp(int r[], int a, int b, int d)
{
    return r[a] == r[b] && r[a + d] == r[b + d];
}

// Builds the suffix array `sa[0..n-1]` of the global `s[0..n-1]` by prefix doubling,
// sorting with a counting sort in every round.
//   n - number of characters to sort; the caller passes strlen(s) + 1 so that the
//       terminating '\0' takes part as the smallest symbol.
//   m - alphabet size: must be greater than every byte of s read as unsigned char,
//       and must not exceed N.
void DA(int n, int m)
{
    int i;
    int *x = wa, *y = wb;

    // Round 0: sort the suffixes by their first character.
    for (i = 0; i < m; ++i)
        cnt[i] = 0;
    for (i = 0; i < n; ++i)
    {
        // Read the byte as unsigned: plain char may be signed, and a negative
        // value must never be used as a bucket index.
        x[i] = static_cast<unsigned char>(s[i]);
        ++cnt[x[i]];
    }
    for (i = 1; i < m; ++i)
        cnt[i] += cnt[i - 1];
    for (i = n - 1; i >= 0; --i)
        sa[--cnt[x[i]]] = i;

    for (int k = 1; k <= n; k <<= 1)
    {
        // y: suffix starts ordered by their second key (rank of the part at offset k).
        // Suffixes with no character at offset k come first.
        int p = 0;
        for (i = n - k; i < n; ++i)
            y[p++] = i;
        for (i = 0; i < n; ++i)
            if (sa[i] >= k)
                y[p++] = sa[i] - k;

        // Stable counting sort of y by the first key x[].
        for (i = 0; i < m; ++i)
            cnt[i] = 0;
        for (i = 0; i < n; ++i)
            ++cnt[x[y[i]]];
        for (i = 1; i < m; ++i)
            cnt[i] += cnt[i - 1];
        for (i = n - 1; i >= 0; --i)
            sa[--cnt[x[y[i]]]] = y[i];

        // Re-rank: y now holds the previous ranks, x receives the new ones.
        swap(x, y);
        x[sa[0]] = 0;
        p = 1;
        for (i = 1; i < n; ++i)
            x[sa[i]] = cmp(y, sa[i - 1], sa[i], k) ? p - 1 : p++;

        m = p;          // number of distinct ranks is the next round's alphabet size
        if (p >= n)     // every suffix already has its own rank
            break;
    }
}

// Fills `ran` and `height` for the string of length n (terminator excluded).
// Requires DA(n + 1, ...) to have run, so sa[0] is the terminator-only suffix.
// height[r] is the longest common prefix of the suffixes sa[r] and sa[r - 1], r = 1..n.
void getght(int n)
{
    int i, k = 0;
    for (i = 1; i <= n; ++i)
        ran[sa[i]] = i;
    for (i = 0; i < n; ++i)
    {
        if (k)
            --k;    // the previous match length minus one is reused as the starting point
        int j = sa[ran[i] - 1];
        while (s[i + k] == s[j + k])   // stops at the '\0' of the shorter suffix at the latest
            ++k;
        height[ran[i]] = k;
    }
}

// Reads T test strings and prints, for each, the number of distinct substrings:
// len * (len + 1) / 2 substrings in total, minus the sum of height[1..len].
int main(void)
{
    int T;
    if (scanf("%d", &T) != 1)
        return 0;   // no test count available: nothing to do

    while (T--)
    {
        // The width keeps the token inside s[N] (N - 1 = 1009 characters + '\0').
        if (scanf("%1009s", s) != 1)
            break;  // input ended early; do not re-process the previous string

        int len = static_cast<int>(strlen(s));

        // Alphabet size from the bytes read as unsigned, matching the indexing in DA.
        int maxByte = 0;
        for (int i = 0; i < len; ++i)
            maxByte = max(maxByte, static_cast<int>(static_cast<unsigned char>(s[i])));

        DA(len + 1, maxByte + 1);
        getght(len);

        int ans = (len + 1) * len / 2;
        for (int i = 1; i <= len; ++i)
            ans -= height[i];
        printf("%d\n", ans);
    }
    return 0;
}