SPOJ - SUBST1 New Distinct Substrings —— 后缀数组 单个字符串的子串个数
题目链接:https://vjudge.net/problem/SPOJ-SUBST1
SUBST1 - New Distinct Substrings
Given a string, we need to find the total number of its distinct substrings.
Input
T- number of test cases. T<=20; Each test case consists of one string, whose length is <= 50000
Output
For each test case output one number saying the number of distinct substrings.
Example
Input:
2
CCCCC
ABABA
Output:
5
9
题意:
给出一个字符串,求这个字符串有多少种子串?
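方法一:按排名枚举每个后缀:后缀sa[i]共有len-sa[i]个前缀(即以sa[i]为左端点的子串),其中height[i]个已经在排名第i-1的后缀中出现过,因此它贡献len-sa[i]-height[i]个新的子串。答案即为∑(len-sa[i]-height[i]),i从1到len。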
方法二:
1.先不考虑重复的,那么长度为n的字符串,有 n+(n-1)+……+1 = n*(n+1)/2 个子串。
2.然后再考虑重复出现的,即去重。利用后缀数组,求出height数组,那么减去∑height[i],即为答案。为何?
height[i]的定义:排名第i与排名第i-1的后缀的最长公共前缀。那么对于以sa[i](下标)为左端点的一群子串,恰好有height[i]个(长度为1到height[i]的那些前缀)已经在以sa[i-1]为左端点的一群子串中出现过,因此需要减去height[i]。并且排名相邻的后缀,公共前缀是最长的(与排名更靠前的后缀的公共前缀不会超过height[i]),因此不会出现遗漏。枚举每个height,即枚举每个左端点,即可减去所有重复出现的。
代码如下:
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <vector>
#include <cmath>
#include <queue>
#include <stack>
#include <map>
#include <string>
#include <set>
using namespace std;
typedef long long LL;
const double EPS = 1e-6;
const int INF = 2e9;
const LL LNF = 9e18;
const int MOD = 1e5;
const int MAXN = 5e4+10;

// Alphabet size passed to DA(): every input byte, read as unsigned char,
// is strictly smaller than this value.
const int ALPHABET = 256;

// Returns true when positions a and b hold the same pair of keys in r:
// the key at the position itself and the key l positions further on.
// DA() uses it to decide whether two neighbouring suffixes in sa[] get
// the same rank after a doubling step.
bool cmp(int *r, int a, int b, int l)
{
    return r[a]==r[b] && r[a+l]==r[b+l];
}

// Scratch buffers for DA(): t1/t2 hold the two alternating key arrays,
// c is the counting-sort bucket array (indexed by key values below m and,
// after the first round, by ranks below n+1).
int t1[MAXN], t2[MAXN], c[MAXN];

// Builds the suffix array, rank array and height (LCP) array of str by
// prefix doubling with counting sort.
//
//   str    - input keys; str[0..n-1] must be in [1, m) and str[n] must be 0,
//            a sentinel that is smaller than every other key
//   sa     - output: sa[i] is the start index of the suffix with rank i;
//            sa[0] is always n (the sentinel suffix)
//   Rank   - output: Rank[sa[i]] == i
//   height - output: for i in [1, n], height[i] is the length of the longest
//            common prefix of the suffixes ranked i-1 and i
//   n      - number of real keys (the sentinel is not counted)
//   m      - exclusive upper bound of the key values
//
// All arrays need at least n+1 entries, and max(m, n+1) must not exceed MAXN.
void DA(int str[], int sa[], int Rank[], int height[], int n, int m)
{
    n++;    // from here on the sentinel str[n] counts as one more suffix
    int i, j, p, *x = t1, *y = t2;

    // Round 0: counting sort of all suffixes by their first key.
    for(i = 0; i<m; i++) c[i] = 0;
    for(i = 0; i<n; i++) c[x[i] = str[i]]++;
    for(i = 1; i<m; i++) c[i] += c[i-1];
    for(i = n-1; i>=0; i--) sa[--c[x[i]]] = i;

    // Each round sorts by the first 2*j keys, given the order by the first j.
    for(j = 1; j<=n; j <<= 1)
    {
        // y lists the suffixes ordered by their second half.
        // Suffixes that start in the last j positions come first ...
        p = 0;
        for(i = n-j; i<n; i++) y[p++] = i;
        // ... the rest inherit the order of the suffix j positions later.
        for(i = 0; i<n; i++) if(sa[i]>=j) y[p++] = sa[i]-j;

        // Stable counting sort of y by first-half rank x[] gives the new sa.
        for(i = 0; i<m; i++) c[i] = 0;
        for(i = 0; i<n; i++) c[x[y[i]]]++;
        for(i = 1; i<m; i++) c[i] += c[i-1];
        for(i = n-1; i>=0; i--) sa[--c[x[y[i]]]] = y[i];

        // Re-rank: y now holds the old ranks, x receives the new ones.
        swap(x, y);
        p = 1; x[sa[0]] = 0;
        for(i = 1; i<n; i++)
            x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++;
        if(p>=n) break;     // all ranks are distinct: sorting is finished
        m = p;              // the next round sorts keys in [0, p)
    }

    // Height array: walking the suffixes in text order lets the match
    // length k be reused, dropping by at most one per step.
    int k = 0;
    n--;
    for(i = 0; i<=n; i++) Rank[sa[i]] = i;
    for(i = 0; i<n; i++)
    {
        if(k) k--;
        j = sa[Rank[i]-1];  // Rank[i] >= 1 because rank 0 is the sentinel suffix
        while(str[i+k]==str[j+k]) k++;  // stops at the latest on the sentinel
        height[Rank[i]] = k;
    }
}

char str[MAXN];
int r[MAXN], sa[MAXN], Rank[MAXN], height[MAXN];

// Reads T test cases, one string each, and prints the number of distinct
// substrings of every string on its own line.
int main()
{
    int T;
    // Stop cleanly on missing or malformed input instead of looping on an
    // uninitialized counter.
    if(scanf("%d", &T) != 1) return 0;
    while(T--)
    {
        // The field width is MAXN-1, so an over-long token cannot overflow str.
        if(scanf("%50009s", str) != 1) break;
        int len = static_cast<int>(strlen(str));
        // Go through unsigned char so bytes >= 0x80 cannot become negative keys.
        for(int i = 0; i<len; i++)
            r[i] = static_cast<unsigned char>(str[i]);
        r[len] = 0;     // sentinel required by DA()
        DA(r, sa, Rank, height, len, ALPHABET);

        // Equivalent alternative (method 1): sum over i = 1..len of
        // len - sa[i] - height[i], the new substrings added by each suffix.

        // Method 2: all len*(len+1)/2 substrings minus the duplicates;
        // height[1] is the LCP with the sentinel suffix and is always 0.
        LL ans = 1LL*len*(len+1)/2;
        for(int i = 2; i<=len; i++)
            ans -= height[i];

        printf("%lld\n", ans);
    }
    return 0;
}