SPOJ - SUBST1 New Distinct Substrings —— 后缀数组 单个字符串的子串个数

题目链接:https://vjudge.net/problem/SPOJ-SUBST1

 

SUBST1 - New Distinct Substrings

 

Given a string, we need to find the total number of its distinct substrings.

Input

T- number of test cases. T<=20; Each test case consists of one string, whose length is <= 50000

Output

For each test case output one number saying the number of distinct substrings.

Example

Input:
2
CCCCC
ABABA

Output:
5
9

 

 

题意:

给出一个字符串,求这个字符串有多少种子串?

 

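方法一:按排名依次枚举每个后缀sa[i]:它一共有len-sa[i]个前缀(即以sa[i]为左端点的子串),其中height[i]个已经作为排名第i-1的后缀的前缀出现过,所以它新贡献len-sa[i]-height[i]个子串,把所有后缀的贡献累加即为答案。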

 

方法二:

1.先不考虑重复的,那么长度为n的字符串,共有 n+(n-1)+……+1 = n*(n+1)/2 个子串。

2.然后再考虑重复出现的,即去重。利用后缀数组,求出height数组,那么减去∑height[i],即为答案。为何?

height[i]的定义:排名第i与排名第i-1的后缀的最长公共前缀。那么对于以sa[i](下标)为左端点的一群子串(即后缀sa[i]的所有前缀),恰好有height[i]个已经在以sa[i-1]为左端点的一群子串中出现过,因此需要减去height[i]。并且排名相邻的后缀,公共前缀是最长的(后缀sa[i]与任何排名更靠前的后缀的公共前缀都不会超过height[i]),因此不会出现遗漏,也不会多减。枚举每个height,即枚举每个左端点,即可减去所有重复出现的。

 

代码如下:

 1 #include <iostream>
 2 #include <cstdio>
 3 #include <cstring>
 4 #include <algorithm>
 5 #include <vector>
 6 #include <cmath>
 7 #include <queue>
 8 #include <stack>
 9 #include <map>
10 #include <string>
11 #include <set>
using namespace std;

// 64-bit signed integer alias used for the substring count in main().
typedef long long LL;

// The next four constants are not referenced by any code in this listing.
const double EPS = 1e-6;
const int INF = 2000000000;               // 2 * 10^9
const LL LNF = 9000000000000000000LL;     // 9 * 10^18
const int MOD = 100000;                   // 10^5

// Capacity of every fixed-size buffer in the program: the maximum input
// length (50000) plus slack for the sentinel / terminator.
const int MAXN = 50000 + 10;
19 
/**
 * Reports whether positions a and b hold the same two-part key in r.
 *
 * The key of a position p is the pair (r[p], r[p + l]).  The second
 * component is only read when the first components already match.
 *
 * @param r  key array; it is only read, never modified
 * @param a  first position
 * @param b  second position
 * @param l  offset of the second key component
 * @return   true when r[a] == r[b] and r[a + l] == r[b + l]
 */
bool cmp(const int *r, int a, int b, int l)
{
    return r[a] == r[b] && r[a + l] == r[b + l];
}
24 
/**
 * Builds the suffix array, rank array and height (LCP) array of a sequence
 * using prefix doubling with counting sort.
 *
 * All scratch space is allocated per call and sized from n and m, so the
 * routine is not limited by any fixed buffer capacity.
 *
 * @param str     input symbols str[0..n-1] followed by a sentinel str[n].
 *                Every symbol must lie in [0, m).  The sentinel must be
 *                strictly smaller than every other symbol (the caller in
 *                this file uses 0): the height loop below has no explicit
 *                bound and relies on the sentinel to stop.
 * @param sa      output, n+1 entries: sa[i] is the start index of the
 *                suffix with rank i (the sentinel suffix is included).
 * @param Rank    output, n+1 entries: inverse permutation of sa.
 * @param height  output: for every i in [0, n), height[Rank[i]] is the
 *                length of the longest common prefix of suffix i and the
 *                suffix ranked immediately before it.  height[0] is not
 *                written.
 * @param n       number of symbols, not counting the sentinel.
 * @param m       exclusive upper bound on the symbol values.
 */
void DA(int str[], int sa[], int Rank[], int height[], int n, int m)
{
    const int total = n + 1;  // number of suffixes, sentinel suffix included

    // Key buffers are padded to 2*total so that the second-key read at
    // index (pos + j) stays inside the buffer for every pos < total, j <= total.
    // Entries at index >= total are never written and stay 0.
    std::vector<int> x(2 * total), y(2 * total);
    // Counting-sort buckets: the key range is m at first, at most total later.
    std::vector<int> bucket(std::max(total, m));

    // Round 0: stable counting sort of the suffixes by their first symbol.
    for (int i = 0; i < total; i++) bucket[x[i] = str[i]]++;
    for (int i = 1; i < m; i++) bucket[i] += bucket[i - 1];
    for (int i = total - 1; i >= 0; i--) sa[--bucket[x[i]]] = i;

    // Each round sorts by the first 2*j symbols, given the order by j symbols.
    for (int j = 1; j <= total; j <<= 1)
    {
        // y := suffixes ordered by their second key (rank of suffix pos + j).
        // Suffixes whose second half is past the end come first.
        int p = 0;
        for (int i = total - j; i < total; i++) y[p++] = i;
        for (int i = 0; i < total; i++)
            if (sa[i] >= j) y[p++] = sa[i] - j;

        // Stable counting sort of y by the first key x[].
        for (int i = 0; i < m; i++) bucket[i] = 0;
        for (int i = 0; i < total; i++) bucket[x[y[i]]]++;
        for (int i = 1; i < m; i++) bucket[i] += bucket[i - 1];
        for (int i = total - 1; i >= 0; i--) sa[--bucket[x[y[i]]]] = y[i];

        // y now holds the previous ranks; x receives the new ones.
        x.swap(y);
        p = 1;
        x[sa[0]] = 0;
        for (int i = 1; i < total; i++)
        {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // Equal (first key, second key) pairs share a rank.
            const bool sameKey = y[prev] == y[cur] && y[prev + j] == y[cur + j];
            x[cur] = sameKey ? p - 1 : p++;
        }
        if (p >= total) break;  // all ranks distinct: the order is final
        m = p;                  // ranks are the key range of the next round
    }

    for (int i = 0; i <= n; i++) Rank[sa[i]] = i;

    // Height computation in text order: the match length drops by at most
    // one when moving from suffix i to suffix i + 1, so k is reused.
    int k = 0;
    for (int i = 0; i < n; i++)
    {
        if (k) k--;
        const int j = sa[Rank[i] - 1];
        while (str[i + k] == str[j + k]) k++;
        height[Rank[i]] = k;
    }
}
64 
65 char str[MAXN];
66 int r[MAXN], sa[MAXN], Rank[MAXN], height[MAXN];
67 int main()
68 {
69     int T;
70     scanf("%d", &T);
71     while(T--)
72     {
73         scanf("%s", str);
74         int len = strlen(str);
75         for(int i = 0; i<len; i++)
76             r[i] = str[i];
77         r[len] = 0;
78         DA(r, sa, Rank, height, len, 200);
79 
80         /* 方法一:
81         LL ans = 0;
82         for(int i = 1; i<=len; i++)
83             ans += len-sa[i]-height[i];
84         */
85         // 方法二:
86         LL ans = 1LL*len*(len+1)/2;
87         for(int i = 2; i<=len; i++)
88             ans -= height[i];
89 
90         printf("%lld\n", ans);
91     }
92 }
View Code

 

posted on 2018-02-24 16:21  h_z_cong  阅读(200)  评论(0)  编辑  收藏  举报

导航