后缀数组应用3: 求不同子串的个数
View Code
#include<stdio.h> #include<stdlib.h> #include<string.h> #include<iostream> #include<vector> #include<string> #include<math.h> #include<map> #include<set> #include<algorithm> using namespace std; #define MAXN 10010 int sa[MAXN], rank[MAXN], sum[MAXN], height[MAXN]; int wa[MAXN], wb[MAXN], wx[MAXN], wsum[MAXN]; char str[MAXN]; int dp[1010][20]; /* RMQ: dp[i][j] = max(dp[i][j-1], dp[i + 2 ^(j-1)][j-1]) dp[i][0] = A[i]; 求区间最值[i,j] int L =lg( j - i + 1 ) return max(dp[i][L], dp[j + 1 - 2 ^ L][L]); */ //预处理height数组 void pre(int n) { for( int i = 0; i <= n; i++) dp[i][0] = height[i]; int L = (int) log2(n); for( int j = 1; j <= L; j++) { for( int i = 1; i <= n + 1 - (1<<j); i++) dp[i][j] = min(dp[i][j-1], dp[i + (1<<(j-1))][j-1]); } } int get_min( int a, int b) { int L = (int) log2(b - a + 1 ); return min(dp[a][L], dp[b + 1 - (1<<L)][L]); } //比较字符串是否相等 int cmp( int *r, int a, int b, int l) { return (r[a] == r[b] && r[a+l] == r[b+l]); } //倍增算法求sa数组 void get_sa(char *r, int *sa, int n, int m) //r为字符串, sa数组, n为字符串长度, m为字符串最大值 { int i, j,p, *x = wa, *y = wb, *t; for( i = 0; i < m; i++) sum[i] = 0; //对长度为1时后缀字符串排序 for( i = 0; i < n; i++) sum[ x[i] = r[i] ]++; //x相当于rank,但不是真正rank for( i = 1; i < m; i++) sum[i] += sum[i-1]; for( i = n-1; i >= 0; i--) sa[--sum[x[i]]] = i; //对长度为2,4,...的后缀字符串排序 for(j = 1, p = 1; p < n && j <= n; j *= 2) { //首先对关键字y排序,排序后的结果保存在y数组中,即是这个后缀字符串的起始位置 for(p = 0,i = n - j; i < n; i++) y[p++] = i; for(i = 0; i < n; i++) if( sa[i] >= j ) y[p++] = sa[i] - j; //然后对关键字x排序,先要获取第1关键字x for(i = 0; i < n; i++) wx[i] = x[y[i]]; for(i = 0; i < m; i++) wsum[i] = 0; for(i = 0; i < n; i++) wsum[ wx[i] ]++; for(i = 1; i < m; i++) wsum[i] += wsum[i-1]; for(i = n - 1;i >= 0; i--) sa[--wsum[wx[i]]] = y[i]; //更新x t = x, x = y, y = t; for( x[sa[0]] = 0,i = 1, p = 1; i < n; i++) x[ sa[i] ] = cmp(y, sa[i-1], sa[i], j) ? 
p - 1 : p++; } } //h[i] = height[rank[i]], h[i] >= h[i-1] - 1 void get_height(char *r, int n) { int i, j, k = 0;//sa[0] = len 就是我们补的那个0 for(i = 1; i <= n; i++) rank[sa[i]] = i; for(i = 0; i < n ; height[rank[i++]] = k ) for( k ? k-- : 0, j = sa[rank[i]-1]; r[i+k] == r[j+k]; k++); } //计算不相同的字串 int get_diff(int n) { int num = 0; for( int i = 1; i <= n; i++) { num += n - sa[i] - height[i]; } return num; } int main( ) { int a, b, n, m; while( scanf("%s",str) != EOF ) { int len = strlen(str); str[len] = '0'; str[len+1] = 0; memset(wa,0,sizeof(wa)); memset(wb,0,sizeof(wb)); memset(sa,0,sizeof(sa)); memset(height,0,sizeof(height)); get_sa(str, sa, len + 1, 255); get_height( str, len ); pre(len); printf("%d\n",get_diff(len)); } }
求不同子串的个数,利用height数组:排名为 i 的后缀 sa[i] 共有 n - sa[i] 个前缀,其中 height[i] 个与前一个后缀的前缀重复,因此它贡献 n - sa[i] - height[i] 个新的子串。
posted on 2012-09-26 16:14 more think, more gains 阅读(381) 评论(0) 编辑 收藏 举报