poj 3415 Common Substrings
Common Substrings
Time Limit: 5000MS | Memory Limit: 65536K | |
Total Submissions: 12585 | Accepted: 4228 |
Description
A substring of a string T is defined as: $T(i, k) = T_i T_{i+1} \cdots T_{i+k-1}$, where $1 \le i \le i+k-1 \le |T|$.
Given two strings A, B and one integer K, we define S, a set of triples (i, j, k): $S = \{(i, j, k) \mid k \ge K,\ A(i, k) = B(j, k)\}$.
You are to give the value of |S| for specific A, B and K.
Input
The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.
1 ≤ |A|, |B| ≤ 10^5
1 ≤ K ≤ min{|A|, |B|}
Characters of A and B are all Latin letters.
Output
For each case, output an integer |S|.
Sample Input
2 aababaa abaabaa 1 xx xx 0
Sample Output
22 5
题意:求两个字符串中长度不小于K的公共子串的数量(起始位置不同的算作不同的子串,即统计满足条件的三元组(i, j, k)的个数)
思路:其实就是求两个字符串当中的任意两个后缀(一个来自A,一个来自B)的相同前缀的数量。设lcp是这两个后缀的最长公共前缀的长度,那么当lcp≥K时,这两个后缀的长度不小于K的相同前缀数量为lcp-K+1;当lcp<K时数量为0。
直接枚举两个字符串的所有后缀并累加他们的前缀数量复杂度在O(n^2)行不通。
可以利用单调栈。首先把两个字符串s1,s2进行合并,中间可以加个不同的字符(譬如'$')来区别,即s=s1+'$'+s2 ,求s的后缀数组和高度数组。
首先任意两个后缀,记它们在后缀数组中位置分别为i,j,则它们的高度lcp可以表示为min(lcp[i],lcp[i+1],...,lcp[j-1]),既然如此,可以用单调栈来维护lcp
对于s2的每一个后缀B,考虑所有字典序在B前面的s1的后缀Ai,计算所有Ai与B的相同前缀的数量和,可以用单调栈优化。对于s1中的每个后缀A,计算Bi与A的相同前缀数量和与之前是类似的。
在高度数组当中把高度大于等于K的连续的序列分成一块,一块一块的用单调栈考虑,具体见代码:
AC代码:
#define _CRT_SECURE_NO_DEPRECATE
#include <algorithm>
#include <cmath>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// POJ 3415 "Common Substrings".
//
// Counts the triples (i, j, k) with k >= K such that the length-k substring of
// A starting at i equals the length-k substring of B starting at j.
//
// Approach: build the suffix array and the adjacent-suffix LCP array of
// s = A + '$' + B, then sweep the suffix array twice with a monotonic stack,
// once pairing every B suffix with the A suffixes sorted before it and once
// the other way round.

typedef long long ll;

// Per-string capacity (10^5 characters plus slack); all buffers below are
// sized for the concatenation A + '$' + B plus the empty suffix.
const int N_MAX = 100000 + 20;

int n;                // length of the string currently being indexed
int k;                // current prefix width used by compare_sa during doubling
int Rank[N_MAX * 2];  // Rank[i]: rank of suffix i (index n is the empty suffix)
int tmp[N_MAX * 2];   // scratch ranks for the next doubling round
int sa[N_MAX * 2];    // suffix array, n + 1 entries (sa[0] is the empty suffix)
int lcp[N_MAX * 2];   // lcp[i]: common prefix length of suffixes sa[i] and sa[i + 1]

// Orders suffixes i and j by the pair (Rank[i], Rank[i + k]); a suffix that is
// too short to have a second half is given the sentinel rank -1.
bool compare_sa(const int& i, const int& j) {
    if (Rank[i] != Rank[j]) return Rank[i] < Rank[j];
    const int ri = i + k <= n ? Rank[i + k] : -1;
    const int rj = j + k <= n ? Rank[j + k] : -1;
    return ri < rj;
}

// Builds the suffix array of S into sa[0..|S|] by prefix doubling
// (one std::sort per round, so O(|S| log^2 |S|) comparisons overall).
// Overwrites the globals n, k, Rank and tmp.
void construct_sa(const std::string& S, int* sa) {
    n = static_cast<int>(S.size());
    for (int i = 0; i <= n; i++) {
        sa[i] = i;
        Rank[i] = i < n ? S[i] : -1;  // the empty suffix sorts first
    }
    for (k = 1; k <= n; k *= 2) {
        std::sort(sa, sa + n + 1, compare_sa);
        // Re-rank: a suffix gets a new rank only if it is strictly greater
        // than its predecessor under the current 2k-wide comparison.
        tmp[sa[0]] = 0;
        for (int i = 1; i <= n; i++) {
            tmp[sa[i]] = tmp[sa[i - 1]] + (compare_sa(sa[i - 1], sa[i]) ? 1 : 0);
        }
        for (int i = 0; i <= n; i++) {
            Rank[i] = tmp[i];
        }
    }
}

// Fills lcp[i] with the length of the longest common prefix of the suffixes
// sa[i] and sa[i + 1], for i in [0, |S|). Overwrites the global Rank with the
// inverse permutation of sa. Both sa and lcp must hold |S| + 1 entries.
void construct_lcp(const std::string& S, int* sa, int* lcp) {
    const int len = static_cast<int>(S.length());
    // lcp is a pointer here, so sizeof(lcp) would only cover pointer-size
    // bytes; clear the entries that are actually in use.
    std::fill(lcp, lcp + len + 1, 0);
    for (int i = 0; i <= len; i++) Rank[sa[i]] = i;

    int h = 0;
    for (int i = 0; i < len; i++) {
        // Rank[i] >= 1 for every non-empty suffix because the empty suffix
        // always occupies rank 0, so the predecessor exists.
        const int j = sa[Rank[i] - 1];
        // Moving from suffix i - 1 to suffix i loses at most one matching
        // character, so resume the comparison from h - 1.
        if (h > 0) h--;
        for (; j + h < len && i + h < len; h++) {
            if (S[j + h] != S[i + h]) break;
        }
        lcp[Rank[i] - 1] = h;
    }
}

int K;                   // minimum common-substring length for the current case
std::string s1, s2, s;   // A, B and the concatenation A + '$' + B

// Monotonic stack used by find_num: parallel arrays holding, for each entry,
// a clamped LCP height and the number of counted suffixes sharing that height.
int stack_height[N_MAX * 2];
int stack_count[N_MAX * 2];

// Sweeps the suffix array of s once and sums, over every pair
// (counted suffix, later query suffix), max(0, lcp - K + 1).
//
//   sz1   - length of s1, i.e. the index of the '$' separator inside s.
//   is_s1 - true: counted suffixes start in s1 and queries start in s2;
//           false: the roles are swapped.
//
// Reads the globals s, sa, lcp and K. Each index is pushed at most once and
// popped at most once, so the sweep is linear in |s|.
ll find_num(int sz1, bool is_s1) {
    const int len = static_cast<int>(s.size());
    ll res = 0;
    ll accumu = 0;  // sum of (height - K + 1) over all counted suffixes on the stack
    int top = 0;

    for (int i = 0; i < len; i++) {
        if (lcp[i] < K) {
            // No earlier suffix can share K characters with anything later.
            top = 0;
            accumu = 0;
            continue;
        }

        int group = 0;  // counted suffixes whose height becomes lcp[i]
        const bool counted = is_s1 ? sa[i] < sz1 : sa[i] > sz1;
        if (counted) {
            group++;
            accumu += lcp[i] - K + 1;
        }

        // Entries at least as tall as lcp[i] are clamped down to lcp[i] and
        // merged into the new entry; accumu gives back the height they lose.
        while (top > 0 && lcp[i] <= stack_height[top - 1]) {
            top--;
            // Multiply in 64 bits: a count and a height difference can each
            // approach 1e5, which an int product may not hold.
            accumu -= static_cast<ll>(stack_count[top]) * (stack_height[top] - lcp[i]);
            group += stack_count[top];
        }
        if (group) {
            stack_height[top] = lcp[i];
            stack_count[top] = group;
            top++;
        }

        // sa[i + 1] is the query suffix: credit it with every counted suffix
        // currently represented on the stack.
        const bool queried = is_s1 ? sa[i + 1] > sz1 : sa[i + 1] < sz1;
        if (queried) {
            res += accumu;
        }
    }
    return res;
}

// Reads blocks "K, A, B" until K == 0 (or the input ends) and prints |S| for
// each block on its own line.
int main() {
    // Check that the read itself succeeded so that a missing terminating 0
    // ends the loop instead of reprocessing the previous K forever.
    while (std::cin >> K && K != 0) {
        if (!(std::cin >> s1 >> s2)) break;
        const int sz1 = static_cast<int>(s1.size());
        s = s1 + '$' + s2;  // '$' occurs in neither string, so no LCP spans both
        construct_sa(s, sa);
        construct_lcp(s, sa, lcp);
        std::cout << find_num(sz1, true) + find_num(sz1, false) << '\n';
    }
    return 0;
}