字符串hash + 二分答案 - 求最长公共子串 --- poj 2774
Long Long Message
Problem's Link:http://poj.org/problem?id=2774
Mean:
求两个字符串的最长公共子串的长度。
analyse:
前面在学习后缀数组的时候已经做过一遍了,但是现在主攻字符串hash,再用字符串hash写一遍。
这题的思路是这样的:
1)取较短的串的长度作为high,然后二分答案(每次判断长度为mid=(low+high)>>1是否存在,如果存在就增加下界;不存在就缩小上界);
2)主要是对答案的判断(judge函数)。具体参看代码注释。
Time complexity:O(n)
Source code:
// Memory Time // 1347base 0MS // by : Snarl_jsb // 2014-10-04-21.16 #include<algorithm> #include<cstdio> #include<cstring> #include<cstdlib> #include<iostream> #include<vector> #include<queue> #include<stack> #include<map> #include<string> #include<climits> #include<cmath> #define ULL unsigned long long using namespace std; string s1,s2; int l1,l2,seed=131; vector<ULL> hash; bool judge(int x) { hash.clear(); ULL tmp=0; for (int i = 0; i < x; i++) { tmp=tmp* seed + s1[i]; } hash.push_back(tmp); ULL base =1; for (int i = 1; i < x; i++) { base *= seed; } for (int i = x; i < l1; i++) { tmp=(tmp*seed+s1[i])-base*s1[i-x]*seed; hash.push_back(tmp); } sort(hash.begin(),hash.end()); ULL hashval = 0; for (int i = 0; i < x; i++) { hashval = hashval * seed + s2[i]; } if (binary_search(hash.begin(),hash.end(),hashval)) return 1; for (int i = x; i < l2; i++) { hashval = (hashval-(s2[i-x])*base)*seed+s2[i]; if (binary_search(hash.begin(),hash.end(),hashval)) return 1; } return 0; } int main() { while (cin>>s1>>s2) { l1=s1.size(); l2=s2.size(); int ans = 0; int high = min(l1,l2); int low = 0; while (low <= high) { int mid = (low+high)>>1; if (judge(mid)) { ans = mid; low = mid+1; } else high = mid-1; } printf("%d\n",ans); } return 0; }
注释代码:
// Memory Time // 1347k 0MS // by : Snarl_jsb // 2014-10-04-21.16 #include<algorithm> #include<cstdio> #include<cstring> #include<cstdlib> #include<iostream> #include<vector> #include<queue> #include<stack> #include<map> #include<string> #include<climits> #include<cmath> #define ULL unsigned long long using namespace std; string s1,s2; int l1,l2,seed=131; vector<ULL> hash; bool judge(int x) { hash.clear();//多组数据时不要忘了清空全局数组 //构造s1串的hash表 ULL tmp=0; for (int i = 0; i < x; i++) { tmp=tmp* seed + s1[i]; } hash.push_back(tmp); ULL base =1; for (int i = 1; i < x; i++)//求出到达x的base值 { base *= seed; } for (int i = x; i < l1; i++) { tmp=(tmp*seed+s1[i])-base*s1[i-x]*seed; hash.push_back(tmp); } //构造完毕 sort(hash.begin(),hash.end()); //二分查找加速,必需先排序 ULL hashval = 0; for (int i = 0; i < x; i++)//求出s2串0到x的hash值 { hashval = hashval * seed + s2[i]; } if (binary_search(hash.begin(),hash.end(),hashval))//查找s2串0到x的hash值是否在s1串的hash表中 return 1; for (int i = x; i < l2; i++)//如果上面的s2串0到x的hash值未匹配成功,这儿接着匹配s2串长度为x的hash值是否出现在s1串的hash表中 { hashval = hashval*seed+s2[i]-s2[i-x]*base*seed; if (binary_search(hash.begin(),hash.end(),hashval)) return 1; } return 0; } int main() { while (cin>>s1>>s2) { l1=s1.size(); l2=s2.size(); int ans = 0; int low=0,high = min(l1,l2); while (low <= high)//二分答案 { int mid = (low+high)>>1; if (judge(mid))//判断答案是否可行 { ans = mid; low = mid+1; } else high = mid-1; } printf("%d\n",ans); } return 0; }