Poj 3294 Life Forms (后缀数组 + 二分 + Hash)
题目链接:
题目描述:
有n个文本串,问在一半以上的文本串出现过的最长连续子串?
解题思路:
可以把文本串用没有出现过的不同字符连起来,然后求新文本串的height。然后二分答案串的长度K,根据K把新文本串的后缀串分块,统计每块中的原文本串出现的次数,大于原文本串数目的一半就作为答案记录下来,对于输出字典序,height就是排好序的后缀数组,只要按照顺序输出即可。
1 #include <cstdio> 2 #include <cstring> 3 #include <iostream> 4 #include <algorithm> 5 using namespace std; 6 typedef long long LL; 7 const int maxn = 110000; 8 9 int sa[maxn], rank[maxn], height[maxn], vis[110], res[maxn]; 10 int t1[maxn], t2[maxn], r[maxn], flag[maxn], c[maxn]; 11 12 bool cmp (int *str, int a, int b, int k) 13 { 14 return str[a]==str[b] && str[a+k]==str[b+k]; 15 } 16 17 void da (int *str, int n, int m) 18 { 19 n ++; 20 int *x = t1, *y = t2, i, j; 21 22 for (i=0; i<m; i++) c[i] = 0; 23 for (i=0; i<n; i++) c[x[i]=str[i]] ++; 24 for (i=1; i<m; i++) c[i] += c[i-1]; 25 for (i=n-1; i>=0; i--) sa[-- c[x[i]]] = i; 26 27 for (j=1; j<=n; j*=2) 28 { 29 int p = 0; 30 for (i=n-j; i<n; i++) y[p++] = i; 31 for (i=0; i<n; i++) if (sa[i] >= j) y[p++] = sa[i] - j; 32 33 for (i=0; i<m; i++) c[i] = 0; 34 for (i=0; i<n; i++) c[x[y[i]]] ++; 35 for (i=1; i<m; i++) c[i] += c[i-1]; 36 for (i=n-1; i>=0; i--) sa[-- c[x[y[i]]]] = y[i]; 37 38 swap (x, y); 39 p = 1; 40 x[sa[0]] = 0; 41 for (int i=1; i<n; i++)//i是rank 42 x[sa[i]] = cmp(y, sa[i-1], sa[i], j)?p-1:p++; 43 if (p >= n) 44 break; 45 m = p; 46 } 47 48 for (i=1; i<n; i++) 49 rank[sa[i]] = i; 50 51 int k = 0; 52 n --; 53 for (int i=0; i<n; i++) 54 { 55 if (k) k --; 56 int j = sa[rank[i] - 1]; 57 while (str[i+k] == str[j+k]) k++; 58 height[rank[i]] = k; 59 } 60 } 61 62 bool Bin_sreach (int x, int k, int n) 63 { 64 int ans, num; 65 ans = num = 0; 66 memset (vis, 0, sizeof(vis)); 67 68 for (int i=2; i<=k; i++) 69 { 70 if (height[i] >= x) 71 { 72 ans += vis[flag[sa[i-1]]]?0:1; 73 vis[flag[sa[i-1]]] = 1; 74 75 ans += vis[flag[sa[i]]]?0:1; 76 vis[flag[sa[i]]] = 1; 77 } 78 else 79 { 80 if (ans*2 > n) 81 res[++ num] = sa[i-1]; 82 83 ans = 0; 84 memset (vis, 0, sizeof(vis)); 85 } 86 } 87 if (ans*2 > n) 88 res[++ num] = sa[k-1]; 89 90 if (num) 91 { 92 res[0] = num; 93 return true; 94 } 95 return false; 96 } 97 98 int main () 99 { 100 int n, l = 0; 101 char str[1010]; 102 while (scanf ("%d", &n), n) 103 { 104 if (l ++) 105 printf ("\n"); 106 107 int k = 0; 108 for (int i=0; i<n; i++) 109 { 110 scanf ("%s", str); 111 for (int j=0; str[j]; j++) 112 { 113 r[k] = str[j]; 114 flag[k++] = i;//记录k字母所在的字符串 115 } 116 r[k] = 130 + i; 117 flag[k++] = -1; 118 } 119 120 r[k] = 0; 121 da (r, k, 250); 122 123 int low = 1, high = k, mid, ans = 0; 124 while (low <= high) 125 {//二分枚举 126 mid = (low + high) / 2; 127 if (Bin_sreach(mid, k, n)) 128 { 129 ans = mid; 130 low = mid + 1; 131 } 132 else 133 high = mid - 1; 134 } 135 136 if (low == 1) 137 { 138 printf ("?\n"); 139 continue; 140 } 141 142 for (int i=1; i<=res[0]; i++) 143 { 144 for (int j=res[i]; j<res[i]+ans; j ++) 145 printf ("%c", r[j]); 146 printf ("\n"); 147 } 148 } 149 return 0; 150 }
本文为博主原创文章,未经博主允许不得转载。