后缀数组 + LCP加速多模式匹配算法 O(m+logn)
// Suffix array + LCP-accelerated pattern search.
//
// Typical usage:
//   copy the NUL-terminated text into s, set n = strlen(s),
//   build_sa(alphabet_size); get_height(); rmq_init();
//   find(pattern);   // occurrences end up in A, sorted ascending
//
// Limits: n must be smaller than maxn, and the value passed to build_sa must
// not exceed maxn because c[] is used as the counting-sort bucket array.
#include <iostream>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <vector>
using namespace std;

const int maxn = 200;
char s[maxn];                              // the text; s[n] is expected to be '\0'
int sa[maxn], t[maxn], t2[maxn], c[maxn];  // suffix array, two rank buffers, radix buckets
int n;                                     // length of the text

// Builds the suffix array of s[0..n) into sa by prefix doubling with radix sort.
// Every character value of s must lie in [0, m).
void build_sa(int m) {
    int *x = t, *y = t2;

    // Radix sort the suffixes by their first character.
    for (int i = 0; i < m; i++) c[i] = 0;
    for (int i = 0; i < n; i++) c[x[i] = s[i]]++;
    for (int i = 1; i < m; i++) c[i] += c[i - 1];
    for (int i = n - 1; i >= 0; i--) sa[--c[x[i]]] = i;

    for (int k = 1; k <= n; k <<= 1) {
        int p = 0;

        // Order by the second key (rank of suffix i + k) using the current sa.
        // Suffixes shorter than k have no second key and therefore come first.
        for (int i = n - k; i < n; i++) y[p++] = i;
        for (int i = 0; i < n; i++) {
            if (sa[i] >= k) y[p++] = sa[i] - k;
        }

        // Stable radix sort by the first key (current rank).
        for (int i = 0; i < m; i++) c[i] = 0;
        for (int i = 0; i < n; i++) c[x[y[i]]]++;
        for (int i = 1; i < m; i++) c[i] += c[i - 1];
        for (int i = n - 1; i >= 0; i--) sa[--c[x[y[i]]]] = y[i];

        // Derive the new ranks (into x) from sa and the old ranks (now y).
        swap(x, y);
        p = 1;
        x[sa[0]] = 0;
        for (int i = 1; i < n; i++) {
            const int prev = sa[i - 1];
            const int cur = sa[i];
            // A second key that would start past the end of the text is
            // treated as -1 instead of reading y[] out of range.
            const int prev_second = (prev + k < n) ? y[prev + k] : -1;
            const int cur_second = (cur + k < n) ? y[cur + k] : -1;
            const bool same = (y[prev] == y[cur]) && (prev_second == cur_second);
            x[cur] = same ? p - 1 : p++;
        }
        if (p >= n) break;  // all ranks are distinct: sorting is complete
        m = p;              // next round only needs p buckets
    }
}

int rank_[maxn];   // rank_[i] is the index of suffix i inside sa
int height[maxn];  // height[i] is the LCP length of suffixes sa[i-1] and sa[i]

// Fills rank_ and height from sa. The LCP of two different suffixes j and k
// (with rank_[j] < rank_[k]) is the minimum of height[rank_[j]+1 .. rank_[k]].
void get_height() {
    for (int i = 0; i < n; i++) rank_[sa[i]] = i;
    height[0] = 0;  // the first suffix in sa has no predecessor

    int k = 0;
    for (int i = 0; i < n; i++) {
        if (rank_[i] == 0) {
            k = 0;  // nothing can be carried over from a suffix without predecessor
            continue;
        }
        const int j = sa[rank_[i] - 1];
        if (k) k--;  // the LCP can drop by at most one when moving from i-1 to i
        while (i + k < n && j + k < n && s[i + k] == s[j + k]) k++;
        height[rank_[i]] = k;
    }
}

int d[maxn][50];  // sparse table: d[i][j] = min(height[i .. i + 2^j - 1])

// Builds the sparse table over height[0..n). Call after get_height().
void rmq_init() {
    for (int i = 0; i < n; i++) d[i][0] = height[i];
    for (int j = 1; (1 << j) <= n; j++) {
        for (int i = 0; i + (1 << j) - 1 < n; i++) {
            d[i][j] = min(d[i][j - 1], d[i + (1 << (j - 1))][j - 1]);
        }
    }
}

// Returns the LCP length of the suffixes starting at text positions l and r.
// (Both arguments are text positions, not ranks.)
int rmq(int l, int r) {
    if (l == r) return n - l;  // a suffix shares its whole length with itself
    if (rank_[l] > rank_[r]) swap(l, r);
    int L = rank_[l] + 1;
    int R = rank_[r];
    int k = 0;
    while ((1 << (k + 1)) <= R - L + 1) k++;
    return min(d[L][k], d[R - (1 << k) + 1][k]);
}

// LCP-accelerated pattern matching.
int m;  // length of the pattern most recently passed to find()

// Compares pattern P with the suffix sa[p], skipping the first c characters,
// which the caller must already know to be equal.
// k receives the number of additional characters that matched from offset c.
// Returns 0 when P is a prefix of the suffix (an occurrence), a negative value
// when P sorts before the suffix and a positive value when it sorts after it.
int cmp_suffix(char* P, int p, int c, int& k) {
    k = 0;
    int i;
    for (i = 0; P[c + i] == s[sa[p] + c + i]; i++) {
        if (P[c + i] == '\0') return 0;  // pattern and suffix ended together
        k++;
    }
    if (P[c + i] == '\0') return 0;      // pattern exhausted: P is a prefix
    return P[c + i] - s[sa[p] + c + i];
}

vector<int> A;  // occurrences collected by b_search / find

// Appends to A the start position of every suffix in sa[L..R] that has P as a
// prefix. Positions are appended in suffix-array order. Requires build_sa(),
// get_height() and rmq_init() to have been run for the current text.
void b_search(char* P, int L, int R) {
    if (L > R) return;  // empty slice: nothing to inspect, and sa[R] may not exist

    const int lo_bound = L;
    const int hi_bound = R;
    const int len = static_cast<int>(strlen(P));

    int lst = -1;      // text position of the last suffix compared explicitly
    int known = 0;     // total LCP length of P and suffix lst
    int last_cmp = 0;  // sign of (P compared with suffix lst)

    while (L <= R) {
        const int M = L + (R - L) / 2;
        const int shared = (lst == -1) ? 0 : rmq(lst, sa[M]);

        if (shared > known) {
            // Suffix M agrees with suffix lst beyond the point where P
            // diverged from lst, so P relates to M exactly as it did to lst.
            if (last_cmp < 0) R = M - 1; else L = M + 1;
            continue;
        }

        // P and suffix M agree on the first `shared` characters; resume there.
        int extra = 0;
        const int res = cmp_suffix(P, M, shared, extra);
        if (res == 0) {
            // All occurrences are contiguous in sa. A neighbour matches
            // exactly when it shares at least len characters with a match.
            int first = M, last = M;
            while (first > lo_bound && height[first] >= len) first--;
            while (last < hi_bound && height[last + 1] >= len) last++;
            for (int i = first; i <= last; i++) A.push_back(sa[i]);
            return;
        }

        known = shared + extra;  // total matched length, not just the new part
        last_cmp = res;
        lst = sa[M];
        if (res < 0) R = M - 1; else L = M + 1;
    }
}

// Stores every position where P occurs in the text into A, sorted ascending.
void find(char* P) {
    A.clear();
    m = static_cast<int>(strlen(P));
    b_search(P, 0, n - 1);
    sort(A.begin(), A.end());
}