后缀数组 + LCP加速多模式匹配算法 O(m+logn)

  1 #include <iostream>
  2 #include <cstdio>
  3 #include <cstring>
  4 #include <algorithm>
  5 #include <vector>
  6 using namespace std;
  7 const int maxn = 200;
  8 char s[maxn];
  9 int sa[maxn], t[maxn], t2[maxn], c[maxn];
 10 int n;
 11 //构造字符串s的后缀数组, 每个字符值必须为0 ~ m-1
 12 void build_sa(int m) {
 13     int *x = t, *y = t2;
 14     //基数排序
 15     for(int i = 0; i < m; i++) c[i] = 0;
 16     for(int i = 0; i < n; i++) c[x[i] = s[i]]++;
 17     for(int i = 1; i < m; i++) c[i] += c[i-1];
 18     for(int i = n-1; i >= 0; i--) sa[--c[x[i]]] = i;
 19     for(int k = 1; k <= n; k <<= 1) {
 20         int p = 0;
 21         //直接利用sa数组排序第二关键字
 22         for(int i = n-k; i < n; i++) y[p++] = i;
 23         for(int i = 0; i < n; i++) if(sa[i] >= k) y[p++] = sa[i] - k;
 24         //基数排序第一关键字
 25         for(int i = 0; i < m; i++) c[i] = 0;
 26         for(int i = 0; i < n; i++) c[x[y[i]]]++;
 27         for(int i = 1; i < m; i++) c[i] += c[i-1];
 28         for(int i = n-1; i>= 0; i--) sa[--c[x[y[i]]]] = y[i];
 29         //根据sa和y数组计算新的x数组
 30         swap(x, y);
 31         p = 1;
 32         x[sa[0]] = 0;
 33         for(int i = 1; i < n; i++)
 34             x[sa[i]] = (y[sa[i-1]] == y[sa[i]] && y[sa[i-1]+k] == y[sa[i]+k] ? p-1 : p++);
 35         if(p >= n) break;
 36         m = p;
 37     }
 38 }
 39 
 40 int rank_[maxn]; //rank[i]代表后缀i在sa数组中的下标
 41 int height[maxn]; //height[i] 定义为sa[i-1] 和 sa[i] 的最长公共前缀
 42 //后缀j和k的LCP长度等于RMQ(height, rank[j]+1, rank[k])
 43 void get_height() {
 44     int i, j, k = 0;
 45     for(int i = 0; i < n; i++) rank_[sa[i]] = i;
 46     for(int i = 0; i < n; i++) {
 47         if(!rank_[i]) continue;
 48         int j = sa[rank_[i]-1];
 49         if(k) k--;
 50         
 51         while(s[i+k] == s[j+k]) k++;
 52         height[rank_[i]] = k;
 53     }
 54 }
 55 int d[maxn][50];
 56 void rmq_init() {
 57     for(int i = 0; i < n; i++) d[i][0] = height[i];
 58     for(int j = 1; (1<<j) <= n; j++)
 59         for(int i = 0; i + (1<<j) - 1 < n; i++)
 60             d[i][j] = min(d[i][j-1], d[i+(1<<(j-1))][j-1]);
 61 }
 62 int rmq(int l, int r) {
 63     if(l == r) return n-l;
 64     if(rank_[l] > rank_[r]) swap(l, r);
 65     int L = rank_[l]+1;
 66     int R = rank_[r];
 67     int k = 0;
 68     while((1<<(k+1)) <= R-L+1) k++;
 69     return min(d[L][k], d[R-(1<<k)+1][k]);
 70 }
 71 //LCP加速多模式匹配
 72 int m;
 73 int cmp_suffix(char* P, int p, int c,int &k) {
 74     k = 0;
 75     int i;
 76     for(i = 0; P[c+i] == s[sa[p]+c+i]; i++) {
 77         if(P[c+i] == '\0')
 78             return 0;
 79         k++;
 80     }
 81     if(P[c+i] == '\0')
 82         return 0;
 83     return P[c+i] - s[sa[p]+c+i];
 84 }
 85 vector<int> A; 
 86 void b_search(char*P, int L, int R) { 
 87     int k;
 88     if(cmp_suffix(P, L, 0, k) < 0) return ;
 89     if(cmp_suffix(P, R, 0, k) > 0) return ;
 90     int c = 0, rr = 0;
 91     int lst = -1;
 92     k = 0;
 93     while(R >= L) {
 94         int M = L + (R-L)/2;
 95         if(lst != -1) c = rmq(lst, sa[M]);
 96         if(c <= k) {
 97             int res = cmp_suffix(P, M, c, k);
 98             rr = res;
 99             if(!res) {
100                 A.push_back(sa[M]);
101                 b_search(P, L, M-1);
102                 b_search(P, M+1, R);
103                 return;
104             }
105             lst = sa[M];
106             if(res < 0) R = M-1; else L = M+1;
107         }
108         else if(rr < 0)R = M-1;
109         else L = M+1;
110     }
111 }
112 void find(char* P) {  //找到全部的匹配位置存入A数组中
113     A.clear();
114     m = strlen(P);
115     int L = 0, R = n-1;
116     b_search(P, L, R);
117     sort(A.begin(), A.end());
118 }

 

posted @ 2016-12-08 11:37  kiraa  阅读(2014)  评论(2编辑  收藏  举报