后缀数组的第X种求法
后缀自动机构造后缀数组。
因为有个SB题洛谷5115,它逼迫我学习后缀数组...(边分树合并是啥?)。[update]现在这题已经有了至少两种SAM做法
一些定义:sa[i]表示字典序排第i的后缀是从哪里开始的。Rank[i]表示后缀i的排名。height[i]表示排名i和i - 1的两个后缀的最长公共前缀的长度。
首先对反串建SAM,它的parent树(fail树)就是原串的后缀树;然后在后缀树上按字典序DFS,即可获得sa数组和Rank数组。
接下来要求height,使用经典后缀数组的求法即可。
说一下关于后缀数组经典倍增构造方法的一些理解。关于网上流传的那个锯齿形图,其实就是把上一次的两个排名拼接起来进行排序。
height数组的构建也很神奇。按照下标从小到大求:设j = sa[Rank[i] - 1],即排在后缀i(也就是sa[Rank[i]])前一名的后缀,考察它和后缀i + 1的关系:
若后缀i和后缀j的前x位相同(x ≥ 1),那么后缀i + 1和后缀j + 1的前x - 1位相同,并且后缀j + 1排在后缀i + 1之前。所以height[Rank[i + 1]]至少有x - 1。
拍过的模板......暂时没找到模板题(板子字符集居然是62...是想卡爆SAM吧)[update]把转移数组改成map之后就把LOJ的模板题A了
#include <algorithm>
#include <cstdio>
#include <cstring>

/// Capacity of every array. The automaton may create up to about 2 * |str|
/// states, so the input string must be at most roughly N / 2 characters long.
const int N = 200010;

int n, pw[N], ST[N][20];                                        // length, floor(log2), sparse table over height
int tr[N][26], len[N], fail[N], tot = 1, last = 1, ed[N], Lp[N]; // suffix automaton (state 1 is the root)
int tr2[N][26], sa[N], Rank[N], height[N], num;                  // suffix tree children and the resulting arrays
char str[N];                                                     // 1-based input, lowercase letters only

/// Appends character `c` to the suffix automaton.
///
/// main() feeds the string from its last character to its first, so the
/// automaton is built over the reversed string. `id` is the 1-based position
/// in `str` of the character being inserted; it is stored in Lp[] for the new
/// state, and a cloned state copies Lp[] from the state it was cloned from.
/// Only non-clone states are flagged in ed[].
inline void insert(char c, int id) {
    int f = c - 'a', p = last, np = ++tot;
    last = np;
    ed[np] = 1;
    Lp[np] = id;
    len[np] = len[p] + 1;
    while (p && !tr[p][f]) {
        tr[p][f] = np;
        p = fail[p];
    }
    if (!p) {
        fail[np] = 1;
        return;
    }
    int Q = tr[p][f];
    if (len[Q] == len[p] + 1) {
        fail[np] = Q;
        return;
    }
    // Split Q: the clone nQ takes over the shorter strings of Q.
    int nQ = ++tot;
    Lp[nQ] = Lp[Q];
    len[nQ] = len[p] + 1;
    fail[nQ] = fail[Q];
    fail[Q] = fail[np] = nQ;
    std::memcpy(tr[nQ], tr[Q], sizeof(tr[Q]));
    // tr[0][f] is always 0, so this loop stops once p falls off the root.
    while (tr[p][f] == Q) {
        tr[p][f] = nQ;
        p = fail[p];
    }
}

/// Walks the tree stored in tr2[] from node `x`, visiting children in
/// increasing letter order, and appends every ed[]-flagged node to sa[]
/// (recording the inverse in Rank[]).
/// NOTE(review): recursion depth equals the tree depth, which can be as large
/// as the string length.
void DFS(int x) {
    if (ed[x]) {
        sa[++num] = Lp[x];
        Rank[Lp[x]] = num;
    }
    for (int i = 0; i < 26; i++) {
        if (tr2[x][i]) DFS(tr2[x][i]);
    }
}

/// Builds sa[], Rank[] and height[] from the finished automaton.
inline void getsa() {
    // Build the suffix tree: hang every state under its fail link, keyed by
    // the first character that extends the parent's string.
    for (int i = 2; i <= tot; i++) {
        char c = str[Lp[i] + len[fail[i]]];
        tr2[fail[i]][c - 'a'] = i;
    }
    DFS(1);  // fills sa[] and Rank[]
    // height[] in index order: the match length k drops by at most one when
    // moving from suffix i to suffix i + 1, so it is carried over.
    for (int i = 1, j, k = 0; i <= n; i++) {
        j = sa[Rank[i] - 1];
        if (!j) continue;  // sa[0] == 0: the rank-1 suffix has no predecessor
        if (k) k--;
        while (i + k <= n && j + k <= n && str[i + k] == str[j + k]) {
            k++;
        }
        height[Rank[i]] = k;
    }
}

/// Precomputes pw[i] = floor(log2(i)) and a range-minimum sparse table over
/// height[1..n].
inline void prework() {
    for (int i = 2; i <= n; i++) pw[i] = pw[i >> 1] + 1;
    for (int i = 1; i <= n; i++) ST[i][0] = height[i];
    for (int j = 1; j <= pw[n]; j++) {
        for (int i = 1; i + (1 << j) - 1 <= n; i++) {
            ST[i][j] = std::min(ST[i][j - 1], ST[i + (1 << (j - 1))][j - 1]);
        }
    }
}

/// Returns the minimum of height[] over the ranks (min(l, r), max(l, r)].
/// The two ranks must differ; main() handles the equal case itself.
inline int getSmall(int l, int r) {
    if (l > r) std::swap(l, r);
    l++;
    int t = pw[r - l + 1];
    return std::min(ST[l][t], ST[r - (1 << t) + 1][t]);
}

/// Reads a lowercase string and m queries (x, y), and prints for each query
/// the length of the longest common prefix of the suffixes starting at
/// positions x and y (1-based). Positions are not range-checked.
int main() {
    if (scanf("%s", str + 1) != 1) return 0;
    n = static_cast<int>(strlen(str + 1));

    // Insert back to front so that the fail tree is the suffix tree of str.
    for (int i = n; i >= 1; i--) {
        insert(str[i], i);
    }
    getsa();
    prework();

    int m = 0;
    if (scanf("%d", &m) != 1) return 0;
    for (int i = 1; i <= m; i++) {
        int x, y;
        if (scanf("%d%d", &x, &y) != 2) break;
        if (x == y) {
            printf("%d ", n - x + 1);  // a suffix shares its whole length with itself
        } else {
            int t = getSmall(Rank[x], Rank[y]);
            printf("%d ", t);
        }
    }

    return 0;
}
[20210808update]SAM大胜利,经过我锲而不舍的卡空间,终于在洛谷上过了SA板题,代码中大量复用数组,以及把char和节点编号压成一个int
#include <algorithm>
#include <bitset>
#include <cstdio>
#include <cstring>
#include <vector>

typedef long long LL;  // unused in this program
/// N bounds the number of automaton states and is also the radix used to pack
/// a character and a node id into one int (value = character * N + id).
/// M bounds the number of edges held by one Graph.
const int N = 2000010, M = 2500015;

int tot = 1, last = 1, Lp[N];
/// sa[] is reused to save memory: sa[i] / N is the i-th input character
/// (1-based) and sa[x] % N is the fail link of automaton state x.
int sa[N], num, len[N];  // num is unused in this program
std::bitset<N> ed;       // ed[x] is set for non-clone states

/// Adjacency lists with one singly linked list per node. Every edge stores
/// `character * N + target` in `v`.
struct Graph {
    int e[N], tp;  // e[x]: index of the first edge of x (0 = none); tp: edges used
    struct Edge {
        int nex, v;
        Edge() {}
        Edge(int NEX, int V) : nex(NEX), v(V) {}
    } edge[M];

    Graph() {
        tp = 0;
        memset(e, 0, sizeof(e));
    }

    /// Returns the target of the edge of `x` labelled `c`, or 0 if none.
    inline int get(int x, char c) {
        for (int i = e[x]; i; i = edge[i].nex) {
            if (c == edge[i].v / N) {
                return edge[i].v % N;
            }
        }
        return 0;
    }

    /// Points the edge of `x` labelled `c` at `v`, creating it if necessary.
    inline void set(int x, char c, int v) {
        for (int i = e[x]; i; i = edge[i].nex) {
            if (c == edge[i].v / N) {
                edge[i].v = c * N + v;
                return;
            }
        }
        edge[++tp] = Edge(e[x], c * N + v);
        e[x] = tp;
    }
} *tr2, *tr;  // tr: automaton transitions; tr2: suffix tree children

std::vector<int> vec2[N];  // unused in this program

/// Returns true for the characters accepted as input: digits and ASCII letters.
inline bool check(char c) {
    if (c >= '0' && c <= '9') return true;
    if (c >= 'a' && c <= 'z') return true;
    if (c >= 'A' && c <= 'Z') return true;
    return false;
}

/// Reads one alphanumeric token from stdin into the upper part of sa[1..]
/// and returns its length (0 when the input ends before any token).
inline int read() {
    // getchar() returns an int so that EOF can be told apart from real
    // characters; storing it in a char would spin forever at end of input.
    int ch = getchar();
    int top = 0;
    while (ch != EOF && !check(static_cast<char>(ch))) {
        ch = getchar();
    }
    while (ch != EOF && check(static_cast<char>(ch))) {
        sa[++top] = ch * N;
        ch = getchar();
    }
    sa[top + 1] = 0;
    return top;
}

/// Copies every outgoing transition of state Q to state nQ.
inline void shift(int nQ, int Q) {
    for (int i = tr->e[Q]; i; i = tr->edge[i].nex) {
        tr->set(nQ, (tr->edge[i].v) / N, (tr->edge[i].v) % N);
    }
}

/// Appends character `f` to the suffix automaton; `id` is the 1-based
/// position of that character in the input and is stored in Lp[].
/// Fail links are added into the low part of sa[], leaving the character in
/// the high part untouched.
inline void insert(char f, int id) {
    int p = last, np = ++tot;
    last = np;
    Lp[np] = id;
    len[np] = len[p] + 1;
    ed[np] = 1;
    while (p && !tr->get(p, f)) {
        tr->set(p, f, np);
        p = sa[p] % N;
    }

    if (!p) {
        sa[np] += 1;
        return;
    }

    int Q = tr->get(p, f);
    if (len[Q] == len[p] + 1) {
        sa[np] += Q;
        return;
    }

    // Split Q: the clone nQ takes over the shorter strings of Q.
    int nQ = ++tot;
    Lp[nQ] = Lp[Q];
    len[nQ] = len[p] + 1;
    sa[nQ] += sa[Q] % N;
    sa[Q] = (sa[Q] / N) * N + nQ;
    sa[np] += nQ;
    shift(nQ, Q);  // tr[nQ] = tr[Q]

    while (tr->get(p, f) == Q) {
        tr->set(p, f, nQ);
        p = sa[p] % N;
    }
}

/// Walks the tree in tr2 from node `x` in stored edge order and prints
/// Lp[x] for every ed[]-flagged node.
/// NOTE(review): recursion depth equals the tree depth, which can be as large
/// as the input length.
void DFS(int x) {
    if (ed[x]) {
        printf("%d ", Lp[x]);
    }
    for (int i = tr2->e[x]; i; i = tr2->edge[i].nex) {
        DFS(tr2->edge[i].v % N);
    }
}

/// Turns the finished automaton into a suffix tree and prints the suffix
/// array. Releases the automaton's transition table first to save memory.
inline void getsa() {
    // tr was allocated with scalar `new`, so it must be released with scalar
    // `delete`; `delete[]` here is undefined behaviour.
    delete tr;
    tr = nullptr;

    tr2 = new Graph();
    for (int x = tot; x > 1; x--) {  // build the suffix tree
        char c = sa[Lp[x] + len[sa[x] % N]] / N;
        tr2->set(sa[x] % N, c, x);
    }

    // Order each node's children by edge character. The packed value is
    // character * N + child, so sorting the packed values sorts by character.
    std::vector<int> packed;  // reused for every node to avoid reallocating
    for (int x = 1; x <= tot; x++) {
        packed.clear();
        for (int i = tr2->e[x]; i; i = tr2->edge[i].nex) {
            packed.push_back(tr2->edge[i].v);
        }
        std::sort(packed.begin(), packed.end());
        std::size_t now = 0;
        for (int i = tr2->e[x]; i; i = tr2->edge[i].nex) {
            tr2->edge[i].v = packed[now++];
        }
    }

    DFS(1);  // prints the suffix array

    delete tr2;
    tr2 = nullptr;
}

/// Reads one alphanumeric string and prints its suffix array (1-based start
/// positions in lexicographic order, each followed by a space).
int main() {
    // freopen("a.in", "r", stdin);
    // freopen("a.out", "w", stdout);

    int n = read();
    tr = new Graph();

    // Insert back to front so that the fail tree is the suffix tree.
    for (int i = n; i >= 1; i--) {
        insert(sa[i] / N, i);
    }

    getsa();

    return 0;
}