Aho-Corasick 的一些练习(HDU 2222、POJ 1625、POJ 2778、HDU 2457、HDU 3247)
其实也没有做很多题,但还是发出来(能骗骗访问量。。。)
Hdu 2222
//本代码有误(存在反例,但提交后仍然 AC 了)……希望有人能告诉我是不是真的数据太水。
#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
// Alphabet size: characters are mapped to indices with `c - 'a'`.
const int SZ = 26;
// Capacity shared by the per-node arrays and the text buffer.
const int MX_N = 1e6 + 10;
// memset shorthand; `a` must be a true array so that `sizeof a` covers all of it.
#define FILL(a, b) memset(a, b, sizeof a)
// Inclusive counting loop: i = s, s+1, ..., t.
// `register` was dropped because the specifier is no longer allowed in C++17,
// and the bounds are parenthesised so that any expression can be passed safely.
#define rep(i, s, t) for(int i = (s); i <= (t); ++i)
using namespace std;
// Reads the next run of decimal digits from stdin and returns it as a T.
// Leading non-digit characters are skipped; a minus sign is NOT interpreted.
// `x` seeds the accumulator and is returned unchanged when no digit is found.
// The character following the number is consumed.
template <class T>
T read(T x = 0) {
    // int (not char) so that EOF is distinguishable from every real character;
    // without the EOF test the skip loop would spin forever at end of input.
    int c = getchar();
    while(c != EOF && (c < '0' || c > '9')) c = getchar();
    for(; c >= '0' && c <= '9'; c = getchar())
        x = x * 10 + (c - '0');
    return x;
}
// Aho-Corasick automaton over 'a'..'z' that counts how many inserted keywords
// occur in a text. Every inserted copy of a keyword is counted at most once.
namespace Aho_Corasick {
queue<int> Q;               // BFS queue used by Bfs_fail()
int ch[MX_N][SZ], tot;      // ch[u][c]: transition of node u on letter c; tot: highest node id (root = 0)
int val[MX_N], fail[MX_N];  // val[u]: keywords ending at u, or -1 once Query() has consumed u; fail[u]: failure link

// Resets the automaton to a single root node.
void init() {
    FILL(fail, 0), tot = 0;
    FILL(val, 0), FILL(ch, 0);
}

// Inserts the lowercase keyword s into the trie; duplicates increase val[].
void insert(char *s) {
    int u = 0;
    for(; *s; ++s) {
        int x = *s - 'a';
        if(!ch[u][x]) ch[u][x] = ++tot;
        u = ch[u][x];
    }
    ++val[u];
}

// Computes the failure links in BFS order and fills every missing transition
// with the transition of the failure node, so Query() never has to backtrack.
void Bfs_fail() {
    Q.push(0);
    while(!Q.empty()) {
        int u = Q.front(); Q.pop();
        for(int i = 0; i < SZ; ++i) {
            int v = ch[u][i];
            if(!v) {
                ch[u][i] = ch[fail[u]][i];
                continue;
            }
            if(u) {  // children of the root keep fail = 0
                int t = fail[u];
                for(; t && !ch[t][i]; t = fail[t]);
                fail[v] = ch[t][i];
            }
            Q.push(v);
        }
    }
}

// Returns the number of inserted keywords that occur in the lowercase text T.
// Consumes the counts, so call init()/insert()/Bfs_fail() again before reuse.
int Query(char *T) {
    int u = 0, ret = 0;
    for(; *T; ++T) {
        u = ch[u][*T - 'a'];
        // Walk the whole suffix chain, not just up to the first node without a
        // keyword: a keyword may end further up even if val[v] == 0 here.
        // Visited nodes are marked with -1; a marked node implies that its
        // entire chain was already handled, so the total work stays linear.
        for(int v = u; v && val[v] != -1; v = fail[v])
            ret += val[v], val[v] = -1;
    }
    return ret;
}
}
using namespace Aho_Corasick;
// Driver: for each test case reads n keywords and one text, then prints how
// many of the keywords occur in the text.
int main() {
#ifndef ONLINE_JUDGE
    freopen("input.in", "r", stdin);
    freopen("res.out", "w", stdout);
#endif
    // static: t alone is about 1 MB, which can overflow a small default stack
    // when it is an automatic variable.
    static char s[55], t[MX_N];
    int T = read<int>();
    while(T--) {
        init();
        int n = read<int>();
        rep(i, 1, n) scanf("%s", s), insert(s);
        Bfs_fail();
        scanf("%s", t);
        printf("%d\n", Query(t));
    }
    return 0;
}
//模板题代码就是给大家参考一下
POJ 1625 Censored!
/*
给n个字母,构成长度为m的串,总共有n^m种。
给 p 个字符串,问这 n^m 种字符串中,不含这 p 个字符串中任何一个作为子串的共有多少个。
*/
#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
// Second dimension of the transition table (upper bound on the alphabet size).
const int SZ = 1e2 + 10;
// Bound used for the DP length dimension, the big-number limb count and the input buffer.
const int MX_N = 50 + 10;
// Upper bound on the number of automaton nodes.
const int MX_Node = 3e2 + 10;
// Input characters are handled as unsigned so they can index arrays directly.
#define US unsigned char
// memset shorthand; `a` must be a true array so that `sizeof a` covers all of it.
#define Fill(a, b) memset(a, b, sizeof a)
// Inclusive ascending / descending counting loops.
// `register` was dropped because the specifier is no longer allowed in C++17,
// and the bounds are parenthesised so that any expression can be passed safely.
#define rep(i, s, t) for(int i = (s); i <= (t); ++i)
#define dec(i, s, t) for(int i = (s); i >= (t); --i)
using namespace std;
// Reads the next run of decimal digits from stdin and returns it as a T.
// Leading non-digit characters are skipped; a minus sign is NOT interpreted.
// `x` seeds the accumulator and is returned unchanged when no digit is found.
// The character following the number is consumed.
template <class T>
T read(T x = 0) {
    // int (not char) so that EOF is distinguishable from every real character;
    // without the EOF test the skip loop would spin forever at end of input.
    int c = getchar();
    while(c != EOF && (c < '0' || c > '9')) c = getchar();
    for(; c >= '0' && c <= '9'; c = getchar())
        x = x * 10 + (c - '0');
    return x;
}
// Per-test-case input read by main(): n = alphabet size (Hash() maps the first
// n characters), m = length of the strings being counted, q = number of
// forbidden words (counted down to consume them).
int n, m, q;
namespace Aho_Corasick {
int tot;
queue<int> Q;
int ch[MX_Node][SZ], val[MX_Node], fail[MX_Node];
int Map, hash[MX_Node];
void Hash(US *s) {
Map = n-1;
rep(i, 0, Map) hash[s[i]] = i;
}
void init() {
tot = 0;
Fill(ch, 0), Fill(val, 0);
Fill(fail, 0), Fill(hash, 0);
}
void insert(US *s) {
int u = 0;
for(; *s; ++s) {
int x = hash[*s];
if(!ch[u][x]) ch[u][x] = ++tot;
u = ch[u][x];
}val[u] = 1;
}
void Bfs_fail() {
Q.push(0);
while(!Q.empty()) {
int u = Q.front(), v; Q.pop();
rep(i, 0, Map) if((v=ch[u][i])) {
int t = fail[u];
if(u) {
for(; t && !ch[t][i]; t = fail[t]);
fail[v] = ch[t][i];
}Q.push(v);
}else ch[u][i] = ch[fail[u]][i];
val[u] |= val[fail[u]];
}
}
}using namespace Aho_Corasick;
// Non-negative big integer stored in limbs x[1..len], least significant first.
// print() emits every limb below the top one as exactly four decimal digits.
struct Bn {
    int x[MX_N], len;

    Bn() : len(0) { memset(x, 0, sizeof x); }

    // Writes the number in decimal followed by a newline.
    void print() {
        printf("%d", x[len]);
        for(int pos = len - 1; pos >= 1; --pos)
            printf("%.4d", x[pos]);
        puts("");
    }
} temp;
// Limb base of Bn.
const int Mod = 1e4;

// Adds two big integers. The sum is built in the global scratch object `temp`
// and returned by value.
Bn operator + (Bn a, Bn b) {
    Fill(temp.x, 0);
    temp.len = a.len < b.len ? b.len : a.len;
    for(int pos = 1; pos <= temp.len; ++pos) {
        temp.x[pos] += a.x[pos] + b.x[pos];
        if(temp.x[pos] < Mod) continue;
        // Push the overflow into the next limb.
        temp.x[pos + 1] += temp.x[pos] / Mod;
        temp.x[pos] %= Mod;
    }
    // A carry out of the top limb lengthens the number.
    while(temp.x[temp.len + 1] != 0) ++temp.len;
    return temp;
}
Bn f[MX_N][MX_Node], Ans;
void solve() {
rep(i, 0, MX_N-1)rep(j, 0, MX_Node-1)
Fill(f[i][j].x, 0), f[i][j].len = 1;
Fill(Ans.x, 0), Ans.len = 1;
f[0][0].x[1] = 1, f[0][0].len = 1;
rep(i, 1, m)rep(u, 0, tot) if(!val[u])
rep(k, 0, Map) {
int v = ch[u][k];
if(!val[v]) f[i][v] = f[i][v] + f[i-1][u];
}
rep(i, 0, tot) if(!val[i])
Ans = Ans + f[m][i];
Ans.print();
}
// Driver: processes test cases until the three header integers can no longer
// be read. Each case: alphabet line, then q forbidden words, then the DP.
int main() {
#ifndef ONLINE_JUDGE
    freopen("input.in", "r", stdin);
    freopen("res.out", "w", stdout);
#endif
    US s[MX_N];
    for(;;) {
        if(scanf("%d%d%d", &n, &m, &q) != 3) break;
        init();
        // The first line after the header is the alphabet itself.
        scanf("%s", s);
        Hash(s);
        while(q--) {
            scanf("%s", s);
            insert(s);
        }
        Bfs_fail();
        solve();
    }
    return 0;
}
//比较恶心的是要写高精,以及字符串的读入必须用unsigned char不然会弄出负数
//状态的设置f[i][j] -> len = i, 以自动机的j节点结尾的方案
//MX_N, MX_Node这些写得好像有点丑不要介意
POJ 2778. DNA Sequence
//同上题,只是输入格式及数据范围不同,以及给了模数!!!!
#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
// Modulus applied to all counts.
const int Mod = 1e5;
// Largest letter index (letters are numbered 0..Sz).
const int Sz = 3;
// Upper bound on the number of automaton nodes.
const int MX_Node = 1e2 + 10;
// memset shorthand; `a` must be a true array so that `sizeof a` covers all of it.
#define Fill(a, b) memset(a, b, sizeof a)
// Inclusive ascending / descending counting loops.
// `register` was dropped because the specifier is no longer allowed in C++17,
// and the bounds are parenthesised so that any expression can be passed safely.
#define rep(i, s, t) for(int i = (s); i <= (t); ++i)
#define dec(i, s, t) for(int i = (s); i >= (t); --i)
using namespace std;
// Reads the next run of decimal digits from stdin and returns it as a T.
// Leading non-digit characters are skipped; a minus sign is NOT interpreted.
// `x` seeds the accumulator and is returned unchanged when no digit is found.
// The character following the number is consumed.
template <class T>
T read(T x = 0) {
    // int (not char) so that EOF is distinguishable from every real character;
    // without the EOF test the skip loop would spin forever at end of input.
    int c = getchar();
    while(c != EOF && (c < '0' || c > '9')) c = getchar();
    for(; c >= '0' && c <= '9'; c = getchar())
        x = x * 10 + (c - '0');
    return x;
}
namespace Aho_Corasick {
int tot;
queue<int> Q;
int ch[MX_Node][Sz + 1], val[MX_Node], fail[MX_Node];
int hash[MX_Node<<1];
void Hash() {
hash['T'] = 0, hash['A'] = 1;
hash['G'] = 2, hash['C'] = 3;
}
void init() {
tot = 0;
Fill(ch, 0), Fill(val, 0);
Fill(fail, 0), Fill(hash, 0);
}
void insert(char *s) {
int u = 0;
for(; *s; ++s) {
int x = hash[*s];
if(!ch[u][x]) ch[u][x] = ++tot;
u = ch[u][x];
}val[u] = 1;
}
void Bfs_fail() {
Q.push(0);
while(!Q.empty()) {
int u = Q.front(), v; Q.pop();
rep(i, 0, Sz) if((v=ch[u][i])) {
if(u) {
int t = fail[u];
for(; t && !ch[t][i]; t = fail[t]);
fail[v] = ch[t][i];
}Q.push(v);
}else ch[u][i] = ch[fail[u]][i];
val[u] |= val[fail[u]];
}
}
}using namespace Aho_Corasick;
// Square matrix of counts, zero-filled on construction. `unit` is the global
// instance that holds the automaton's transition matrix.
struct Matrix {
    long long G[110][110];

    Matrix() { memset(G, 0, sizeof G); }
} unit;
// Matrix product restricted to indices 0..tot, each entry reduced modulo Mod
// after its dot product has been accumulated.
Matrix operator * (Matrix a, Matrix b) {
    Matrix prod;
    for(int r = 0; r <= tot; ++r)
        for(int c = 0; c <= tot; ++c) {
            long long acc = prod.G[r][c];
            for(int k = 0; k <= tot; ++k)
                acc += a.G[r][k] * b.G[k][c];
            prod.G[r][c] = acc % Mod;
        }
    return prod;
}
// Raises a to the b-th power by repeated squaring (indices 0..tot).
// For b <= 0 the identity matrix is returned; previously b == 0 turned into -1
// after the decrement and the shifting loop never reached zero.
Matrix operator ^ (Matrix a, long long b) {
    if(b <= 0) {
        Matrix identity;
        for(int i = 0; i <= tot; ++i) identity.G[i][i] = 1;
        return identity;
    }
    // Start from a^1 and multiply in the remaining b-1 factors.
    Matrix ret = a;
    for(--b; b > 0; b >>= 1) {
        if(b & 1) ret = ret * a;
        // Square only while higher bits remain; the last square is never used.
        if(b > 1) a = a * a;
    }
    return ret;
}
void build_matrix() {
rep(i, 0, tot)rep(j, 0, Sz)
unit.G[i][ch[i][j]] += !val[i] && !val[ch[i][j]];
}
int n;
long long m;
int main() {
#ifndef ONLINE_JUDGE
freopen("input.in", "r", stdin);
freopen("res.out", "w", stdout);
#endif
static char s[MX_Node];
scanf("%d%lld", &n, &m);
Hash();
while(n--) {
scanf("%s", s);
insert(s);
}
Bfs_fail();
build_matrix();
unit = unit ^ m;
long long Ans = 0;
rep(i, 0, tot) Ans = (Ans + unit.G[0][i]) % Mod;
printf("%lld\n", Ans);
return 0;
}
//“类似”Floyd 矩阵的一种东西:方案数可以用矩阵乘法来累加,矩阵里的每个元素,就是从别的点累计转移过来的方案数
HDU 2457. DNA repair/POJ 3691
//给出一些DNA病毒串及一个基因序列,求最少的修改使基因序列不包含病毒串
#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
// "Infinity" for the DP (unreachable state).
const int oo = 1e8;
// Alphabet size (letters are numbered 0..size-1).
const int size = 4;
// Capacity of the text buffer and of the DP's length dimension.
const int mx_n = 1e3 + 10;
// Upper bound on the number of automaton nodes. Raised from 5e2 + 10.
// NOTE(review): the problem statement, as recalled, allows 50 patterns of
// length <= 20, i.e. up to 1000 trie nodes plus the root, which the previous
// bound could not hold. Confirm the limits against the statement.
const int mx_node = 1e3 + 10;
// memset shorthand; `a` must be a true array so that `sizeof a` covers all of it.
#define Fill(a, b) memset(a, b, sizeof a)
// Inclusive counting loop: i = s, s+1, ..., t.
// `register` was dropped because the specifier is no longer allowed in C++17,
// and the bounds are parenthesised so that any expression can be passed safely.
#define rep(i, s, t) for(int i = (s); i <= (t); ++i)
using namespace std;
namespace Aho_Corasick {
queue<int> Q;
int tot, h[mx_node];
int ch[mx_node][size], val[mx_node], fail[mx_node];
void init() {
tot = 0, Fill(ch, 0);
Fill(val, 0), Fill(fail, 0);
}
void Hash() {
h['C'] = 0;
h['A'] = 1;
h['T'] = 2;
h['G'] = 3;
}
void insert(char *s) {
int u = 0;
for(; *s; ++s) {
int x = h[*s];
if(!ch[u][x]) ch[u][x] = ++tot;
u = ch[u][x];
}val[u] = 1;
}
void Bfs_fail() {
Q.push(0);
while(!Q.empty()) {
int u = Q.front(), v; Q.pop();
rep(i, 0, size-1) if((v=ch[u][i])) {
if(u) {
int t = fail[u];
for(; t && !ch[t][i]; t = fail[t]);
fail[v] = ch[t][i];
}Q.push(v);
}else ch[u][i] = ch[fail[u]][i];
val[u] |= val[fail[u]];
}
}
}using namespace Aho_Corasick;
char T[mx_n];
int f[mx_n][mx_node];
void solve() {
scanf("%s", T);
int len = strlen(T), Ans = oo;
rep(i, 0, len)rep(j, 0, tot)
f[i][j] = oo; f[0][0] = 0;
rep(i, 0, len-1) {
int idx = h[T[i]], v;
rep(u, 0, tot) if(!val[u] && f[i][u] ^ oo)
rep(x, 0, size-1) if(!val[(v=ch[u][x])]) {
if(x ^ idx) f[i+1][v] = min(f[i][u] + 1, f[i+1][v]);
else f[i+1][v] = min(f[i][u], f[i+1][v]);
}
}rep(i, 0, tot) Ans = min(Ans, f[len][i]);
printf("%d\n", Ans <= len? Ans : -1);
}
// Driver: processes test cases until a pattern count of 0 (or end of input).
int main() {
#ifndef ONLINE_JUDGE
    freopen("input.in", "r", stdin);
    freopen("res.out", "w", stdout);
#endif
    int Case = 0, n;
    Hash();
    // Compare against 1: scanf returns EOF (-1, i.e. "true") at end of input,
    // which previously kept the loop running forever on a missing terminator.
    while(scanf("%d", &n) == 1 && n) {
        char s[50];
        init();
        while(n--) {
            scanf("%s", s);
            insert(s);
        }
        Bfs_fail();
        printf("Case %d: ", ++Case);
        solve();
    }
    return 0;
}
//对于状态的设置 f[i][j],
//len = i 的串, 在ac自动机上能匹配到以节点j结尾的最少修改数
//转移的话比较显然。。。。。
HDU 3247
#include <algorithm>
#include <iostream>
#include <cstring>
#include <cstdio>
#include <queue>
// "Infinity" for distances and DP values; two of them still fit in an int.
const int oo = 5e8;
// Capacity of the key-node list and of the distance matrix.
const int mx_n = 12;
// Capacity of the bitmask dimension of the DP.
const int mx_s = (1 << 11) + 1;
// Upper bound on the number of automaton nodes.
const int mx_node = 6e4 + 10;
// memset shorthand; `a` must be a true array so that `sizeof a` covers all of it.
#define Fill(a, b) memset(a, b, sizeof a)
// Inclusive counting loop: i = s, s+1, ..., t.
// `register` was dropped because the specifier is no longer allowed in C++17,
// and the bounds are parenthesised so that any expression can be passed safely.
#define rep(i, s, t) for(int i = (s); i <= (t); ++i)
using namespace std;
// Per-test-case counts read by main(): n strings are inserted with ids 1..n
// (each contributes one bit of the target mask), m strings with id 0 (their
// states are the ones the BFS refuses to enter).
int n, m;
// Aho-Corasick automaton over the binary alphabet '0'/'1'.
//   val[u][0] != 0 : the string of state u ends with a string inserted with k == 0
//   val[u][1]      : bitmask of the ids k >= 1 whose strings end at state u
namespace Aho_Corasick {
queue<int> Q;                         // BFS queue (also reused by Pre_dist)
int tot, fail[mx_node];               // tot: highest node id (root = 0); fail: failure links
int ch[mx_node][2], val[mx_node][2];  // transitions and the two kinds of marks

// Resets the automaton to a single root node.
void init() {
    tot = 0;
    Fill(fail, 0), Fill(ch, 0), Fill(val, 0);
}

// Inserts the binary string s. k == 0 flags the final node in val[][0];
// k >= 1 adds bit k to the final node's mask in val[][1].
void insert(char *s, int k) {
    int u = 0;
    for(; *s; ++s) {
        int x = *s - '0';
        if(!ch[u][x]) ch[u][x] = ++tot;
        u = ch[u][x];
    }
    if(!k) val[u][0] = 1;
    // |= rather than =: two identical strings share a node, and plain
    // assignment dropped the earlier id, making the full mask unreachable.
    else val[u][1] |= 1 << k;
}

// Builds failure links breadth-first, completes missing transitions and
// merges both kinds of marks along the failure links.
void Bfs_fail() {
    Q.push(0);
    while(!Q.empty()) {
        int u = Q.front(), v; Q.pop();
        rep(i, 0, 1) if((v=ch[u][i])) {
            // fail[u]'s transitions are already complete, so one lookup suffices.
            if(u) fail[v] = ch[fail[u]][i];
            Q.push(v);
        }else ch[u][i] = ch[fail[u]][i];
        val[u][0] |= val[fail[u]][0];
        val[u][1] |= val[fail[u]][1];
    }
}
}
using namespace Aho_Corasick;
int G[mx_n][mx_n], dis[mx_node];
int f[mx_s][mx_n], p[mx_n], sz;
void Pre_dist(int u) {
rep(i, 0, tot) dis[i] = oo;
Q.push(p[u]), dis[p[u]] = 0;
while(!Q.empty()) {
int u = Q.front(), v; Q.pop();
rep(i, 0, 1)
if(dis[v=ch[u][i]] >= oo && !val[v][0])
dis[v] = dis[u] + 1, Q.push(v);
}rep(i, 0, sz) G[u][i] = dis[p[i]];
return ;
}
void solve() {
rep(i, 0, (1<<(n+1))-1)rep(j, 0, sz)
f[i][j] = oo;
f[1][0] = 0;
rep(i, 1, (1<<(n+1))-1)
rep(j, 0, sz) {
if(f[i][j] >= oo) continue;
rep(k, 0, sz)
f[i|val[p[k]][1]][k] = min(f[i|val[p[k]][1]][k], f[i][j] + G[j][k]);
}
int Ans = oo;
rep(i, 0, sz) Ans = min(Ans, f[(1<<(n+1))-1][i]);
printf("%d\n", Ans);
return ;
}
// Driver: processes test cases until the "0 0" terminator or end of input.
int main() {
#ifndef ONLINE_JUDGE
    freopen("input.in", "r", stdin);
    freopen("res.out", "w", stdout);
#endif
    // NOTE(review): enlarged from 1010 bytes. The same buffer receives the
    // k == 0 strings, whose individual length is presumably limited only by
    // their combined length (50000, as recalled from the statement); confirm.
    static char s[50000 + 10];
    // The old condition `... == 2 && n, m` was a comma expression: only m was
    // tested, so a failed read or a non-zero n was ignored.
    while(scanf("%d%d", &n, &m) == 2 && (n || m)) {
        init();
        rep(i, 1, n) scanf("%s", s), insert(s, i);
        rep(i, 1, m) scanf("%s", s), insert(s, 0);
        Bfs_fail();
        sz = 0, Fill(G, 0);
        // Key nodes: every state carrying at least one id bit. p[0] is never
        // assigned and therefore stays 0, i.e. the root.
        // NOTE(review): Bfs_fail() spreads the bits along failure links, so
        // more than mx_n - 1 states may qualify and overflow p[]/G[][]; verify
        // that the judge data cannot trigger this.
        rep(i, 1, tot) if(val[i][1])
            p[++sz] = i;
        rep(i, 0, sz) Pre_dist(i);
        solve();
    }
    return 0;
}
//状压转移,用BFS求出两串不过病毒的最短距离,记得要加入Root不然会出问题的
//提供一份思路比较清晰的代码,虽然我的状态是从1开始存的