POJ2278 DNA Sequence —— AC自动机 + 矩阵优化
题目链接:https://vjudge.net/problem/POJ-2778
DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 18479 | Accepted: 7112 |
Description
It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Input
First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3 AT AC AG AA
Sample Output
36
Source
题意:
给出m个DNA序列,问长度为n且不含这m个序列的DNA有多少个?
题解:
1.把这m个序列插入到AC自动机中。
2.根据自动机中各个状态之间的关系,构成一张邻接矩阵A,但需要去除与“结束点”有关的边,这样就能保证不含有给出的序列。
3.长度为n,那么答案就是 A^n 中,初始状态那一行之和。
代码如下:
1 #include <iostream> 2 #include <cstdio> 3 #include <cstring> 4 #include <algorithm> 5 #include <vector> 6 #include <cmath> 7 #include <queue> 8 #include <stack> 9 #include <map> 10 #include <string> 11 #include <set> 12 using namespace std; 13 typedef long long LL; 14 const double EPS = 1e-6; 15 const int INF = 2e9; 16 const LL LNF = 9e18; 17 const int MOD = 1e5; 18 const int MAXN = 110+10; 19 20 int Size; 21 int Map[128]; 22 struct MA 23 { 24 int mat[110][110]; 25 void init() 26 { 27 for(int i = 0; i<Size; i++) 28 for(int j = 0; j<Size; j++) 29 mat[i][j] = (i==j); 30 } 31 }; 32 33 MA operator*(const MA &x, const MA &y) 34 { 35 MA ret; 36 memset(ret.mat, 0, sizeof(ret.mat)); 37 for(int i = 0; i<Size; i++) 38 for(int j = 0; j<Size; j++) 39 for(int k = 0; k<Size; k++) 40 ret.mat[i][j] += (1LL*x.mat[i][k]*y.mat[k][j])%MOD, ret.mat[i][j] %= MOD; 41 return ret; 42 } 43 44 MA qpow(MA x, int y) 45 { 46 MA s; 47 s.init(); 48 while(y) 49 { 50 if(y&1) s = s*x; 51 x = x*x; 52 y >>= 1; 53 } 54 return s; 55 } 56 57 struct Trie 58 { 59 const static int sz = 4, base = 'A'; 60 int next[MAXN][sz], fail[MAXN], end[MAXN]; 61 int root, L; 62 int newnode() 63 { 64 for(int i = 0; i<sz; i++) 65 next[L][i] = -1; 66 end[L++] = false; 67 return L-1; 68 } 69 void init() 70 { 71 L = 0; 72 root = newnode(); 73 } 74 void insert(char buf[]) 75 { 76 int len = strlen(buf); 77 int now = root; 78 for(int i = 0; i<len; i++) 79 { 80 if(next[now][Map[buf[i]]] == -1) next[now][Map[buf[i]]] = newnode(); 81 now = next[now][Map[buf[i]]]; 82 } 83 end[now] = true; 84 } 85 void build() 86 { 87 queue<int>Q; 88 fail[root] = root; 89 for(int i = 0; i<sz; i++) 90 { 91 if(next[root][i] == -1) next[root][i] = root; 92 else fail[next[root][i]] = root, Q.push(next[root][i]); 93 } 94 while(!Q.empty()) 95 { 96 int now = Q.front(); 97 Q.pop(); 98 end[now] |= end[fail[now]]; //当前串的后缀是否也包含单词 99 for(int i = 0; i<sz; i++) 100 { 101 if(next[now][i] == -1) next[now][i] = next[fail[now]][i]; 102 else fail[next[now][i]] = next[fail[now]][i], Q.push(next[now][i]); 103 } 104 } 105 } 106 107 int query(int n) 108 { 109 MA s; 110 memset(s.mat, 0, sizeof(s.mat)); 111 for(int i = 0; i<L; i++) 112 { 113 if(end[i]) continue; //存在单词的状态没有后继 114 for(int j = 0; j<sz; j++) 115 { 116 if(end[next[i][j]]) continue; //存在单词的状态没有前驱 117 s.mat[i][next[i][j]]++; // i到next[i][j]的路径数+1。注意,当next[i][j]==root时,路径数很可能大于1。 118 } 119 } 120 121 int ret = 0; 122 Size = L; 123 s = qpow(s, n); 124 for(int i = 0; i<L; i++) //答案为:初始状态到各个状态(包括初始状态)的路径数之和。 125 ret = (ret+s.mat[0][i])%MOD; 126 return ret; 127 } 128 }; 129 130 Trie ac; 131 char buf[20]; 132 int main() 133 { 134 Map['A'] = 0; Map['C'] = 1; Map['G'] = 2; Map['T'] = 3; //离散化 135 int n, m; 136 while(scanf("%d%d", &m,&n)!=EOF) 137 { 138 ac.init(); 139 for(int i = 1; i<=m; i++) 140 { 141 scanf("%s", buf); 142 ac.insert(buf); 143 } 144 ac.build(); 145 int ans = ac.query(n); 146 printf("%d\n", ans); 147 } 148 return 0; 149 }