poj 2778 DNA Sequence(AC自动机 + 矩阵快速幂)
DNA Sequence
Time Limit: 1000MS | Memory Limit: 65536K | |
Total Submissions: 14426 | Accepted: 5572 |
Description
It is well known that a DNA sequence is a sequence that contains only A, C, T and G, and it is very useful to analyze segments of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many DNA sequences of a species do not contain any of those segments.
Suppose that the DNA sequence of a species is a sequence consisting of A, C, T and G, and that the length of the sequence is a given integer n.
Input
The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.
Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3
AT
AC
AG
AA
Sample Output
36
/*
poj 2778 DNA Sequence (AC automaton + fast matrix exponentiation)

A string Str of length n is made up of the letters A, T, G and C, and m
substrings are given. Count how many different Str contain none of them.

Building the AC automaton yields a transition graph. Turn that graph into a
matrix; fast matrix exponentiation then gives the number of ways to walk from
node a to node b.

Roughly: every character that does not continue a substring falls back
towards the root node, i.e. we keep searching for the substrings inside the
graph. So all we need are the paths that never pass through a dangerous node
marked by ed (the last node of each substring).

Take the patterns agc(d) and c, and suppose we walk the path a-g-c-d:

          root
          /  \
         a    c
        /
       g
      /
     c
    /
   d

From the picture, the d on the left and the c on the right are dangerous
nodes, but the c on the left branch was missed. Hence: if the node that a fail
pointer points to is dangerous, the current node is dangerous as well.

AC automaton: http://blog.csdn.net/niushuai666/article/details/7002823

hhh-2016-04-23 15:59:53
*/
#include <iostream>
#include <vector>
#include <cstring>
#include <string>
#include <cstdio>
#include <queue>
#include <functional>
#include <map>
using namespace std;
#define lson (i<<1)
#define rson ((i<<1)|1)
typedef long long ll;
const int maxn = 40010;
const int mod = 100000;

/*
 * Square matrix of side `len` stored in a fixed 105x105 table.
 * Only the top-left len x len corner is used by mult / pow_mat.
 * Both constructors zero the table so no entry is ever read uninitialised.
 */
struct Matrix
{
    int len;
    int ma[105][105];

    Matrix() : len(0)
    {
        memset(ma, 0, sizeof(ma));
    }

    explicit Matrix(int L) : len(L)
    {
        memset(ma, 0, sizeof(ma));
    }
};

/*
 * Returns (ta * tb) with every entry reduced modulo `mod`.
 * The side length of the result is taken from ta.len.
 *
 * The operands are taken by const reference: a Matrix is about 44 KB, and
 * copying two of them for every multiplication is pure overhead.
 */
Matrix mult(const Matrix &ta, const Matrix &tb)
{
    Matrix tc(ta.len);
    for (int i = 0; i < ta.len; i++)
    {
        for (int j = 0; j < ta.len; j++)
        {
            // With entries below `mod` each product is below 1e10, and at most
            // 105 of them are summed, so a 64-bit accumulator cannot overflow
            // and a single reduction at the end is enough.
            ll sum = 0;
            for (int k = 0; k < ta.len; k++)
                sum += (ll)ta.ma[i][k] * tb.ma[k][j];
            tc.ma[i][j] = (int)(sum % mod);
        }
    }
    return tc;
}

/*
 * Returns a^n (entries modulo `mod`) by binary exponentiation.
 * For n <= 0 the identity matrix of side a.len is returned.
 */
Matrix pow_mat(Matrix a, ll n)
{
    Matrix cnt(a.len);
    for (int i = 0; i < cnt.len; i++)
        cnt.ma[i][i] = 1;

    while (n > 0)
    {
        if (n & 1)
            cnt = mult(cnt, a);
        n >>= 1;
        if (n > 0)              // skip the squaring that nobody would use
            a = mult(a, a);
    }
    return cnt;
}

/*
 * Aho-Corasick automaton over the alphabet {A, C, T, G}.
 *
 *   nex[v][c]  child / goto transition of node v on letter c (-1 = absent
 *              until build() fills it in)
 *   fail[v]    failure link of node v
 *   ed[v]      non-zero when reaching v means a pattern has been matched
 *   L          number of nodes allocated so far
 */
struct Tire
{
    int nex[105][4], fail[105], ed[105];
    int root, L;

    /* Allocates a fresh node without children and returns its index. */
    int newnode()
    {
        for (int i = 0; i < 4; i++)
            nex[L][i] = -1;
        ed[L] = 0;
        return L++;
    }

    /* Resets the automaton to a single root node. */
    void ini()
    {
        L = 0;
        root = newnode();
    }

    /* Maps a letter to its column 0..3, or -1 when it is not one of A/C/T/G. */
    int cha(char x)
    {
        switch (x)
        {
        case 'A': return 0;
        case 'C': return 1;
        case 'T': return 2;
        case 'G': return 3;
        default:  return -1;
        }
    }

    /*
     * Inserts the NUL-terminated pattern `buf` and marks its last node as
     * dangerous. A pattern containing a letter outside A/C/T/G, or one that
     * would not fit into the node table, is dropped without being marked.
     */
    void inser(const char buf[])
    {
        const int capacity = (int)(sizeof(ed) / sizeof(ed[0]));
        int now = root;
        for (int i = 0; buf[i] != '\0'; i++)
        {
            int ta = cha(buf[i]);
            if (ta < 0)
                return;
            if (nex[now][ta] == -1)
            {
                if (L >= capacity)
                    return;
                nex[now][ta] = newnode();
            }
            now = nex[now][ta];
        }
        ed[now]++;
    }

    /*
     * Computes the failure links breadth-first and replaces every missing
     * transition by the transition of the failure node, so that afterwards
     * nex[v][c] is defined for every node and letter.
     */
    void build()
    {
        queue<int> q;
        fail[root] = root;
        for (int i = 0; i < 4; i++)
        {
            if (nex[root][i] == -1)
                nex[root][i] = root;
            else
            {
                fail[nex[root][i]] = root;
                q.push(nex[root][i]);
            }
        }

        while (!q.empty())
        {
            int now = q.front();
            q.pop();

            // A node whose failure target is dangerous has a pattern as a
            // suffix of its own path, so it is dangerous too. The failure
            // target is shallower and has therefore been finalised already.
            if (ed[fail[now]])
                ed[now] = 1;

            for (int i = 0; i < 4; i++)
            {
                if (nex[now][i] == -1)
                    nex[now][i] = nex[fail[now]][i];
                else
                {
                    fail[nex[now][i]] = nex[fail[now]][i];
                    q.push(nex[now][i]);
                }
            }
        }
    }

    /*
     * Returns the L x L matrix whose entry [i][j] is the number of letters
     * that move the automaton from node i to node j, counting only moves
     * whose target j is not dangerous. Call build() first.
     */
    Matrix to_mat()
    {
        Matrix ta(L);
        for (int i = 0; i < L; i++)
        {
            for (int j = 0; j < 4; j++)
            {
                int to = nex[i][j];
                if (!ed[to])
                    ta.ma[i][to]++;
            }
        }
        return ta;
    }
};

Tire ac;

/*
 * Reads test cases until end of input. Each case is "m n" followed by m
 * patterns; prints the number of length-n strings that avoid every pattern,
 * modulo `mod`.
 */
int main()
{
    int m;
    ll n;
    // iostream extraction is used for the 64-bit length because the
    // "%I64d" scanf modifier is not portable across C libraries.
    while (cin >> m >> n)
    {
        ac.ini();
        string pattern;
        for (int i = 0; i < m; i++)
        {
            if (!(cin >> pattern))
                break;
            ac.inser(pattern.c_str());
        }
        ac.build();

        Matrix ta = pow_mat(ac.to_mat(), n);

        // Row 0 is the root: sum the walks of length n that end anywhere.
        int ans = 0;
        for (int i = 0; i < ta.len; i++)
            ans = (ans + ta.ma[0][i]) % mod;
        printf("%d\n", ans);
    }
    return 0;
}