poj2778 DNA Sequence(AC自动机+矩阵快速幂)
Description
It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease.
Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n.
Input
First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10.
Output
An integer, the number of DNA sequences, mod 100000.
Sample Input
4 3 AT AC AG AA
Sample Output
36
题意:给你m个长度不超过10的字符串,每个字符串只有'A','T','C','G'这四种,现在让你用这四种字符拼成n个字符,问有多少种拼凑的方案,使得新的字符串不包含前面m个字符串。
思路:可以先构造m个字符串的trie图,然后把树上含有一个字符串尾节点的节点价值val标为1,其他都为0,那么对于每一个节点出边都有4条,如果我们把边看做走的下一步,那么题目就转变成在图上走n步,不能走到危险节点(即某个字符串的尾节点,也是节点val值为0的点),然后我们就想到了邻接矩阵A,用a[i][j]表示节点i和j间的边的条数,那么A的n次就是从一个点到另一个点走n步的方案数。
写代码的时候有一点要注意,如果AT中的T是危险节点,那么trie树中的CCATC的T也是危险节点,也要标记val=1,这一步在bfs的时候实现,加上这一句:" if(val[fail[x]]) val[x]=1;"
#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
#include<vector>
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<string>
#include<algorithm>
using namespace std;
typedef long long ll;
#define inf 99999999
#define pi acos(-1.0)
#define maxnode 510000
#define MOD 100000
char s[100];
int num[1006];
struct trie{
ll sz,root,val[maxnode],next[maxnode][4],fail[maxnode];
int q[1111111];
void init(){
int i;
sz=root=0;
val[0]=0;
for(i=0;i<4;i++){
next[root][i]=-1;
}
}
int idx(char c){
if(c=='A')return 0;
if(c=='C')return 1;
if(c=='T')return 2;
if(c=='G')return 3;
}
void charu(char *s){
ll i,j,u=0;
ll len=strlen(s);
for(i=0;i<len;i++){
int c=idx(s[i]);
if(next[u][c]==-1){
sz++;
val[sz]=0;
next[u][c]=sz;
u=next[u][c];
for(j=0;j<4;j++){
next[u][j]=-1;
}
}
else{
u=next[u][c];
}
}
val[u]=1;
}
void build(){
int i,j;
int front,rear;
front=1;rear=0;
for(i=0;i<4;i++){
if(next[root][i]==-1 ){
next[root][i]=root;
}
else{
fail[next[root][i] ]=root;
rear++;
q[rear]=next[root][i];
}
}
while(front<=rear){
int x=q[front];
if(val[fail[x]]) //!!!!!这里非常重要,如果一个节点的fail节点的val值存在(即以当前节点为尾节点的前缀的后缀是某一个字符串,那么该节点和fail指针指的节点一样也是危险节点)
val[x]=1;
front++;
for(i=0;i<4;i++){
if(next[x][i]==-1){
next[x][i]=next[fail[x] ][i];
}
else{
fail[next[x][i] ]=next[fail[x] ][i];
rear++;
q[rear]=next[x][i];
}
}
}
}
}ac;
struct matrix{
ll n,m,i;
ll data[105][105];
void init_danwei(){
for(i=0;i<n;i++){
data[i][i]=1;
}
}
};
matrix multi(matrix &a,matrix &b){
ll i,j,k;
matrix temp;
temp.n=a.n;
temp.m=b.m;
for(i=0;i<temp.n;i++){
for(j=0;j<temp.m;j++){
temp.data[i][j]=0;
}
}
for(i=0;i<a.n;i++){
for(k=0;k<a.m;k++){
if(a.data[i][k]>0){
for(j=0;j<b.m;j++){
temp.data[i][j]=(temp.data[i][j]+(a.data[i][k]*b.data[k][j])%MOD )%MOD;
}
}
}
}
return temp;
}
matrix fast_mod(matrix &a,ll n){
matrix ans;
ans.n=a.n;
ans.m=a.m;
memset(ans.data,0,sizeof(ans.data));
ans.init_danwei();
while(n>0){
if(n&1)ans=multi(ans,a);
a=multi(a,a);
n>>=1;
}
return ans;
}
int main()
{
ll n,m,i,j;
while(scanf("%lld%lld",&m,&n)!=EOF)
{
ac.init();
for(i=1;i<=m;i++){
scanf("%s",s);
ac.charu(s);
}
ac.build();
matrix a;
a.n=a.m=ac.sz+1;
memset(a.data,0,sizeof(a.data));
for(i=0;i<=ac.sz;i++){
for(j=0;j<4;j++){
if(ac.val[ac.next[i][j] ]==0 ){
a.data[i][ac.next[i][j] ]++;
}
}
}
matrix cnt;
cnt=fast_mod(a,n);
ll sum=0;
for(i=0;i<=cnt.n;i++){
sum=(sum+cnt.data[0][i])%MOD;
}
printf("%lld\n",sum);
}
return 0;
}