poj2778 DNA Sequence(AC自动机+矩阵快速幂）

Description

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, then it may mean that the animal has a genetic disease. Until now scientists have found several such segments; the problem is how many kinds of DNA sequences of a species don't contain those segments.

Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and the length of the sequences is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Each of the next m lines contains a DNA genetic disease segment; the length of each segment is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

题意：给你m个长度不超过10的字符串，每个字符串只有'A','T','C','G'这四种，现在让你用这四种字符拼成n个字符，问有多少种拼凑的方案，使得新的字符串不包含前面m个字符串。

思路：可以先构造m个字符串的trie图，然后把树上含有一个字符串尾节点的节点价值val标为1，其他都为0，那么对于每一个节点出边都有4条，如果我们把边看做走的下一步，那么题目就转变成在图上走n步，不能走到危险节点（即某个字符串的尾节点，也就是val值为1的点），然后我们就想到了邻接矩阵A，用a[i][j]表示从节点i到节点j的边的条数（只统计不进入危险节点的边），那么A的n次幂中的元素(i,j)就是从节点i走n步到节点j的方案数，答案即为A^n中根节点所在行（第0行）的所有元素之和。

写代码的时候有一点要注意，如果AT中的T是危险节点，那么trie树中的CCATC的T也是危险节点，也要标记val=1,这一步在bfs的时候实现，加上这一句：" if(val[fail[x]]) val[x]=1;"

#include<iostream>
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
#include<vector>
#include<map>
#include<set>
#include<queue>
#include<stack>
#include<string>
#include<algorithm>
using namespace std;
// 64-bit integer type used for the trie tables, matrix entries and loop counters.
typedef long long ll;
// NOTE(review): inf and pi are not referenced anywhere in the visible code.
#define inf 99999999
#define pi acos(-1.0)
// Number of node slots in the trie's val/next/fail arrays.
#define maxnode 510000
// Modulus applied to every matrix entry and to the final answer.
#define MOD 100000
// Buffer that main() reads each forbidden pattern into.
char s[100];
// NOTE(review): num is not referenced anywhere in the visible code.
int num[1006];
/*
 * Aho-Corasick automaton ("trie graph") over the alphabet {A, C, T, G}.
 *
 *   sz    index of the last allocated node; node 0 is the root, so the
 *         automaton has sz+1 nodes
 *   val   1 if the node is "dangerous" (a pattern ends at the node or at a
 *         node reachable through its fail links), 0 otherwise
 *   next  transition table; -1 marks a missing child until build() replaces it
 *   fail  failure link of each non-root node
 *   q     BFS queue used by build(), filled starting at index 1
 *
 * Call order: init(), then charu() once per pattern, then build().
 * charu() relies on the -1 markers, which build() overwrites.
 */
struct trie{
    ll sz,root,val[maxnode],next[maxnode][4],fail[maxnode];
    int q[1111111];

    /* Resets the automaton to a lone root node with no children. */
    void init(){
        int i;
        sz=root=0;
        val[0]=0;
        for(i=0;i<4;i++){
            next[root][i]=-1;
        }
    }

    /*
     * Maps a nucleotide letter to its column in next[][]:
     * A->0, C->1, T->2, G->3. Returns -1 for any other character
     * (the function previously fell off its end in that case).
     */
    int idx(char c){
        switch(c){
            case 'A':return 0;
            case 'C':return 1;
            case 'T':return 2;
            case 'G':return 3;
            default:return -1;
        }
    }

    /*
     * Inserts pattern s into the trie and marks its last node as dangerous.
     * A pattern containing a character other than A/C/T/G is ignored: it can
     * never occur in a sequence built from those four letters, and inserting
     * it would index next[][] with an invalid column.
     */
    void charu(char *s){
        ll i,j,u=0;
        ll len=strlen(s);
        for(i=0;i<len;i++){
            if(idx(s[i])<0)return;
        }
        for(i=0;i<len;i++){
            int c=idx(s[i]);
            if(next[u][c]==-1){
                /* Allocate a fresh node and clear its child slots. */
                sz++;
                val[sz]=0;
                next[u][c]=sz;
                u=sz;
                for(j=0;j<4;j++){
                    next[u][j]=-1;
                }
            }
            else{
                u=next[u][c];
            }
        }
        val[u]=1;
    }

    /*
     * Breadth-first construction of the fail links. Every missing child is
     * replaced by the transition of the fail node, so afterwards next[x][c]
     * is a valid node index for every node x and every letter c.
     */
    void build(){
        int i;
        int front,rear;
        front=1;rear=0;
        for(i=0;i<4;i++){
            if(next[root][i]==-1){
                /* No pattern starts with this letter: stay at the root. */
                next[root][i]=root;
            }
            else{
                fail[next[root][i]]=root;
                rear++;
                q[rear]=next[root][i];
            }
        }
        while(front<=rear){
            int x=q[front];
            /*
             * Important: if the fail node of x is dangerous, then a suffix of
             * the string spelled by x ends with a pattern, so x is dangerous
             * as well. The fail node is shallower than x and was therefore
             * dequeued earlier, so its val is already final here.
             */
            if(val[fail[x]])
                val[x]=1;
            front++;
            for(i=0;i<4;i++){
                if(next[x][i]==-1){
                    next[x][i]=next[fail[x]][i];
                }
                else{
                    fail[next[x][i]]=next[fail[x]][i];
                    rear++;
                    q[rear]=next[x][i];
                }
            }
        }
    }
}ac;
/*
 * Dense matrix of ll values. n is the number of rows and m the number of
 * columns actually in use; the backing store is fixed at 105 x 105.
 * The member i serves as the loop counter of init_danwei().
 */
struct matrix{
    ll n,m,i;
    ll data[105][105];

    /*
     * Writes 1 on the first n diagonal entries. Off-diagonal entries are left
     * untouched, so the caller has to zero data beforehand to obtain the
     * identity matrix.
     */
    void init_danwei(){
        i=0;
        while(i<n){
            data[i][i]=1;
            ++i;
        }
    }
};

/*
 * Returns the product a*b with every entry reduced modulo MOD.
 * The result has a.n rows and b.m columns; only that region of the returned
 * matrix is written. Terms whose left factor is not positive are skipped,
 * since they contribute nothing to the sum.
 */
matrix multi(matrix &a,matrix &b){
    matrix prod;
    prod.n=a.n;
    prod.m=b.m;

    for(ll r=0;r<prod.n;r++)
        for(ll c=0;c<prod.m;c++)
            prod.data[r][c]=0;

    for(ll r=0;r<a.n;r++){
        for(ll mid=0;mid<a.m;mid++){
            const ll lhs=a.data[r][mid];
            if(lhs<=0)
                continue;
            for(ll c=0;c<b.m;c++){
                const ll term=(lhs*b.data[mid][c])%MOD;
                prod.data[r][c]=(prod.data[r][c]+term)%MOD;
            }
        }
    }
    return prod;
}

/*
 * Returns a raised to the n-th power (entries modulo MOD, via multi()) using
 * binary exponentiation. For n <= 0 the identity matrix is returned.
 *
 * The squaring is done on a local copy, so the caller's matrix is left
 * unchanged (the parameter stays a non-const reference only to keep the
 * signature). The final squaring, whose result would never be used, is
 * skipped.
 */
matrix fast_mod(matrix &a,ll n){
    matrix ans;
    ans.n=a.n;
    ans.m=a.m;
    /* init_danwei() only sets the diagonal, so clear everything first. */
    memset(ans.data,0,sizeof(ans.data));
    ans.init_danwei();

    matrix base=a;
    while(n>0){
        if(n&1)ans=multi(ans,base);
        n>>=1;
        if(n>0)base=multi(base,base);
    }
    return ans;
}


int main()
{
    ll n,m,i,j;
    while(scanf("%lld%lld",&m,&n)!=EOF)
    {
        ac.init();
        for(i=1;i<=m;i++){
            scanf("%s",s);
            ac.charu(s);
        }
        ac.build();
        matrix a;
        a.n=a.m=ac.sz+1;
        memset(a.data,0,sizeof(a.data));
        for(i=0;i<=ac.sz;i++){
            for(j=0;j<4;j++){
                if(ac.val[ac.next[i][j] ]==0 ){
                    a.data[i][ac.next[i][j] ]++;
                }
            }
        }
        matrix cnt;
        cnt=fast_mod(a,n);
        ll sum=0;
        for(i=0;i<=cnt.n;i++){
            sum=(sum+cnt.data[0][i])%MOD;
        }
        printf("%lld\n",sum);
    }
    return 0;
}

posted @ 2016-02-07 16:56 Herumw 阅读(115) 评论(0) 编辑收藏举报

刷新页面返回顶部

Herumw's Blog

poj2778 DNA Sequence(AC自动机+矩阵快速幂）

公告