(AC自动机/矩阵快速幂)poj 2778 DNA Sequence

---恢复内容开始---

It's well known that DNA Sequence is a sequence only contains A, C, T and G, and it's very useful to analyze a segment of DNA Sequence,For example, if a animal's DNA sequence contains segment ATC then it may mean that the animal may have a genetic disease. Until now scientists have found several those segments, the problem is how many kinds of DNA sequences of a species don't contain those segments. 

Suppose that DNA sequences of a species is a sequence that consist of A, C, T and G,and the length of sequences is a given integer n. 

Input

First line contains two integer m (0 <= m <= 10), n (1 <= n <=2000000000). Here, m is the number of genetic disease segment, and n is the length of sequences. 

Next m lines each line contain a DNA genetic disease segment, and length of these segments is not larger than 10. 

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

 

非常经典的题目。

注意到将给定的字符串建立AC自动机以后,点数最多也只有100,其中一些点为坏点(即对匹配串在AC自动机上匹配时不能到的点,若到了即含有不应该出现的字符串)。

考虑建立矩阵,第i行第j列表示从编号为i的节点下一步走到编号为j的节点的路径数。因为不能到坏点,也就更不能从坏点出发,将坏点对应的行列全部置为0。

由此得到的矩阵n次幂后第一行的值之和即为符合题意的个数。

整个过程中几个需要注意的细节:

1、原点连出的边如果连向的节点不存在,则将该边连回原点

2、某位置如果后缀是病毒,则该节点也为坏节点

 

  1 #include <cstdio>
  2 #include <iostream>
  3 #include <algorithm>
  4 #include <vector>
  5 #include <set>
  6 #include <map>
  7 #include <string>
  8 #include <cstring>
  9 #include <stack>
 10 #include <queue>
 11 #include <cmath>
 12 #include <ctime>
 13 #include <bitset>
 14 #include <utility>
 15 #include <assert.h>
 16 using namespace std;
 17 #define rank rankk
 18 #define mp make_pair
 19 #define pb push_back
 20 #define xo(a,b) ((b)&1?(a):0)
 21 #define tm tmp
 22 //#define LL ll
 23 typedef unsigned long long ull;
 24 typedef pair<int,int> pii;
 25 typedef long long ll;
 26 typedef pair<ll,int> pli;
 27 typedef pair<ll,ll> pll;
 28 const int INF=0x3f3f3f3f;
 29 const ll INFF=0x3f3f3f3f3f3f3f3fll;
 30 const int MAX=1e6+5;
 31 //const ll MAXN=2e8;
 32 //const int MAX_N=MAX;
 33 const int MOD=1e5;
 34 //const long double pi=acos(-1.0);
 35 const long double eps=1e-9;
 36 ll gcd(ll a,ll b){return b?gcd(b,a%b):a;}
 37 template<typename T>inline T abs(T a) {return a>0?a:-a;}
 38 template<class T> inline
 39 void read(T& num) {
 40     bool start=false,neg=false;
 41     char c;
 42     num=0;
 43     while((c=getchar())!=EOF) {
 44         if(c=='-') start=neg=true;
 45         else if(c>='0' && c<='9') {
 46             start=true;
 47             num=num*10+c-'0';
 48         } else if(start) break;
 49     }
 50     if(neg) num=-num;
 51 }
 52 inline ll powMM(ll a,ll b,ll M){
 53     ll ret=1;
 54     a%=M;
 55 //    b%=M;
 56     while (b){
 57         if (b&1) ret=ret*a%M;
 58         b>>=1;
 59         a=a*a%M;
 60     }
 61     return ret;
 62 }
 63 void open()
 64 {
 65 //    freopen("1009.in","r",stdin);
 66     freopen("out.txt","w",stdout);
 67 }
 68 //int类型matrix模版
 69 const int m_num=105;//矩阵的大小
 70 inline void add(int &a,int b)//保证a、b已全取过一次MOD(可以为负)
 71 {
 72     a=(a+b+MOD)%MOD;if(a<0)a+=MOD;
 73 }
 74 struct matrix
 75 {
 76     int e[m_num][m_num];
 77     int row,col;
 78     matrix(){}
 79     matrix(int _r,int _c):row(_r),col(_c){memset(e,0,sizeof(e));}
 80     matrix operator * (const matrix &tem)const
 81     {
 82         matrix ret=matrix(row,tem.col);//新形成的矩阵规模为 左行右列
 83         for(int i=1;i<=ret.row;i++)
 84             for(int j=1;j<=ret.col;j++)
 85                 for(int k=1;k<=col;k++)
 86                     add(ret.e[i][j],1LL*e[i][k]*tem.e[k][j]%MOD);
 87         return ret;
 88     }
 89     matrix operator + (const matrix &tem)const
 90     {
 91         matrix ret=matrix(row,col);
 92         for(int i=1;i<=row;i++)
 93             for(int j=1;j<=col;j++)add(ret.e[i][j],(e[i][j]+tem.e[i][j])%MOD);
 94         return ret;
 95     }
 96     void getE()//化为单位阵
 97     {
 98         for(int i=1;i<=row;i++)
 99             for(int j=1;j<=col;j++)e[i][j]=(i==j?1:0);
100     }
101 };
102 matrix m_qpow(matrix tem,int x)//矩阵快速幂
103 {
104     matrix ret=matrix(tem.row,tem.row);
105     ret.getE();
106     while(x)
107     {
108         if(x&1)ret=ret*tem;
109         x>>=1;tem=tem*tem;
110     }
111     return ret;
112 }
113 
114 
115 
116 const int SIGMA_SIZE=4;
117 const int MAXNODE=110;
118 /*
119     由于失配过程比较复杂,要反复沿着失配边走,
120     在实践中常常会把下述自动机改造一下,把所有不存在的边“补上”
121     即把计算失配函数中的语句if(!u)continue;改成
122     if(!u){ch[r][c]=ch[f[r]][c];continue;}
123     这样,就完全不需要失配函数,而是对所有转移一视同仁
124     即find函数中的while(j&&!ch[j][c])j=f[j] 可以直接删除
125 
126 */
127 struct AhoCorasickAutomata
128 {
129     int ch[MAXNODE][SIGMA_SIZE];//Trie树
130     int f[MAXNODE];//fail函数
131     int val[MAXNODE];//每个字符串的结尾结点都有非0的val
132     int num;//trie树编号(亦为包含根结点的当前size)
133     //初始化
134     void init()
135     {
136         num=1;
137         memset(ch[0],-1,sizeof(ch[0]));
138         val[0]=0;
139     }
140     //返回字符对应编号
141     int idx(char c)
142     {
143         if(c=='A')return 0;
144         else if(c=='T')return 1;
145         else if(c=='C')return 2;
146         else if(c=='G')return 3;
147     }
148     //插入权值为v的字符串
149     void insert(char *s,int v)
150     {
151         int u=0,n=strlen(s);
152         for(int i=0;i<n;i++)
153         {
154             int c=idx(s[i]);
155             if(ch[u][c]==-1)
156             {
157                 memset(ch[num],-1,sizeof(ch[num]));
158                 val[num]=0;
159                 ch[u][c]=num++;
160             }
161             u=ch[u][c];
162         }
163         val[u]=v;
164     }
165     void getFail()
166     {
167         queue <int> que;
168         f[0]=0;
169         for(int c=0;c<SIGMA_SIZE;c++)
170         {
171             int u=ch[0][c];
172             if(u!=-1)
173             {
174                 f[u]=0;que.push(u);
175             }
176             else
177                 ch[0][c]=0;//让其回到起始点
178         }
179         while(!que.empty())
180         {
181             int r=que.front();que.pop();
182             if(val[f[r]])val[r]=val[f[r]];
183             for(int c=0;c<SIGMA_SIZE;c++)
184             {
185                 int u=ch[r][c];
186                 if(u==-1){ch[r][c]=ch[f[r]][c];continue;}
187                 que.push(u);
188                 f[u]=ch[f[r]][c];
189             }
190         }
191     }
192 }AC;
193 int m,n;
194 char tem[15];
195 
196 int main()
197 {
198     while(~scanf("%d%d",&m,&n))
199     {
200         AC.init();
201         for(int i=1;i<=m;i++){scanf("%s",tem);AC.insert(tem,i);}
202         AC.getFail();
203         matrix an(AC.num,AC.num);
204         for(int i=0;i<AC.num;i++)
205         {
206             for(int j=0;j<4;j++)
207             {
208                 if(!AC.val[i]&&!AC.val[AC.ch[i][j]])
209                     an.e[i+1][AC.ch[i][j]+1]++;
210             }
211         }
212         an=m_qpow(an,n);
213         int re=0;
214         for(int i=1;i<=AC.num;i++)
215             re=(re+an.e[1][i])%MOD;
216         printf("%d\n",re);
217     }
218     return 0;
219 }

 

posted @ 2017-09-19 23:57  perplex  阅读(138)  评论(0编辑  收藏  举报