HDU 2457 DNA repair(AC自动机+DP)

DNA repair

Time Limit: 5000/2000 MS (Java/Others)    Memory Limit: 32768/32768 K (Java/Others)
Total Submission(s): 2791    Accepted Submission(s): 1494


Problem Description
Biologists finally invent techniques of repairing DNA that contains segments causing kinds of inherited diseases. For the sake of simplicity, a DNA is represented as a string containing characters 'A', 'G' , 'C' and 'T'. The repairing techniques are simply to change some characters to eliminate all segments causing diseases. For example, we can repair a DNA "AAGCAG" to "AGGCAC" to eliminate the initial causing disease segments "AAG", "AGC" and "CAG" by changing two characters. Note that the repaired DNA can still contain only characters 'A', 'G', 'C' and 'T'.

You are to help the biologists to repair a DNA by changing least number of characters.
 

 

Input
The input consists of multiple test cases. Each test case starts with a line containing one integer N (1 ≤ N ≤ 50), which is the number of DNA segments causing inherited diseases.
The following N lines give N non-empty strings of length not greater than 20 containing only characters in "AGCT", which are the DNA segments causing inherited diseases.
The last line of the test case is a non-empty string of length not greater than 1000 containing only characters in "AGCT", which is the DNA to be repaired.

The last test case is followed by a line containing a single zero.
 

 

Output
For each test case, print a line containing the test case number( beginning with 1) followed by the
number of characters which need to be changed. If it's impossible to repair the given DNA, print -1.
 

 

Sample Input
2
AAA
AAG
AAAG
2
A
TG
TGAATG
4
A
G
C
T
AGT
0
 

 

Sample Output
Case 1: 1
Case 2: 4
Case 3: -1
 
分析:结果会随我们修改的字母不同,而出现动态的变化,所以要用到DP找到最小值
     dp[i][j]表示到字符串的第i个字母为止,当前在自动机的结点j的最小需要修改的字母数量
     这些病毒,字符串不能包含它们,也就是不能匹配到它们的最后一个字母.用end数组来记录不能被匹配的"地雷区",
     这里还要用到AC自动机 fail数组的性质:fail[x]指向的结点,对应"x所代表字符串的最长真后缀,且该后缀同时是某个病毒串的前缀".
     比如我们的字符串现在匹配到了某个部分,字典树到了 acbc的位置上,那么我们判断这里没有以acbc结尾的病毒后,
     还需要判断是否有以 cbc、bc、c 结尾的病毒,沿着fail指针逐级跳转就能依次检查这些后缀(建自动机时把end标记沿fail指针向下传递,查询时就不必再逐级跳转)。
代码如下:
#include <stdio.h>
#include <algorithm>
#include <iostream>
#include <string.h>
#include <queue>
using namespace std;
// Sentinel cost meaning "this DP state is unreachable". 0x3f3f3f3f is
// 1061109567, so adding 1 to it still fits in a 32-bit int.
const int INF = 0x3f3f3f3f;
// Shared input buffer: main() reads every pattern and the DNA string into it
// (at most 1009 characters plus the terminating NUL).
char buf[1010];
/**
 * Aho-Corasick automaton over the alphabet {A, G, C, T}, plus a DP that
 * computes the minimum number of character substitutions needed so that a
 * text contains none of the inserted patterns.
 *
 * Usage: init(), insert() every pattern, build(), then solve().
 *
 * Capacity: at most 1010 trie nodes and, in solve(), a text of at most 1009
 * characters. The fixed-size arrays are not bounds-checked.
 */
struct Trie
{
    int Next[1010][4];   // transition table; -1 marks a missing edge until build() fills it
    int fail[1010];      // failure link: longest proper suffix that is also a pattern prefix
    bool end[1010];      // true if reaching this node means some pattern has just been matched
    int root,L;          // root node id and number of nodes allocated so far
    int dp[1010][1010];  // dp[i][j]: min changes so the first i characters end in node j

    /** Allocates a fresh node with no outgoing edges and returns its id. */
    int newnode()
    {
        for(int c = 0;c < 4;c++)
            Next[L][c] = -1;
        end[L] = false;
        return L++;
    }

    /** Resets the automaton to a single root node. */
    void init()
    {
        L = 0;
        root = newnode();
    }

    /**
     * Maps a nucleotide letter to its column in Next.
     * Returns 0..3 for 'A', 'G', 'C', 'T' and -1 for any other character
     * (previously control fell off the end of the function, which is
     * undefined behaviour).
     */
    int get_id(char ch)
    {
        switch(ch)
        {
            case 'A': return 0;
            case 'G': return 1;
            case 'C': return 2;
            case 'T': return 3;
            default:  return -1;
        }
    }

    /**
     * Adds one forbidden pattern (NUL-terminated) to the trie.
     * A pattern containing a character outside "AGCT" is skipped entirely:
     * it can never occur in a repaired strand, and inserting it would index
     * Next with -1.
     */
    void insert(char buf[])
    {
        const int len = static_cast<int>(strlen(buf));
        for(int i = 0;i < len;i++)
            if(get_id(buf[i]) < 0)
                return;

        int now = root;
        for(int i = 0;i < len;i++)
        {
            const int c = get_id(buf[i]);
            if(Next[now][c] == -1)
                Next[now][c] = newnode();
            now = Next[now][c];
        }
        end[now] = true;
    }

    /**
     * Computes failure links breadth-first and completes the transition
     * table so that every node has a valid edge for all four letters.
     * Must be called after the last insert() and before solve().
     */
    void build()
    {
        std::queue<int> pending;
        fail[root] = root;
        for(int c = 0;c < 4;c++)
        {
            if(Next[root][c] == -1)
            {
                Next[root][c] = root;
            }
            else
            {
                fail[Next[root][c]] = root;
                pending.push(Next[root][c]);
            }
        }
        while(!pending.empty())
        {
            const int now = pending.front();
            pending.pop();
            // A node is forbidden if any suffix of its string is a pattern.
            // fail[now] was dequeued earlier, so its flag is already final.
            if(end[fail[now]])
                end[now] = true;
            for(int c = 0;c < 4;c++)
            {
                if(Next[now][c] == -1)
                {
                    // Missing edge: reuse the transition of the failure node.
                    Next[now][c] = Next[fail[now]][c];
                }
                else
                {
                    // Child's failure link: follow the parent's failure node along c.
                    fail[Next[now][c]] = Next[fail[now]][c];
                    pending.push(Next[now][c]);
                }
            }
        }
    }

    /**
     * Returns the minimum number of characters of buf that must be replaced
     * (by letters of "AGCT") so that no inserted pattern occurs in it, or -1
     * if that is impossible. A character outside "AGCT" always counts as one
     * replacement. Requires build() to have been called.
     */
    int solve(char buf[])
    {
        const int kInf = 0x3f3f3f3f;  // marks an unreachable DP state
        const int len = static_cast<int>(strlen(buf));

        for(int i = 0;i <= len;i++)
            for(int j = 0;j < L;j++)
                dp[i][j] = kInf;
        dp[0][root] = 0;

        for(int i = 0;i < len;i++)
        {
            // Letter currently at position i; evaluated once per position.
            const int cur = get_id(buf[i]);
            for(int j = 0;j < L;j++)
            {
                if(dp[i][j] >= kInf)
                    continue;
                for(int k = 0;k < 4;k++)
                {
                    const int to = Next[j][k];
                    if(end[to])
                        continue;  // writing letter k here would complete a pattern
                    const int cost = dp[i][j] + (k == cur ? 0 : 1);
                    dp[i+1][to] = std::min(dp[i+1][to], cost);
                }
            }
        }

        int best = kInf;
        for(int j = 0;j < L;j++)
            best = std::min(best, dp[len][j]);
        return best >= kInf ? -1 : best;
    }

    /** Prints every node's failure link, forbidden flag and transitions to stdout. */
    void debug()
    {
        for(int i = 0;i < L;i++)
        {
            printf("id = %3d,fail = %3d,end = %3d,chi = [",i,fail[i],end[i]);
            for(int j = 0;j < 4;j++)
                printf("%2d",Next[i][j]);
            printf("]\n");
        }
    }
};
// The one automaton instance used by main(). Defined at namespace scope, so
// its member arrays (including the 1010x1010 dp table) have static storage.
Trie ac;
/**
 * Reads test cases until a pattern count of 0 (or end/malformed input).
 * For each case: reads n forbidden patterns and one DNA string, then prints
 * "Case k: answer", where answer is the minimum number of changes or -1.
 */
int main()
{
    int n;
    int Case = 0;
    // "== 1" also stops on a malformed count; "!= EOF" would loop forever
    // there, because scanf returns 0 without consuming the offending input.
    while(scanf("%d",&n) == 1 && n != 0)
    {
        Case++;
        ac.init();
        for(int i = 0;i < n;i++)
        {
            // The field width keeps the read inside the 1010-byte buffer.
            if(scanf("%1009s",buf) != 1)
                return 0;
            ac.insert(buf);
        }
        ac.build();
        if(scanf("%1009s",buf) != 1)
            return 0;
        printf("Case %d: %d\n",Case,ac.solve(buf));
    }
    return 0;
}

 

 
    
 
    
 
posted @ 2017-09-22 23:42  hinata_hajime  阅读(163)  评论(0编辑  收藏  举报