寒假集训日志（四）——字符串算法（KMP，Manacher，AC自动机）

　　今天只做了三道题，早中晚各一道，晚上本来还可以多做一道，但是自己懈怠了，以后一定不能这样了，坚持完成每天的任务。

　　今天的主要内容：

　　1.KMP算法：用来比对字符串或数字串的高效算法（总觉得还可以再进行优化。。。）

　　2.Manacher算法：用来求最长回文子串的算法

　　3.字典树(Trie)：这个基本也就是一种数据结构了，涉及了树的构建，插入，遍历

　　4.AC自动机：其实也就是KMP算法和Trie的结合

　　虽然基本弄懂了原理，但是基本也就是照着模板打了一遍，自己还是太弱了一点。

对比算法： KMP算法（难点在于next表的构建）

回文子串算法：Manacher算法：详细http://blog.csdn.net/ggggiqnypgjg/article/details/6645824/。

AC自动机： http://www.cnblogs.com/Booble/archive/2010/12/05/1897121.html

A - Number Sequence（KMP算法模板题）

Time Limit:5000MS Memory Limit:32768KB 64bit IO Format:%I64d & %I64u

Submit Status

Description

Given two sequences of numbers : a[1], a[2], ...... , a[N], and b[1], b[2], ...... , b[M] (1 <= M <= 10000, 1 <= N <= 1000000). Your task is to find a number K which make a[K] = b[1], a[K + 1] = b[2], ...... , a[K + M - 1] = b[M]. If there are more than one K exist, output the smallest one.

Input

The first line of input is a number T which indicate the number of cases. Each case contains three lines. The first line is two numbers N and M (1 <= M <= 10000, 1 <= N <= 1000000). The second line contains N integers which indicate a[1], a[2], ...... , a[N]. The third line contains M integers which indicate b[1], b[2], ...... , b[M]. All integers are in the range of [-1000000, 1000000].

Output

For each test case, you should output one line which only contain K described above. If no such K exists, output -1 instead.

Sample Input

2
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 1 3
13 5
1 2 1 2 3 1 2 3 1 3 2 1 2
1 2 3 2 1

Sample Output

6
-1

#include<iostream>
#include<cstdio>
#include<cmath>
#include<algorithm>
#include<cstring>
#include<iomanip>
#include<fstream>
typedef long long LL;
using namespace std;
// Capacity of the work arrays; equals the largest text length N the problem allows.
const int maxN = 1000000;

int target[maxN];   // text sequence, stored 0-based
int pattern[maxN];  // pattern sequence, stored 0-based
int a[maxN];        // fallback table written by getNext(), indexed by 1-based pattern position
int n;              // number of values stored in target
int m;              // number of values stored in pattern

// Author's note: this version is written to be easy to follow and was typed
// by hand; the key step is not fully digested yet. A terser version lives in
// the senior teammate's template.
//
// Fills a[1..m] for the current pattern. Positions are 1-based, so pattern
// position k corresponds to pattern[k - 1]. a[k] is the 1-based position to
// retry after a mismatch at position k; 0 means "no position to retry".
// When the retry position holds the same value as position k, its own table
// entry is copied instead, so a retry never lands on an equal value.
void getNext(){
    a[1] = 0;
    int filled = 1;     // last 1-based position whose table entry is known
    int candidate = 0;  // 1-based fallback candidate for the next position
    for( ; filled < m; ){
        // Shrink the candidate until it can be extended by one element.
        for( ; candidate > 0 && pattern[filled - 1] != pattern[candidate - 1]; candidate = a[candidate] ){
        }
        ++candidate;
        ++filled;
        a[filled] = ( pattern[filled - 1] == pattern[candidate - 1] ) ? a[candidate] : candidate;
    }
}

// Reads T test cases. Each case gives n and m, then the n text values and the
// m pattern values. Prints the smallest 1-based index K at which the pattern
// occurs in the text, or -1 when it does not occur.
int main()
{
    int T;
    cin>>T;
    while( T--){
        cin>>n>>m;
        for( int i = 0 ; i<n; ++i){
            scanf("%d", &target[i]);
        }
        for( int i =0 ;  i< m ; ++i){
            scanf("%d", & pattern[i]);
        }
        getNext();

        int j = 0;       // 0-based index of the next pattern element to match
        int flag = -1;   // answer for this case; stays -1 when nothing matches
        for( int i = 0; i <n ; ++i){
            // a[] stores 1-based retry positions, while j is 0-based, so the
            // entry for the current position is a[j + 1] and the retry index
            // is a[j + 1] - 1. After every fallback the SAME text element must
            // be compared again; skipping that comparison reports matches
            // that do not exist (text 1 2 1 2 9 2 3, pattern 1 2 1 2 3).
            bool matched = ( target[i] == pattern[j] );
            while( !matched && a[j + 1] != 0 ){
                j = a[j + 1] - 1;
                matched = ( target[i] == pattern[j] );
            }
            if( !matched ){
                // No retry position left: drop this text element and restart.
                j = 0;
                continue;
            }
            if( ++j == m ){
                // i is the 0-based end of the match; convert to a 1-based start.
                flag = i - m + 2;
                break;
            }
        }
        cout<<flag<<endl;
    }
    return 0;
}
 
B - 最长回文
 Time Limit:2000MS     Memory Limit:32768KB     64bit IO Format:%I64d & %I64uSubmit Status
Description
 给出一个只由小写英文字符a,b,c...y,z组成的字符串S,求S中最长回文串的长度. 
回文就是正反读都是一样的字符串,如aba, abba等 
      

          
      
Input

        输入有多组case,不超过120组,每组输入为一行小写英文字符a,b,c...y,z组成的字符串S 
两组case之间由空行隔开(该空行不用处理) 
       
字符串长度len <= 110000 

          
      
Output

        每一行一个整数x,对应一组case,表示该组case的字符串中所包含的最长回文长度. 
       

          
      
Sample Input
aaaa

abab 
 
Sample Output
4
3 

    #include<cstdio>  
    #include<cstring>  
    #include<algorithm>  
    using namespace std;  
    // Buffer capacity: the expanded form of a string of length len needs
    // 2*len + 3 characters, and the input length is at most 110000.
    const int N = 220005;
    char str[N];  // input string; main() expands it in place before calling manacher()
    int p[N];     // p[i] = palindrome radius around index i, counting the centre itself

    // Computes the palindrome radius p[i] for every index i in [1, len) of s.
    // Relies on the caller's sentinels: s[0] and s[len] must differ from each
    // other and from every character in between, otherwise the expansion loop
    // would run off the ends of the buffer.
    void manacher(char *s, int len)
    {
        p[0] = 1;
        int reach = 0;   // one past the right end of the furthest-reaching palindrome
        int centre = 0;  // centre index of that palindrome
        int i = 1;
        while (i < len) {
            // Start from what the mirror position already guarantees.
            int radius = 1;
            if (reach > i) {
                radius = std::min(p[2 * centre - i], reach - i);
            }
            // Extend character by character.
            while (s[i + radius] == s[i - radius]) {
                ++radius;
            }
            p[i] = radius;
            if (i + radius > centre + p[centre]) {
                centre = i;
                reach = i + radius;
            }
            ++i;
        }
    }
      
    // Reads whitespace-separated strings until end of input and prints, for
    // each one, the length of its longest palindromic substring.
    int main()
    {
        while (scanf("%s", str) != EOF) {
            const int rawLen = strlen(str);
            // Expand in place, back to front so unread characters are never
            // overwritten: character k moves to 2k+2 with a '#' in front of
            // it; the terminating '\0' moves along and gets a '#' as well.
            for (int src = rawLen; src >= 0; --src) {
                const int dst = 2 * src + 1;
                str[dst] = '#';
                str[dst + 1] = str[src];
            }
            str[0] = '*'; // left sentinel, keeps manacher() inside the array
            const int len = 2 * rawLen + 2;
            manacher(str, len);
            // A radius r in the expanded string is a palindrome of length r-1
            // in the original string.
            int best = 0;
            for (int k = 0; k < len; ++k) {
                if (p[k] - 1 > best) {
                    best = p[k] - 1;
                }
            }
            printf("%d\n", best);
        }
        return 0;
    }
C - 统计难题
 Time Limit:2000MS     Memory Limit:65535KB     64bit IO Format:%I64d & %I64uSubmit Status
Description
Ignatius最近遇到一个难题,老师交给他很多单词(只有小写字母组成,不会有重复的单词出现),现在老师要他统计出以某个字符串为前缀的单词数量(单词本身也是自己的前缀).
 
Input
 输入数据的第一部分是一张单词表,每行一个单词,单词的长度不超过10,它们代表的是老师交给Ignatius统计的单词,一个空行代表单词表的结束.第二部分是一连串的提问,每行一个提问,每个提问都是一个字符串. 

注意:本题只有一组测试数据,处理到文件结束. 
       

          
      
Output

        对于每个提问,给出以该字符串为前缀的单词的数量. 
       

          
      
Sample Input
banana
band
bee
absolute
acm

ba
b
band
abc 
 
Sample Output
2
3
1
0 

#include<algorithm>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<string>
typedef long long LL;
using namespace std;

// Number of child slots per trie node: one for each letter 'a'..'z'.
const int maxN = 26;

// Only referenced by the commented-out static-pool allocation; the
// heap-allocating code in this file never reads or writes it.
int counter = 0;

// One trie node.
struct node {
    node *child[maxN];  // child[c] is the node reached by letter 'a' + c, or NULL
    int cnt;            // number of inserted words whose path goes through this node
};

// node Memory[1000000];

// Root of the trie (stands for the empty prefix).
node *root;

// Allocates a trie node on the heap with every child slot empty and a count
// of 1 (the word being inserted when the node is created).
// The caller owns the returned node.
node  *createNew (){
    // Value-initialisation clears every child pointer and the counter.
    node *fresh = new node();
    // p = &Memory[counter++];   (alternative: take the node from a static pool)
    fresh->cnt = 1;
    return fresh;
}

void Insert ( char *str){
        node *current = NULL;
        node *newnode = NULL;
        int len = strlen( str);
        current = root ;
        for( int i = 0 ; i < len ; ++i){
            int index = str[i] - 'a';
            if( current->child[index] == NULL){
                newnode = createNew();
                current->child[index] = newnode;
                current = newnode;
            }
            else{
                current = current->child[index];
                (current->cnt)++;
            }
        }
}

int find_word( char *str){
    node *current = NULL;
    int len = strlen(str);
    current = root ;
    for( int i = 0 ; i< len ; ++i){
        int index = str[i] - 'a' ;
        if( current->child[index] == NULL){
            return 0;
        }
        else{
            current  = current ->child[index];
        }
    }
    return current->cnt;
}

// Frees the subtree rooted at the given node, children first.
// The parameter shadows the global root and is passed by value, so the
// caller's pointer is left dangling and must not be used afterwards.
// Passing NULL is allowed and does nothing.
void release( node *root)
{
    if( root == NULL ){
        return;
    }
    for( int i = 0 ; i < maxN ; ++i ){
        release( root->child[i] );
    }
    // Nodes are allocated with `new` in createNew(), so they must be released
    // with `delete`; pairing `new` with free() is undefined behaviour.
    delete root;
}

// Reads the word list (one word per line, ended by an empty line), then
// answers every following line as a prefix query until end of input.
int main()
{
    // Lines are read into a std::string so their length is not limited.
    // A fixed 11-byte getline buffer sets failbit on any longer line (a long
    // query, or a 10-letter word followed by '\r'), which would silently
    // drop every remaining query.
    std::string line;
    root = createNew() ;
    while( std::getline( cin, line ) ){
        // Tolerate CRLF input: a trailing '\r' is not part of the word.
        if( !line.empty() && line[line.size() - 1] == '\r' ){
            line.erase( line.size() - 1 );
        }
        if( line.empty() ){
            break;  // the empty line ends the word list
        }
        // Insert() only reads through its char* parameter.
        Insert( const_cast<char *>( line.c_str() ) );
    }
    while( std::getline( cin, line ) ){
        if( !line.empty() && line[line.size() - 1] == '\r' ){
            line.erase( line.size() - 1 );
        }
        // find_word() only reads through its char* parameter.
        printf("%d\n", find_word( const_cast<char *>( line.c_str() ) ) );
    }
    release( root);
    return 0;
}
Keywords Search（AC自动机模板）
Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 131072/131072 K (Java/Others)
Total Submission(s): 47860    Accepted Submission(s): 15250


Problem Description
In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched.
To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match.
 

Input
First line will contain one integer means how many cases will follow by.
Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000)
Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50.
The last line is the description, and the length will be not longer than 1000000.
 

Output
Print how many keywords are contained in the description.
 

Sample Input
1
5
she
he
say
shr
her
yasherhs
 

Sample Output
3

    #include<stdio.h>  
    #include<string.h>  
    #include<malloc.h>  
    #include<queue>  
    using namespace std;  
    // Shared line buffer: insert() reads the current keyword from it and
    // query() scans the description held in it. Sized for the longest
    // description (1000000 characters) plus 100 bytes of slack.
    char str[1000100];
      
    // One node of the keyword trie / automaton.
    struct node
    {
        int count;       // keywords ending at this node; query() sets it to -1 once counted
        node *next[26];  // next[c]: child for letter 'a' + c, or NULL
        node *fail;      // failure link assigned by getfail()

        // Resets the node: no children, no keywords, no failure link.
        void init()
        {
            count = 0;
            fail = NULL;
            for (node **slot = next; slot != next + 26; ++slot)
                *slot = NULL;
        }
    } *root;  // root of the current test case's trie
    void insert()  
    {  
        int len,k;  
        node *p=root;  
        len=strlen(str);  
        for(k=0;k<len;k++)  
        {  
            int pos=str[k]-'a';  
            if(p->next[pos]==NULL)  
            {  
                p->next[pos]=new node;  
                p->next[pos]->init();  
                p=p->next[pos];  
            }  
            else   
                p=p->next[pos];  
        }  
        p->count++;  
    }  
    void getfail()  
    {  
        int i;  
           node *p=root,*son,*temp;  
           queue<struct node *>que;  
           que.push(p);   
           while(!que.empty())  
           {  
               temp=que.front();  
               que.pop();  
               for(i=0;i<26;i++)  
               {  
                   son=temp->next[i];  
                   if(son!=NULL)  
                   {  
                       if(temp==root) {son->fail=root;}  
                       else  
                       {  
                           p=temp->fail;  
                           while(p)  
                           {  
                               if(p->next[i])  
                               {  
                                   son->fail=p->next[i];  
                                   break;  
                               }  
                               p=p->fail;  
                           }  
                           if(!p)  son->fail=root;  
                       }  
                       que.push(son);  
                   }  
               }  
           }  
    }  
    // Scans the description held in the global buffer str and prints how many
    // inserted keywords occur in it. Every keyword node is counted at most
    // once: after its count is added it is set to -1. The trie is modified,
    // so the function gives a meaningful answer only once per built trie.
    void query()
    {
        int cnt = 0;
        node *p = root;
        for (const char *ch = str; *ch != '\0'; ++ch)
        {
            const int pos = *ch - 'a';
            if (pos < 0 || pos >= 26)
            {
                // Not a lowercase letter. Keywords contain only 'a'-'z', so
                // none can span this character: restart from root rather
                // than indexing next[] out of bounds.
                p = root;
                continue;
            }
            // Follow fail links until the letter can be consumed or root is reached.
            while (!p->next[pos] && p != root)
                p = p->fail;
            p = p->next[pos];
            if (!p)
                p = root;
            // Collect every keyword ending here by walking the fail chain.
            // temp is only a second pointer into the trie; nodes are never
            // copied. A negative count means this node, and therefore the
            // rest of its chain, was already collected.
            for (node *temp = p; temp != root && temp->count >= 0; temp = temp->fail)
            {
                cnt += temp->count;
                temp->count = -1;
            }
        }
        printf("%d\n", cnt);
    }
    int main()  
    {  
        int cas,n;  
        scanf("%d",&cas);  
        while(cas--)  
        {  
            root=new node;  
            root->init();  
            root->fail=NULL;  
            scanf("%d",&n);  
            int i;  
            getchar();  
            for(i=0;i<n;i++)  
            {  
                gets(str);  
                insert();  
            }  
            getfail();  
            gets(str);  
            query();  
        }  
        return 0;  
    }  
 


ACzidongji 

posted @ 2016-01-25 16:29 W2W 阅读(173) 评论(0) 编辑收藏举报

刷新页面返回顶部

W2W

做当下我认为正确的事情

寒假集训日志（四）——字符串算法（KMP，Manacher，AC自动机）

Keywords Search(AC自动机模板）

公告