寒假集训日志(四)——字符串算法(KMP,Manacher,AC自动机)

  今天只做了三道题,早中晚各一道,晚上本来还可以多做一道,但是自己懈怠了,以后一定不能这样了,坚持完成每天的任务。

  今天的主要内容:

  1.KMP算法:用来比对字符串或数字串的高效算法(总觉得还可以再进行优化。。。)

  2.Manacher算法:用来求最长回文子串的线性时间算法

  3.字典树(Trie):这个基本也就是一种数据结构了,涉及了树的构建,插入,遍历

  4.AC自动机:用于多模式串匹配,其实也就是KMP算法(失配指针)和Trie的结合

  虽然基本弄懂了原理,但是基本也就是照着模板打了一遍,自己还是太弱了一点。

字符串匹配算法: KMP算法(难点在于next表的构建)

回文子串算法:Manacher算法:详细http://blog.csdn.net/ggggiqnypgjg/article/details/6645824/。

AC自动机: http://www.cnblogs.com/Booble/archive/2010/12/05/1897121.html

 

A - Number Sequence(KMP算法模板题)
Time Limit:5000MS     Memory Limit:32768KB     64bit IO Format:%I64d & %I64u
Submit Status

Description

Given two sequences of numbers : a[1], a[2], ...... , a[N], and b[1], b[2], ...... , b[M] (1 <= M <= 10000, 1 <= N <= 1000000). Your task is to find a number K which make a[K] = b[1], a[K + 1] = b[2], ...... , a[K + M - 1] = b[M]. If there are more than one K exist, output the smallest one.
 

Input

The first line of input is a number T which indicate the number of cases. Each case contains three lines. The first line is two numbers N and M (1 <= M <= 10000, 1 <= N <= 1000000). The second line contains N integers which indicate a[1], a[2], ...... , a[N]. The third line contains M integers which indicate b[1], b[2], ...... , b[M]. All integers are in the range of [-1000000, 1000000].
 

Output

For each test case, you should output one line which only contain K described above. If no such K exists, output -1 instead.
 

Sample Input

2 13 5 1 2 1 2 3 1 2 3 1 3 2 1 2 1 2 3 1 3 13 5 1 2 1 2 3 1 2 3 1 3 2 1 2 1 2 3 2 1
 

Sample Output

6 -1
#include<iostream>
#include<cstdio>
#include<cmath>
#include<algorithm>
#include<cstring>
#include<iomanip>
#include<fstream>
typedef long long LL;
using namespace std;
const int maxN = 1000000;

int target[maxN];   // text sequence, stored 0-based: target[0..n-1]
int pattern[maxN];  // pattern sequence, stored 0-based: pattern[0..m-1]
int a[maxN];        // optimized KMP failure table, 1-based: a[1..m]
int n, m;           // lengths of target and pattern

// Builds the optimized ("nextval") KMP failure table for pattern[0..m-1].
// Positions are 1-based: a[k] is the pattern position to retry after a
// mismatch at position k, and 0 means "no retry possible, advance the text".
// When the retry position holds the same value as position k, the retry would
// fail again, so the entry is shortened to that position's own fallback.
// Requires m >= 1.
void getNext()
{
    int i = 1;  // 1-based position whose successor entry is being filled in
    int t = 0;  // 1-based length of the current border (0 = none)
    a[1] = 0;
    while (i < m) {
        while (t > 0 && pattern[i - 1] != pattern[t - 1]) {
            t = a[t];
        }
        ++t;
        ++i;
        if (pattern[i - 1] == pattern[t - 1]) {
            a[i] = a[t];
        } else {
            a[i] = t;
        }
    }
}

// Returns the smallest 1-based index K with target[K..K+m-1] == pattern[1..m],
// or -1 when the pattern does not occur. getNext() must have been called.
static int kmpSearch()
{
    int j = 0;  // 0-based pattern position to compare next
    for (int i = 0; i < n; ++i) {
        // On a mismatch keep falling back on the SAME text element. The table
        // is 1-based, hence the "+ 1" / "- 1"; a table value of 0 becomes
        // j == -1, which means target[i] cannot extend any prefix.
        while (j >= 0 && target[i] != pattern[j]) {
            j = a[j + 1] - 1;
        }
        ++j;
        if (j == m) {
            return i - m + 2;  // 0-based start is i - m + 1; report it 1-based
        }
    }
    return -1;
}

// Reads T test cases (n, m, the text, the pattern) and prints one answer per
// line.
int main()
{
    int T;
    if (scanf("%d", &T) != 1) {
        return 0;
    }
    while (T--) {
        if (scanf("%d %d", &n, &m) != 2) {
            break;
        }
        for (int i = 0; i < n; ++i) {
            scanf("%d", &target[i]);
        }
        for (int i = 0; i < m; ++i) {
            scanf("%d", &pattern[i]);
        }
        getNext();
        printf("%d\n", kmpSearch());
    }
    return 0;
}

 

B - 最长回文
Time Limit:2000MS     Memory Limit:32768KB     64bit IO Format:%I64d & %I64u
Submit Status

Description

给出一个只由小写英文字符a,b,c...y,z组成的字符串S,求S中最长回文串的长度.
回文就是正反读都是一样的字符串,如aba, abba等
 

Input

输入有多组case,不超过120组,每组输入为一行小写英文字符a,b,c...y,z组成的字符串S
两组case之间由空行隔开(该空行不用处理)
字符串长度len <= 110000
 

Output

每一行一个整数x,对应一组case,表示该组case的字符串中所包含的最长回文长度.
 

Sample Input

aaaa abab
 

Sample Output

4 3
    #include<cstdio>  
    #include<cstring>  
    #include<algorithm>  
    using namespace std;
    const int N = 220005;
    char str[N];  // input text; main() rewrites it in place with separators
    int p[N];     // p[i]: palindrome radius around index i, centre included

    // Fills p[0..len-1] for the sentinel-delimited string s.
    // The caller must provide a unique character at s[0] and a terminator at
    // s[len] that differs from every other character, so that the expansion
    // loop stops before leaving the buffer.
    void manacher(char *s, int len)
    {
        int center = 0;     // centre of the palindrome reaching furthest right
        int rightEdge = 0;  // first index past that palindrome
        p[0] = 1;
        for (int i = 1; i < len; ++i) {
            int radius = 1;
            if (rightEdge > i) {
                // Reuse the radius of the mirror position, clipped to the
                // part already known to lie inside the covering palindrome.
                const int mirror = 2 * center - i;
                radius = min(p[mirror], rightEdge - i);
            }
            while (s[i - radius] == s[i + radius]) {
                ++radius;
            }
            p[i] = radius;
            if (i + radius > center + p[center]) {
                center = i;
                rightEdge = i + radius;
            }
        }
    }
      
    // For every whitespace-delimited word on standard input, prints the
    // length of its longest palindromic substring.
    int main()
    {
        while (scanf("%s", str) != EOF) {
            const int rawLen = strlen(str);
            // Expand in place from the back: character k moves to 2k+2 with a
            // '#' in front of it; the terminating '\0' lands at 2*rawLen+2.
            for (int src = rawLen; src >= 0; --src) {
                const int dst = 2 * src + 1;
                str[dst] = '#';
                str[dst + 1] = str[src];
            }
            str[0] = '*';  // unique left sentinel keeps the expansion in bounds
            const int extLen = 2 * rawLen + 2;
            manacher(str, extLen);
            int best = 0;
            for (int i = 0; i < extLen; ++i) {
                if (p[i] - 1 > best) {
                    best = p[i] - 1;
                }
            }
            printf("%d\n", best);
        }
        return 0;
    }
C - 统计难题
Time Limit:2000MS     Memory Limit:65535KB     64bit IO Format:%I64d & %I64u
Submit Status

Description

Ignatius最近遇到一个难题,老师交给他很多单词(只有小写字母组成,不会有重复的单词出现),现在老师要他统计出以某个字符串为前缀的单词数量(单词本身也是自己的前缀).
 

Input

输入数据的第一部分是一张单词表,每行一个单词,单词的长度不超过10,它们代表的是老师交给Ignatius统计的单词,一个空行代表单词表的结束.第二部分是一连串的提问,每行一个提问,每个提问都是一个字符串.

注意:本题只有一组测试数据,处理到文件结束.
 

Output

对于每个提问,给出以该字符串为前缀的单词的数量.
 

Sample Input

banana band bee absolute acm ba b band abc
 

Sample Output

2 3 1 0
#include<algorithm>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<fstream>
#include<iomanip>
#include<iostream>
#include<string>
typedef long long LL;
using namespace std;

const int maxN  = 26;   // alphabet size: lowercase 'a'..'z'
int counter = 0;        // kept for compatibility; not used by the functions below

// Trie node. cnt is the number of inserted words whose path runs through this
// node, i.e. the number of words having the node's string as a prefix.
typedef struct node{
    struct node *child[maxN];
    int cnt ;
}node;

node *root ;            // trie root; must be set with createNew() before use

// Maps a character to its child slot, or -1 when it is outside 'a'..'z'.
static int childIndex( char c){
    if( c < 'a' || c > 'z'){
        return -1;
    }
    return c - 'a';
}

// Allocates a node with no children and cnt == 1 (the word being inserted
// when the node is created). Free it with release().
node  *createNew (){
    node *p = new node;
    p->cnt = 1;
    for( int i = 0 ; i < maxN ; ++i){
        p->child[i] = NULL;
    }
    return p;
}

// Inserts the word str into the trie rooted at the global root, creating
// missing nodes and incrementing cnt on nodes that already exist.
// A word containing any character outside 'a'..'z' is rejected as a whole and
// leaves the trie untouched (previously such a character indexed child[] out
// of bounds).
void Insert ( char *str){
    if( root == NULL || str == NULL){
        return ;
    }
    const int len = strlen( str);
    for( int i = 0 ; i < len ; ++i){
        if( childIndex( str[i]) < 0){
            return ;
        }
    }
    node *current = root ;
    for( int i = 0 ; i < len ; ++i){
        const int index = childIndex( str[i]);
        if( current->child[index] == NULL){
            current->child[index] = createNew();
            current = current->child[index];
        }
        else{
            current = current->child[index];
            (current->cnt)++;
        }
    }
}

// Returns the number of inserted words that start with str, or 0 when no word
// does (including when str contains a character outside 'a'..'z').
// For the empty string this returns the root's cnt, which createNew() sets to 1.
int find_word( char *str){
    if( root == NULL || str == NULL){
        return 0;
    }
    const int len = strlen(str);
    node *current = root ;
    for( int i = 0 ; i< len ; ++i){
        const int index = childIndex( str[i]);
        if( index < 0 || current->child[index] == NULL){
            return 0;
        }
        current = current->child[index];
    }
    return current->cnt;
}

// Frees the subtree rooted at the given node; a NULL argument is a no-op.
// Nodes come from `new` in createNew(), so they are destroyed with `delete`
// (the previous free() call mismatched the allocator).
// The parameter shadows the global root: the caller's own pointer is not
// reset and must not be used afterwards.
void release( node *root)
{
    if( root == NULL){
        return ;
    }
    for( int i = 0 ; i< maxN ; ++i){
        release( root->child[i] );
    }
    delete root;
}

// Reads one line from standard input into line and drops a trailing '\r'
// (CRLF input). Returns false at end of input.
static bool readLine( std::string &line){
    if( !std::getline( std::cin, line)){
        return false;
    }
    if( !line.empty() && line[line.size() - 1] == '\r'){
        line.erase( line.size() - 1);
    }
    return true;
}

// Reads the word list (terminated by a blank line), then answers one prefix
// query per remaining input line.
// Lines are read into a std::string so that a query longer than the
// 10-character word limit no longer puts the stream into a failed state and
// silently ends the query loop.
int main()
{
    std::string line;
    root = createNew() ;
    while( readLine( line) && !line.empty()){
        // Insert/find_word take char* but only read the text.
        Insert( const_cast<char *>( line.c_str()));
    }
    while( readLine( line)){
        printf("%d\n", find_word( const_cast<char *>( line.c_str())) );
    }
    release( root);
    root = NULL;
    return 0;
}

Keywords Search(AC自动机模板)

Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 131072/131072 K (Java/Others)
Total Submission(s): 47860    Accepted Submission(s): 15250


Problem Description
In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image have a long description, when users type some keywords to find the image, the system will match the keywords with description of image and show the image which the most keywords be matched.
To simplify the problem, giving you a description of image, and some keywords, you should tell me how many keywords will be match.
 

Input
First line will contain one integer means how many cases will follow by.
Each case will contain two integers N means the number of keywords and N keywords follow. (N <= 10000)
Each keyword will only contains characters 'a'-'z', and the length will be not longer than 50.
The last line is the description, and the length will be not longer than 1000000.
 

Output
Print how many keywords are contained in the description.
 

Sample Input
1 5 she he say shr her yasherhs
 

Sample Output
3
    #include<stdio.h>  
    #include<string.h>  
    #include<malloc.h>  
    #include<queue>  
    using namespace std;
    char str[1000000+100];  // shared buffer: the keyword to insert or the text to scan

    // Aho-Corasick trie node.
    struct node
    {
        int count;              // keywords ending here; -1 once query() has counted it
        struct node *next[26];  // children for 'a'..'z'
        struct node *fail;      // failure link (NULL until getfail() runs)
        // Resets the node to "no children, no keyword, no failure link".
        void init()
        {
            count = 0;
            fail = NULL;
            for (int c = 0; c < 26; ++c)
                next[c] = NULL;
        }
    }*root;

    // Adds the keyword currently held in str to the trie under root.
    void insert()
    {
        node *cur = root;
        for (const char *ch = str; *ch != '\0'; ++ch)
        {
            node *&slot = cur->next[*ch - 'a'];
            if (slot == NULL)
            {
                slot = new node;
                slot->init();
            }
            cur = slot;
        }
        cur->count++;
    }

    // Computes the failure link of every node with a breadth-first traversal,
    // so a node's parent link is final before the node itself is processed.
    void getfail()
    {
        queue<node *> pending;
        pending.push(root);
        while (!pending.empty())
        {
            node *parent = pending.front();
            pending.pop();
            for (int c = 0; c < 26; ++c)
            {
                node *child = parent->next[c];
                if (child == NULL)
                    continue;
                // Children of the root, and nodes with no proper suffix in
                // the trie, fall back to the root.
                node *target = root;
                if (parent != root)
                {
                    for (node *f = parent->fail; f != NULL; f = f->fail)
                    {
                        if (f->next[c] != NULL)
                        {
                            target = f->next[c];
                            break;
                        }
                    }
                }
                child->fail = target;
                pending.push(child);
            }
        }
    }

    // Scans the text held in str and prints how many inserted keywords occur
    // in it. Counted nodes are marked with count = -1, so each keyword is
    // counted once and the automaton cannot be queried a second time.
    void query()
    {
        int total = 0;
        node *state = root;
        const int textLen = strlen(str);
        for (int i = 0; i < textLen; ++i)
        {
            const int c = str[i] - 'a';
            // Follow failure links until some state accepts c or we hit root.
            while (state != root && state->next[c] == NULL)
                state = state->fail;
            state = state->next[c];
            if (state == NULL)
                state = root;
            // Collect every keyword that ends at this text position; stop at
            // the first node already visited, since its chain is done too.
            for (node *hit = state; hit != root && hit->count >= 0; hit = hit->fail)
            {
                total += hit->count;
                hit->count = -1;
            }
        }
        printf("%d\n", total);
    }
    int main()  
    {  
        int cas,n;  
        scanf("%d",&cas);  
        while(cas--)  
        {  
            root=new node;  
            root->init();  
            root->fail=NULL;  
            scanf("%d",&n);  
            int i;  
            getchar();  
            for(i=0;i<n;i++)  
            {  
                gets(str);  
                insert();  
            }  
            getfail();  
            gets(str);  
            query();  
        }  
        return 0;  
    }  

 



ACzidongji 
posted @ 2016-01-25 16:29  W2W  阅读(173)  评论(0编辑  收藏  举报