hdu 2222 Keywords Search

Keywords Search

http://acm.hdu.edu.cn/showproblem.php?pid=2222

Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 131072/131072 K (Java/Others)


Problem Description
In the modern time, Search engine came into the life of everybody like Google, Baidu, etc.
Wiskey also wants to bring this feature to his image retrieval system.
Every image has a long description. When users type some keywords to find an image, the system matches the keywords against the image descriptions and shows the image whose description matches the most keywords.
To simplify the problem, you are given the description of one image and some keywords, and you should tell me how many of the keywords are matched.
 
Input
The first line contains one integer: the number of test cases that follow.
Each case begins with one integer N, the number of keywords, followed by N keywords. (N <= 10000)
Each keyword contains only the characters 'a'-'z', and its length is no longer than 50.
The last line of each case is the description, whose length is no longer than 1000000.
 
Output
Print how many keywords are contained in the description.
 
Sample Input
1
5
she
he
say
shr
her
yasherhs
 
Sample Output
3
 题目大意:给出多个模式串(关键词)和一个文本串,输出有多少个模式串在文本串中出现过(重复给出的模式串分别计数)
AC自动机模板
 
#include<cstdio>
#include<cstring>
#include<queue>
using namespace std;

// Shared state of the solution; every routine below reads or writes these.
int n;                    // number of keywords in the current test case
int t;                    // number of test cases still to be processed
int tot=1;                // highest trie node index handed out so far; node 1 is the root
int len;                  // length of the string most recently read into s or a

int v[500001];            // v[x]: number of inserted keywords that end at node x
int f[500001];            // f[x]: failure link of node x
char s[51];               // keyword currently being inserted
char a[10000001];         // description text that is searched
int trie[500001][27];     // trie[x][c]: node reached from x on letter c (columns 1..26)
queue<int>q;              // work queue for the breadth-first failure-link construction
// Adds the keyword held in s (its first len characters) to the trie.
// Each letter is mapped to a column 1..26; a missing child is allocated by
// bumping tot. The counter v of the node reached after the last letter is
// incremented, so a keyword given twice is counted twice.
void insert()
{
    int node=1;                              // walk starts at the root, node 1
    for(int pos=0;pos<len;++pos)
    {
        const int letter=s[pos]-'a'+1;
        int &slot=trie[node][letter];
        if(slot==0) slot=++tot;              // create the child on first use
        node=slot;
    }
    ++v[node];
}

// Builds the failure links f[] with a breadth-first pass over the trie and, at
// the same time, fills every missing transition with the transition of the
// failure node, so that afterwards trie[x][c] is non-zero for every reachable
// node x and every letter c.
// Node 0 acts as a virtual parent of the root: all of its edges lead to node 1.
void get_fail()
{
    for(int c=1;c<=26;++c) trie[0][c]=1;
    q.push(1);
    while(!q.empty())
    {
        const int cur=q.front();
        q.pop();
        const int fallback=f[cur];           // failure link of the node being expanded
        for(int c=1;c<=26;++c)
        {
            const int child=trie[cur][c];
            if(child)
            {
                // A real child: its failure link is where the parent's
                // failure node goes on the same letter.
                f[child]=trie[fallback][c];
                q.push(child);
            }
            else
            {
                // No child: borrow the transition of the failure node.
                trie[cur][c]=trie[fallback][c];
            }
        }
    }
}
// Runs the description held in a (its first len characters) through the
// automaton and prints, followed by a newline, how many inserted keywords
// occur in it. Expects get_fail() to have completed the transitions in trie.
//
// After every character the failure chain of the current state is walked and
// each keyword counter v on it is added to the answer and then cleared, so a
// keyword is counted only for its first occurrence.
//
// Only keywords are guaranteed to consist of 'a'-'z'. A description byte
// outside that range cannot be part of any match, so it resets the automaton
// to the root instead of being used as an out-of-range column index.
void work()
{
    int root=1,ans=0;
    for(int i=0;i<len;i++)
    {
        int id=a[i]-'a'+1;
        if(id<1||id>26)
        {
            root=1;
            continue;
        }
        root=trie[root][id];
        for(int j=root;j;j=f[j])
        {
            ans+=v[j];
            v[j]=0;                          // never count the same keyword twice
        }
    }
    printf("%d\n",ans);
}
// Restores the per-test-case state: the node counter goes back to 1 (only the
// root exists) and the transition table, failure links and keyword counters
// are zeroed.
void pre()
{
    tot=1;
    memset(trie,0,sizeof trie);
    memset(f,0,sizeof f);
    memset(v,0,sizeof v);
}
// Reads the number of test cases, then for each case: resets the state, reads
// n keywords into the trie, builds the failure links, reads the description
// and prints the number of keywords found in it.
//
// Every %s conversion carries a field width matching its buffer (s holds 50
// characters plus the terminator, a holds 10000000 plus the terminator), so
// an over-long token cannot write past the array. Processing stops as soon as
// any read fails instead of continuing with stale data.
int main()
{
    if(scanf("%d",&t)!=1) return 0;
    while(t--)
    {
        pre();
        if(scanf("%d",&n)!=1) return 0;
        for(int i=1;i<=n;i++)
        {
            if(scanf("%50s",s)!=1) return 0;
            len=static_cast<int>(strlen(s));
            insert();
        }
        get_fail();
        if(scanf("%10000000s",a)!=1) return 0;
        len=static_cast<int>(strlen(a));
        work();
    }
    return 0;
}

 

 

 

开始没有写mark数组,if(!mark[root]) 写的是 if(v[root])

 

数据有缺陷所以A了

 

但会被这组数据卡:

 

5
aba
bab
ab
ba
ababa
abababababababab

 

正确答案是5,错误输出是4

 

构造出trie树会发现,bab末尾有单词标记,但后一个b是由ababa中的后一个b的失配指针指过去的

 

但后者不是单词结尾,所以v[]=0,这就漏了bab

 

加点儿自己的理解:

#include<cstdio>
#include<cstring>
#include<queue>
using namespace std;

// Shared state of the solution; every routine below reads or writes these.
int n;                    // number of keywords in the current test case
int t;                    // number of test cases still to be processed
int tot=1;                // highest trie node index handed out so far; the root is node 1, not node 0
int len;                  // length of the string most recently read into s or a

int v[500001];            // v[x]: number of inserted keywords that end at node x
int f[500001];            // f[x]: failure link of node x
bool mark[500001];        // mark[x]: the search has already stood on node x
char s[51];               // keyword currently being inserted
char a[10000001];         // description text that is searched
int trie[500001][27];     // trie[x][c]: child of node x on letter c (columns 1..26)
queue<int>q;              // work queue for the breadth-first failure-link construction
// Builds the trie: adds the keyword held in s (its first len characters).
// Letters map to columns 1..26, missing children are allocated from tot, and
// the counter v of the final node is incremented once per inserted keyword.
void insert()
{
    int cur=1;                               // the root is node 1
    int pos=0;
    while(pos<len)
    {
        const int c=s[pos++]-'a'+1;
        if(trie[cur][c]==0) trie[cur][c]=++tot;
        cur=trie[cur][c];
    }
    v[cur]+=1;
}
// Builds the failure links f[] with a breadth-first pass over the trie.
//
// Node 0 is a virtual node whose 26 edges all lead to the root (node 1); since
// f[1] is 0, every walk along failure links ends on an existing edge.
//
// This version does not fill missing transitions into trie, so the failure
// node of the parent may lack an edge for the letter. The inner while loop
// therefore keeps following failure links until a node with that edge is
// found. Without it the link would be set to trie[f[now]][i] even when that
// entry is 0, i.e. to the virtual node instead of the longest proper suffix
// present in the trie, and matches are lost (keywords "ab" and "c" in the
// text "abc" would yield 1 instead of 2).
void getfail()
{
    for(int i=1;i<=26;i++) trie[0][i]=1;     // all edges of virtual node 0 lead to node 1
    q.push(1);
    while(!q.empty())
    {
        int now=q.front();
        q.pop();
        for(int i=1;i<=26;i++)               // set the failure link of each child of now
        {
            int child=trie[now][i];
            if(!child) continue;
            q.push(child);
            int j=f[now];                    // start from the parent's failure node
            // Climb until j has an edge on letter i; j then is the parent of
            // the node the child's failure link must point to.
            while(!trie[j][i]) j=f[j];
            f[child]=trie[j][i];
        }
    }
}
// Runs the description held in a (its first len characters) through the
// automaton and prints, followed by a newline, how many inserted keywords
// occur in it.
//
// Only keywords are guaranteed to consist of 'a'-'z'. A description byte
// outside that range cannot be part of any match, so it resets the automaton
// to the root instead of being used as an out-of-range column index.
void work()
{
    int root=1,ans=0;
    for(int i=0;i<len;i++)
    {
        int id=a[i]-'a'+1;
        // Record that the search has stood on this node; a node that is
        // entered again later skips the failure-chain walk below.
        mark[root]=true;
        if(id<1||id>26)
        {
            root=1;
            continue;
        }
        // The current node has no edge for this letter: fall back along the
        // failure links until a node with such an edge is reached (virtual
        // node 0 has every edge, so the loop ends).
        while(!trie[root][id]) root=f[root];
        root=trie[root][id];                 // take the edge
        if(!mark[root])
        {
            // Example: keywords "she" and "he", text "she". After walking
            // s-h-e the keyword "he" also ends here, so every node on the
            // failure chain has to be inspected as well.
            for(int j=root;j;j=f[j])
            {
                ans+=v[j];
                v[j]=0;                      // never count the same keyword twice
            }
        }
    }
    printf("%d\n",ans);
}
// Restores the per-test-case state: the node counter goes back to 1 (only the
// root exists) and the visit marks, transition table, failure links and
// keyword counters are zeroed.
void pre()
{
    tot=1;
    memset(mark,0,sizeof mark);
    memset(trie,0,sizeof trie);
    memset(f,0,sizeof f);
    memset(v,0,sizeof v);
}
// Reads the number of test cases, then for each case: resets the state, reads
// n keywords into the trie, builds the failure links, reads the description
// and prints the number of keywords found in it.
//
// Every %s conversion carries a field width matching its buffer (s holds 50
// characters plus the terminator, a holds 10000000 plus the terminator), so
// an over-long token cannot write past the array. Processing stops as soon as
// any read fails instead of continuing with stale data.
int main()
{
    if(scanf("%d",&t)!=1) return 0;
    while(t--)
    {
        pre();
        if(scanf("%d",&n)!=1) return 0;
        for(int i=1;i<=n;i++)
        {
            if(scanf("%50s",s)!=1) return 0;
            len=static_cast<int>(strlen(s));
            insert();
        }
        getfail();
        if(scanf("%10000000s",a)!=1) return 0;
        len=static_cast<int>(strlen(a));
        work();
    }
    return 0;
}

 

 

 

 

posted @ 2017-03-02 10:13  TRTTG  阅读(334)  评论(0编辑  收藏  举报