hdu 6096 AC 自动机 前后缀的巧妙应用

Bob has a dictionary with N words in it.
Now there is a list of words in which the middle part of the word has continuous letters disappeared. The middle part does not include the first and last character.
We only know the prefix and suffix of each word, and the number of characters missing is uncertain, it could be 0. But the prefix and suffix of each word can not overlap.
For each word in the list, Bob wants to determine which word is in the dictionary by prefix and suffix.
There are probably many answers. You just have to figure out how many words may be the answer.
Input
The first line of the input gives the number of test cases T; T test cases follow.
Each test case contains two integer N and Q, The number of words in the dictionary, and the number of words in the list.
Next N line, each line has a string Wi, represents the ith word in the dictionary (0<|Wi|≤100000)
Next Q line, each line has two string Pi , Si, represents the prefix and suffix of the ith word in the list (0<|Pi|,|Si|≤100000, 0<|Pi|+|Si|≤100000)
All of the above characters are lowercase letters.
The dictionary does not contain the same words.

Limits
T≤5
0 < N,Q ≤ 100000
∑Si+Pi ≤ 500000
∑Wi ≤ 500000
Output
For each test case, output Q lines, an integer per line, represents the answer to each word in the list.
Sample Input
1
4 4
aba
cde
acdefa
cdef
a a
cd ef
ac a
ce f
Sample Output
2
1
1
0

给你一些字符串,再给你一些字符串的前缀后缀 让你匹配 这些前缀后缀能匹配多少前面的串。
AC自动机小技巧 把所有串变双倍,中间加个{,然后进行匹配,那么能匹配到终点的必然能匹配到 { 所以必然满足 匹配到了前缀后缀

例如原串 cdef 变成 cdef{cdef,而被匹配的前后缀 cd ef 变成了 ef{cd

根据匹配串建ac自动机,然后更新的时候把相应的fail位置也更新了,而且记录一下长度 要求前缀后缀匹配串的长度必须比匹配串的短,这样能杜绝一些不满足的情况。。。大佬写得不好,让我纠结了好几天。。。用自己的板子贼简单。。
之前还一直re,让我误以为板子有问题了。。板子没问题。。re是因为我要拼接的字符串数组开小了。。还先学了一个知识,开char *s[i]; 地址数组 把全部串都放ss里面,然后用s[i] 保留开始位置,串末尾保留空格那么就会自行断开

8.21 更新 发现自己的板子有个bug next数组在一些情况会达到负数,导致别的数组利用的时候可能会用到负坐标,现更新过来 把负的next数组改为指向0 加了一句else t[x].next[i]=t[t[x].fail].next[i]; 然后在下面的判断会不会读到没用的坐标处改为while(p<=0) 即0坐标不要了


#include <bits/stdc++.h>
using namespace std;
// Capacity shared by every static buffer in this file.  The value is wrapped
// in parentheses so the macro still denotes a single number when it appears
// inside a larger expression (x / M, M % k, ...), not only as an array bound.
#define M (505000*3)

// One node of the trie / Aho-Corasick automaton; nodes live in the pool t[].
struct trie{
    int sign;     // set to 1 on the node where an inserted pattern ends, else 0
    int fail;     // failure link (index into t[]); -1 marks "no link" (root / fresh node)
    int next[28]; // edge per symbol: 0..25 = 'a'..'z', 26 = '{' ('z'+1); -1 = no edge.
                  // Slot 27 is never touched by the code in this file.
    int dep;      // depth of the node, i.e. length of the pattern prefix it spells
}t[M];

int q[M];    // BFS queue of node indices used while building failure links
int head;    // read cursor into q[]
int tail;    // write cursor into q[]
int L;       // index of the most recently allocated node (0 means only the root exists)
char str[M]; // scratch buffer; not referenced by the code visible in this file
int ans[M];  // per-node match counter, incremented by search()

// Insert the NUL-terminated pattern `a` into the trie and return the index of
// the node at which the pattern ends.
//
// Symbols are mapped with a[i]-'a', so the pattern is expected to consist of
// 'a'..'z' and '{' ('z'+1, slot 26); nothing else is validated here.  Fresh
// nodes are taken from the pool t[] by bumping the global counter L.
// Call this only before build_ACauto(): that pass overwrites the -1
// "no edge" markers this function relies on.
//
// An empty pattern ends at the root, so 0 is returned.  (Previously the node
// index was read from a variable that is never assigned when the loop body
// does not run.)
int Insert(char *a)
{
    int p=0;                                  // current node, starting at the root
    for(int i=0;a[i];i++){
        const int c=a[i]-'a';                 // symbol slot for this character
        int x=t[p].next[c];
        if(x<0){                              // no edge yet: allocate a new node
            x=++L;
            t[p].next[c]=x;
            for(int j=0;j<27;j++)t[x].next[j]=-1;
            t[x].dep=t[p].dep+1;
            t[x].fail=-1;
            t[x].sign=0;
        }
        p=x;
    }
    t[p].sign=1;                              // mark the end-of-pattern node
    return p;
}
// Compute failure links for every trie node (breadth-first) and fill in the
// missing edges so that t[x].next[c] becomes a total transition function.
//
// After this call every t[x].next[0..26] is >= 0:
//   * an existing trie edge is kept as is;
//   * a missing edge of the root loops back to the root (0);
//   * a missing edge of any other node is copied from its failure node, which
//     was processed earlier because it is strictly shallower.
//
// The root case matters: the root's failure link is -1, so copying from
// "the failure node" there would index t[-1].
//
// The queue cursors head/tail are reset here, so the function does not depend
// on the caller having cleared them.
void build_ACauto()
{
    head=tail=0;
    t[0].fail=-1;
    q[tail++]=0;                              // start the BFS from the root
    while(head<tail){
        const int x=q[head++];
        for(int i=0;i<27;i++){
            const int y=t[x].next[i];
            if(y<0){
                // No trie edge for this symbol: borrow the transition.
                t[x].next[i]= x ? t[t[x].fail].next[i] : 0;
                continue;
            }
            if(!x){
                t[y].fail=0;                  // children of the root fall back to the root
            }else{
                // Walk the parent's failure chain until a node with an
                // outgoing edge on symbol i is found.
                int p=t[x].fail;
                while(p>=0&&t[p].next[i]<0) p=t[p].fail;
                t[y].fail= p>=0 ? t[p].next[i] : 0;
            }
            q[tail++]=y;                      // real child: schedule it
        }
    }
}

// Run the text `s` through the automaton and, after each character, credit
// every node on the failure chain of the current state whose depth is at
// most `le`.  Credits are accumulated in ans[].
//
// s  : NUL-terminated text; characters are mapped with c-'a'.
// le : upper bound on the depth of a node that may be counted.
void search(char *s,int le)
{
    int state=0;
    for(const char *cur=s;*cur;++cur)
    {
        const int sym=*cur-'a';

        // Fall back along failure links while there is no usable transition.
        while(state!=0&&t[state].next[sym]<=0)
            state=t[state].fail;

        state=t[state].next[sym];
        if(state<0)
            state=0;

        // Every node reachable through failure links is a suffix match.
        for(int node=state;node!=0;node=t[node].fail)
        {
            if(t[node].dep<=le)
                ++ans[node];
        }
    }
}



// Upper bound (with slack) on the number of dictionary words / queries;
// the per-word and per-query arrays below are indexed from 1.
const int N = 100100;

int   len[N];  // len[i]: length of dictionary word i plus one
char *s[N];    // s[i]: start of the text stored for word i inside ss[]
int   pos[N];  // pos[i]: trie node returned by Insert() for query i

char ss[M];    // backing storage for all dictionary texts, packed back to back
char t1[M];    // query scratch: '{' followed by the prefix
char t2[M];    // query scratch: suffix, then t1 appended to it


int main()
{
    int T,n,i;
    scanf("%d",&T);
    while(T--){
        int n,m;
        head=tail=L=0;
        t[0].fail=-1;//初始化头结点信息
        t[0].sign=0;
        for(i=0;i<27;i++)t[0].next[i]=-1;
        memset(ans,0,sizeof(ans));
        scanf("%d%d",&n,&m);
        int del=0;
        int j=0;
        for(int i=1;i<=n;i++)
        {
            s[i]=ss+j;
            scanf(" %s",s[i]);
            len[i]=strlen(s[i])+1;
            j+=len[i];
            strcpy(ss+j,s[i]);
            ss[j-1]='z'+1;
            j+=len[i];
        }


       for(int i=1;i<=m;i++)
        {
            scanf(" %s %s",t1+1,t2);
            t1[0]='z'+1;
            strcat(t2,t1);
            pos[i]=Insert(t2);
        }
        build_ACauto();

        for(int i=1;i<=n;i++)
        {
            search(s[i],len[i]);
        }

        for(int i=1;i<=m;i++) printf("%d\n",ans[pos[i]] );
    }
    return 0;
}
posted @ 2017-09-19 21:18  黑码的博客  阅读(133)  评论(0编辑  收藏  举报