hdu 6096String(trie树)

题目链接

String

Time Limit: 6000/3000 MS (Java/Others) Memory Limit: 524288/524288 K (Java/Others)
Total Submission(s): 784 Accepted Submission(s): 253

Problem Description
Bob has a dictionary with N words in it.
Now there is a list of words in which the middle part of the word has continuous letters disappeared. The middle part does not include the first and last character.
We only know the prefix and suffix of each word, and the number of characters missing is uncertain, it could be 0. But the prefix and suffix of each word can not overlap.
For each word in the list, Bob wants to determine which word is in the dictionary by prefix and suffix.
There are probably many answers. You just have to figure out how many words may be the answer.

Input
The first line of the input gives the number of test cases T; T test cases follow.
Each test case contains two integer N and Q, The number of words in the dictionary, and the number of words in the list.
Next N line, each line has a string Wi, represents the ith word in the dictionary (0<|Wi|≤100000)
Next Q line, each line has two string Pi , Si, represents the prefix and suffix of the ith word in the list (0<|Pi|,|Si|≤100000,0<|Pi|+|Si|≤100000)
All of the above characters are lowercase letters.
The dictionary does not contain the same words.

Limits
T≤5
0<N,Q≤100000
∑(|Si|+|Pi|)≤500000
∑|Wi|≤500000

Output
For each test case, output Q lines, an integer per line, represents the answer to each word in the list.

Sample Input
1
4 4
aba
cde
acdefa
cdef
a a
cd ef
ac a
ce f

Sample Output
2
1
1
0

Source
2017 Multi-University Training Contest - Team 6

题意:

给出\(n,q(0 < n,q \leq 100000)\),表示有\(n\)个单词,\(q\)个询问,每个询问给出一个前缀和一个后缀,输出有多少个单词符合该前缀和后缀,且前缀和后缀不能有重叠部分。

题解:

因为题目给出的是前缀和后缀,所以我们把一个长度为\(n\)的字符串\(str[0..n-1]\)交错变为\(str_0,str_{n-1},str_1,str_{n-2},\dots,str_{n-1},str_0\)(长度为\(2n\))后插入字典树;对于询问,设前缀为\(s\),后缀为\(t\)(长度为\(m\)),则在查询时将它们合为一个字符串\(s_0,t_{m-1},s_1,t_{m-2},\dots\),前后缀长度不等时,较短的一方用\(*\)补齐(如前缀\(ac\)、后缀\(f\),构造后的字符串为\(afc*\))。
字典树结点需要维护3个值,这个节点的后继节点\(child\),经过这个节点的字符串有多少个\(num[i]\),以及经过这个节点的字符串的长度\(len\)(用\(vector\)来存)。

为什么要存长度?

题目中要求前缀和后缀不重叠(例如要防止字符串为 \(aaa\)、前缀为 \(aa\)、后缀为 \(aa\) 这种情况被计入答案),所以我们需要存长度:建完 \(trie\) 树后对每个节点中 vector 的数值排序,询问时去掉长度小于“前缀长度+后缀长度”的字符串,剩下的个数就是能同时匹配当前前缀和后缀的字符串个数。
这里还有一个要注意的地方就是当出现 \(*\) 的时候 \(*\) 后边所有的节点都可能是我们要匹配的答案,所以所有节点都要记录下来留作处理。这里我用2个队列来处理我们的节点(有点像滚动数组)。一个队列用来存放父亲节点,一个队列用来存放孩子节点,两个队列交换使用,实现对所有可能节点的计数。

#include<iostream>
#include<cstdio>
#include<algorithm>
#include<cstring>
#include<vector>
#include<queue>
#include<stack>
using namespace std;
// Loop shorthands: rep iterates i over [a, n) ascending, per over [a, n) descending.
#define rep(i,a,n) for (int i=a;i<n;i++)
#define per(i,a,n) for (int i=n-1;i>=a;i--)
// Abbreviations for common member names.
#define pb push_back
#define fi first
#define se second
typedef vector<int> VI;
typedef long long ll;
typedef pair<int,int> PII;
// NOTE(review): per, fi, se, ll, PII, inf and mod are not used anywhere in this listing.
const int inf=0x3fffffff;
const ll mod=1000000007;
// Base capacity; the trie arrays below are sized maxn*2.
const int maxn=5e5+10;
// a[x][c]: index of the child of trie node x for letter 'a'+c, or 0 if there is none.
int a[maxn*2][26];
// s / s1: input buffers filled starting at index 1; st: buffer for the interleaved string.
char s[maxn],st[maxn*2],s1[maxn];
// num[x]: number of inserted strings whose path goes through node x.
int num[maxn*2];
// Index of the most recently allocated trie node; node 1 is the root.
int cnt;
// V[x]: original word length of every inserted string passing through node x.
VI V[maxn*2];
void init(int x)
{
    rep(i,0,26) a[x][i]=0;
    num[x]=0;
}

void insert(char *ss,int l)
{
    int now=1;
    rep(i,1,l+1)
    {
        //int t=ss[i]-'a';
        if(!a[now][ss[i]-'a'])
        {
            a[now][ss[i]-'a']=++cnt;
            init(cnt);
        }
        now=a[now][ss[i]-'a'];
        num[now]++;
        V[now].pb(l/2); //
    }
}

void dfs(int x)
{
    rep(i,0,26)
    {
        if(a[x][i]) dfs(a[x][i]);
    }
    sort(V[x].begin(),V[x].end());
}

int main()
{
    int cas;
    scanf("%d",&cas);
    while(cas--)
    {
        int n,q;
        scanf("%d%d",&n,&q);
        init(1);
        rep(i,0,maxn*2) V[i].clear();
        cnt=1;
        rep(i,1,n+1)
        {
            scanf("%s",s+1);
            int l=(int)strlen(s+1);
            rep(i,1,l+1) st[i*2-1]=s[i],st[i*2]=s[l-i+1];
            st[l*2+1]='\0';
            //printf("%s\n",st+1);
            insert(st,2*l);
        }
        dfs(1);
        while(q--)
        {
            scanf("%s%s",s+1,s1+1);
            int l1=(int)strlen(s+1),l2=(int)strlen(s1+1);
            int l=max(l1,l2);
            rep(i,l1+1,l+1) s[i]='*';
            int j=l+1;
            rep(k,0,l-l2) s[j++]='*';
            rep(i,1,l2+1) s[j++]=s1[i];
            rep(i,l+1,2*l+1) s1[i-l]=s[i];
            s[l+1]=s1[l+1]='\0';
            
            rep(i,1,l+1) st[2*i-1]=s[i],st[2*i]=s1[l-i+1];
            st[2*l+1]='\0';
            //printf("%s\n",st+1);
            l*=2;
            int ans=0,tmp=0;
            queue<int> q[2];
            q[0].push(1);
            rep(i,1,l+1)
            {
                tmp=1-tmp;
                while(!q[1-tmp].empty())
                {
                    int u=q[1-tmp].front();
                    q[1-tmp].pop();
                    if(st[i]=='*')
                    {
                        rep(j,0,26) if(a[u][j]) q[tmp].push(a[u][j]);
                    }
                    else
                    {
                        if(a[u][st[i]-'a'])
                            q[tmp].push(a[u][st[i]-'a']);
                    }
                }
            }
            while(!q[tmp].empty())
            {
                int u=q[tmp].front();
                q[tmp].pop();
                ans+=num[u];
                int t=(int)(lower_bound(V[u].begin(),V[u].end(),l1+l2)-V[u].begin());
                ans-=t;
            }
            printf("%d\n",ans);
        }
    }
    return 0;
}


posted @ 2017-08-14 15:50  tarjan's  阅读(77)  评论(0编辑  收藏  举报