HDU 5880 Family View (AC自动机)

Family View

Time Limit: 3000/1000 MS (Java/Others)    Memory Limit: 65536/65536 K (Java/Others)
Total Submission(s): 2099    Accepted Submission(s): 441


Problem Description
Steam is a digital distribution platform developed by Valve Corporation offering digital rights management (DRM), multiplayer gaming and social networking services. A family view can help you to prevent your children access to some content which are not suitable for them.

Take an MMORPG game as an example, given a sentence T, and a list of forbidden words {P}, your job is to use '*' to subsititute all the characters, which is a part of the substring matched with at least one forbidden word in the list (case-insensitive).

For example, T is: "I love Beijing's Tiananmen, the sun rises over Tiananmen. Our great leader Chairman Mao, he leades us marching on."

And {P} is: {"tiananmen", "eat"}

The result should be: "I love Beijing's *********, the sun rises over *********. Our gr*** leader Chairman Mao, he leades us marching on."
 

 

Input
The first line contains the number of test cases. For each test case:
The first line contains an integer n, represneting the size of the forbidden words list P. Each line of the next n lines contains a forbidden words Pi (1|Pi|1000000,|Pi|1000000) where Pi only contains lowercase letters.

The last line contains a string T (|T|1000000).
 

 

Output
For each case output the sentence in a line.
 

 

Sample Input
1 3 trump ri o Donald John Trump (born June 14, 1946) is an American businessman, television personality, author, politician, and the Republican Party nominee for President of the United States in the 2016 election. He is chairman of The Trump Organization, which is the principal holding company for his real estate ventures and other business interests.
 

 

Sample Output
D*nald J*hn ***** (b*rn June 14, 1946) is an Ame**can businessman, televisi*n pers*nality, auth*r, p*litician, and the Republican Party n*minee f*r President *f the United States in the 2016 electi*n. He is chairman *f The ***** *rganizati*n, which is the p**ncipal h*lding c*mpany f*r his real estate ventures and *ther business interests.
 
分析: 题意让我们把列表中的出现的单词,在文章中全部替换为*(无论大小写)
多元匹配的问题,我们可以想到用AC自动机.
我们可以用end[j]表示以j节点结尾的单词的最大长度,
那么如果匹配到当前的第j个字符且end[j]>0,将从后往前的end[j]长度的字符串全部化为*
大小问题的处理方式.我们可以先标记大写字母,然后将文章全部化为小写字母,
这样的话,在输出的时候,如果该字母没有被变成*,且被标记,就输出对应的大写字母
 
代码如下:
#include <stdio.h>
#include <algorithm>
#include <iostream>
#include <string.h>
#include <queue>
using namespace std;
int vis[1000010];
struct Trie
{
    int Next[1000010][26];//26是这里讨论26个小写字母的情况,根据情况修改
    int fail[1000010],end[1000010];//end数组表示以该节点结尾的字符串的数量
    int len[1000010];
    int root,L;//L用来标记节点序号,以广度优先展开的字典树的序号
    int newnode()  //建立新节点
    {
        for(int i = 0;i < 26;i++)
            Next[L][i] = -1;     //将该节点的后继节点域初始化
        len[L]=0;
        end[L++] = 0;
        return L-1;    //返回当前节点编号
    }
    void init() //初始化操作
    {
        L = 0;
        root = newnode();
    }
    void insert(char buf[])
    {
        int Len = strlen(buf);
        int now = root;
        for(int i = 0;i < Len;i++)
        {
            if(Next[now][buf[i]-'a'] == -1)  //如果未建立当前的后继节点,建立新的节点
                Next[now][buf[i]-'a'] = newnode();
                len[Next[now][buf[i]-'a']]=len[now]+1;
            now = Next[now][buf[i]-'a'];
        }
        end[now]=len[now];//以该节点结尾的字符串数量增加1
    }
    void build()
    {
        queue<int>Q; //用广度优先的方式,将树层层展开
        fail[root] = root;
        for(int i = 0;i < 26;i++)
            if(Next[root][i] == -1)
                Next[root][i] = root;
            else
            {
                fail[Next[root][i]] = root;
                Q.push(Next[root][i]);
            }
        while( !Q.empty() )
        {
            int now = Q.front();
            Q.pop();
            end[now]=max(end[now],end[fail[now]]);
            for(int i = 0;i < 26;i++)
                if(Next[now][i] == -1)
                    Next[now][i] = Next[fail[now]][i];//该段的最后一个节点匹配后,跳到拥有最大公共后缀的fail节点继续匹配
                else
                {
                    fail[Next[now][i]]=Next[fail[now]][i];//当前节点的fail节点等于它前驱节点的fail节点的后继节点
                    Q.push(Next[now][i]);
                }
        }
    }
    void solve(char buf[])
    {
        int L=strlen(buf);
        int now=root;
        for(int i=0;i<L;i++)
        {
          now=Next[now][buf[i]-'a'];
          if(end[now]>0)
          {
             for(int j=i;j>i-end[now];j--)
              buf[j]='*';
          }
        }
        for(int i=0;i<L;i++)
        {
            if(buf[i]!='*'&&vis[i]==1)
            printf("%c",buf[i]-32);
            else
            printf("%c",buf[i]);
        }
        printf("\n");
    }
};
char buf[1000010];
Trie ac;
int main()
{
    int  t,n,ans;
    scanf("%d",&t);
    while(t--)
    {
        memset(vis,0,sizeof(vis));
        ac.init();
       scanf("%d",&n);
       for(int i=0;i<n;i++)
       {
           scanf("%s",buf);
           ac.insert(buf);
       }
       ac.build();
         getchar();
         gets(buf);
         for(int i=0;i<strlen(buf);i++)
         {
            if(buf[i]>='A'&&buf[i]<='Z'){
             buf[i]=buf[i]+32;
             vis[i]=1;
            }
         }
         ac.solve(buf);
    }
    return 0;
}

 

#include <stdio.h>
#include <algorithm>
#include <iostream>
#include <string.h>
#include <queue>
using namespace std;
int vis[1000010];
struct Trie
{
    int Next[1000010][26];//26是这里讨论26个小写字母的情况,根据情况修改
    int fail[1000010],end[1000010];//end数组表示以该节点结尾的字符串的数量
    int len[1000010];
    int root,L;//L用来标记节点序号,以广度优先展开的字典树的序号
    int newnode()  //建立新节点
    {
        for(int i = 0;i < 26;i++)
            Next[L][i] = -1;     //将该节点的后继节点域初始化
        len[L]=0;
        end[L++] = 0;
        return L-1;    //返回当前节点编号
    }
    void init() //初始化操作
    {
        L = 0;
        root = newnode();
    }
    void insert(char buf[])
    {
        int Len = strlen(buf);
        int now = root;
        for(int i = 0;i < Len;i++)
        {
            if(Next[now][buf[i]-'a'] == -1)  //如果未建立当前的后继节点,建立新的节点
                Next[now][buf[i]-'a'] = newnode();
                len[Next[now][buf[i]-'a']]=len[now]+1;
            now = Next[now][buf[i]-'a'];
        }
        end[now]=len[now];//以该节点结尾的字符串数量增加1
    }
    void build()
    {
        queue<int>Q; //用广度优先的方式,将树层层展开
        fail[root] = root;
        for(int i = 0;i < 26;i++)
            if(Next[root][i] == -1)
                Next[root][i] = root;
            else
            {
                fail[Next[root][i]] = root;
                Q.push(Next[root][i]);
            }
        while( !Q.empty() )
        {
            int now = Q.front();
            Q.pop();
            end[now]=max(end[now],end[fail[now]]);
            for(int i = 0;i < 26;i++)
                if(Next[now][i] == -1)
                    Next[now][i] = Next[fail[now]][i];//该段的最后一个节点匹配后,跳到拥有最大公共后缀的fail节点继续匹配
                else
                {
                    fail[Next[now][i]]=Next[fail[now]][i];//当前节点的fail节点等于它前驱节点的fail节点的后继节点
                    Q.push(Next[now][i]);
                }
        }
    }
    void solve(char buf[])
    {
        int L=strlen(buf);
        int now=root;
        for(int i=0;i<L;i++)
        {
          now=Next[now][buf[i]-'a'];
          if(end[now]>0)
          {
             for(int j=i;j>i-end[now];j--)
              buf[j]='*';
          }
        }
        for(int i=0;i<L;i++)
        {
            if(buf[i]!='*'&&vis[i]==1)
            printf("%c",buf[i]-32);
            else
            printf("%c",buf[i]);
        }
        printf("\n");
    }
};
char buf[1000010];
Trie ac;
int main()
{
    int  t,n,ans;
    scanf("%d",&t);
    while(t--)
    {
        memset(vis,0,sizeof(vis));
        ac.init();
       scanf("%d",&n);
       for(int i=0;i<n;i++)
       {
           scanf("%s",buf);
           ac.insert(buf);
       }
       ac.build();
         getchar();
         gets(buf);
         for(int i=0;i<strlen(buf);i++)
         {
            if(buf[i]>='A'&&buf[i]<='Z'){
             buf[i]=buf[i]+32;
             vis[i]=1;
            }
         }
         ac.solve(buf);
    }
    return 0;
}

posted @ 2017-10-02 23:46  hinata_hajime  阅读(247)  评论(0编辑  收藏  举报