多模式匹配算法

多模式匹配算法:给定几个特定的单词和一篇文章,统计文章中出现这些特定单词的次数。多模式匹配算法通常由以下几个步骤组成:1、根据给定的几个单词建立字典树;2、为字典树建立匹配失败后使用的指针;3、对给定的文章进行匹配,可以在线性时间内完成。下面给出相应代码:
#include <iostream>
#include <cstdio>
#include <cstring>
#include <cctype>
#include <queue>
#include <algorithm>
using namespace std ;

// Trie node used by the multi-pattern matcher.

struct Node {
    Node *next[26] ;   // child links; build_tree and Auto_AC index this as (letter - 'a')
    Node *fail ;       // failure link; new_Node points it at the node itself, build_Fail reassigns it
    bool is_over ;     // set to true by build_tree on the node where an inserted word ends
    int len ;          // length of the word ending at this node (written by build_tree); new_Node sets 0
};

// Allocate a fresh trie node on the heap.
//
// The returned node has no children, is not a word end, records a word
// length of 0, and has its failure link pointing at itself.
// The caller owns the returned pointer.

Node* new_Node() {
    Node *fresh = new Node;

    // Clear every child slot.
    int slot = 26;
    while (slot-- > 0)
        fresh->next[slot] = NULL;

    fresh->len = 0;
    fresh->is_over = false;
    fresh->fail = fresh;
    return fresh;
}

// Insert one keyword into the trie rooted at `root`.
//
// root : root node of the trie (ignored if NULL).
// s    : NUL-terminated keyword. Letters are folded to lower case, so "Dog"
//        and "dog" share the same path.
//
// A keyword is silently ignored when it is empty or contains anything other
// than the ASCII letters a-z / A-Z: such a character has no slot in
// Node::next[26], and indexing with it would access memory outside the array.
// Validation happens before any node is created, so a rejected keyword
// leaves the trie untouched.

void build_tree(Node *root, char *s) {
    if (root == NULL || s == NULL)
        return;

    const int len = static_cast<int>(std::strlen(s));
    if (len == 0)
        return;   // would otherwise flag the root itself as a word end

    for (int i = 0; i < len; i++) {
        const char c = s[i];
        const bool is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
        if (!is_letter)
            return;
    }

    Node *cur = root;
    for (int i = 0; i < len; i++) {
        const char c = s[i];
        const int slot = (c >= 'a' && c <= 'z') ? c - 'a' : c - 'A';
        if (cur->next[slot] == NULL)
            cur->next[slot] = new_Node();
        cur = cur->next[slot];
    }

    // Mark the terminal node and remember the word length for boundary checks.
    cur->len = len;
    cur->is_over = true;
}

// Compute the failure link of every node in the trie rooted at `root`.
//
// Nodes are visited breadth-first, so when a child is processed the failure
// link of its parent (a shallower node) is already final.
// For a child reached from parent `p` through letter `i`:
//   - if `p` is the root, the child fails to the root;
//   - otherwise walk the failure chain starting at p->fail until a node with
//     an outgoing edge `i` is found (or the root is reached) and fail to the
//     node behind that edge, or to the root when there is none.
//
// The walk restarts from p->fail for EVERY child. Sharing one cursor across
// the children of the same parent would leave later siblings with a link
// that is shorter than the longest proper suffix present in the trie.

void build_Fail(Node *root) {
    if (root == NULL)
        return;

    std::queue<Node *> pending;
    root->fail = root;
    pending.push(root);

    while (!pending.empty()) {
        Node *p = pending.front();
        pending.pop();

        for (int i = 0; i < 26; i++) {
            Node *child = p->next[i];
            if (child == NULL)
                continue;

            if (p == root) {
                child->fail = root;
            } else {
                Node *f = p->fail;
                while (f != root && f->next[i] == NULL)
                    f = f->fail;
                // f is strictly shallower than p, so f->next[i] can never be `child` itself.
                child->fail = (f->next[i] != NULL) ? f->next[i] : root;
            }
            pending.push(child);
        }
    }
}

// Map an ASCII letter to its child slot (0..25), ignoring case.
// Returns -1 for every other character.
static int ac_letter_index(char ch) {
    if (ch >= 'a' && ch <= 'z')
        return ch - 'a';
    if (ch >= 'A' && ch <= 'Z')
        return ch - 'A';
    return -1;
}

// Count whole-word occurrences of the trie's keywords in the text `s`.
//
// root : root of a trie built with build_tree whose failure links were set
//        by build_Fail.
// s    : NUL-terminated text; letters are matched case-insensitively.
//
// An occurrence is counted only when the characters immediately before and
// after it are not letters; the start and the end of the string count as
// boundaries. Every non-letter resets the automaton to the root.
// Returns the number of such occurrences (0 for a NULL argument).

int Auto_AC(Node* root, char *s) {
    if (root == NULL || s == NULL)
        return 0;

    const int len = static_cast<int>(std::strlen(s));
    int count = 0;
    Node *r = root;

    for (int i = 0; i < len; i++) {
        const int slot = ac_letter_index(s[i]);
        if (slot < 0) {
            r = root;
            continue;
        }

        // Follow failure links until the letter can be consumed or the root is reached.
        // A NULL or self-referencing link (failure links never built) falls back to the
        // root so the walk always terminates.
        while (r != root && r->next[slot] == NULL)
            r = (r->fail != NULL && r->fail != r) ? r->fail : root;
        if (r->next[slot] != NULL)
            r = r->next[slot];

        if (!r->is_over)
            continue;

        // The candidate occupies s[start..i]. Test the left boundary by index so
        // that a keyword at the very beginning of the text never reads s[-1].
        const int start = i - r->len + 1;
        const bool left_ok =
            (start == 0) || (start > 0 && ac_letter_index(s[start - 1]) < 0);
        // s[i + 1] is at worst the terminating NUL, which is not a letter.
        const bool right_ok = ac_letter_index(s[i + 1]) < 0;
        if (left_ok && right_ok)
            count++;
    }
    return count;
}

//主函数

int main()  {
    int m , n , t = 1 ;
    while(cin >> m >> n)    {
           // getchar() ;
        Node *root = new_Node() ;
        Node *r = root ;
        while(m--)  {
            char s[30] ;
            cin >> s ;
            getchar() ;
        //    cout << s << endl ;
            build_tree(root,s) ;
            root = r ;
        }
        root = r ;
       build_Fail(root) ;
        root = r ;
        int count[100] = {0} ;
        int ma = 0 ;
        char str[30][300] , str1[30][300] ;

        for(int i = 0 ; i < n ; i++)    {
            gets(str[i]) ;
           strcpy(str1[i],str[i]) ;

            int len = strlen(str[i]) ;
            for(int k = 0 ; k < len ; k++)
                if(isupper(str[i][k]))
                        str[i][k] += 32 ;
            count[i] = Auto_AC(root,str[i]) ;
            ma = max(ma,count[i]) ;
        }
        cout << "Excuse Set #" << t++ << endl;
        for(int k = 0 ; k < n ; k++)
            if(count[k] == ma)
                cout << str1[k] << endl ;
            cout << endl ;
    }
    return 0 ;
}
下面介绍一种暴力求解方法,通过调用 strstr 函数实现。下面给出相应代码:
#include <iostream>
#include <cstdio>
#include <cstring>
using namespace std;

// Dimension shared by all tables below: each has N rows, and each text row holds N bytes.
#define N 111

char str[N][N];   // keywords: read line by line in main and lower-cased with strdx
char tem[N][N];   // text lines exactly as read; these are what main prints
char pop[N][N];   // lower-cased copies of tem[], searched with strstr
int num[N];       // num[i]: number of keyword hits counted for text line i

// Convert the NUL-terminated string f to lower case in place.
// Only bytes in the range 'A'..'Z' are changed (each is shifted up by 32);
// every other byte is left untouched.
void strdx(char f[])
{
	for (char *cursor = f; *cursor != '\0'; ++cursor)
	{
		const bool is_upper = !(*cursor < 'A' || *cursor > 'Z');
		if (is_upper)
			*cursor += 32;
	}
}

int main()
{
	int n, m;
	int t = 1;

	while (cin >> n >> m)
	{
		getchar();
		// Init.
		memset(str, 0, sizeof(str));
		memset(tem, 0, sizeof(tem));
		memset(num, 0, sizeof(num));
		memset(pop, 0, sizeof(pop));

		// Read.
		for (int i = 0; i < n; i++)
		{
			gets(str[i]);
			strdx(str[i]);
		}
		for (int i = 0; i < m; i++)
		{
			gets(tem[i]);
			strcpy(pop[i], tem[i]);
			strdx(pop[i]);
		}

		// Count.
		for (int i = 0; i < m; i++)
		{
			char *move = NULL;
			for (int j = 0; j < n; j++)
			{
				move = strstr(pop[i], str[j]);
				if (move == NULL)
					continue;
				int k = strlen(str[j]);

				if (*(move + k) >= 'a' && *(move + k) <= 'z')
					continue;
			
				num[i]++;
			}
		}

		int max = 0;
		for (int i = 0; i < m; i++)
			if (num[i] > max)
				max = num[i];

		cout << "Excuse Set #" << t++ << endl;

		for (int i = 0; i < m; i++)
			if (max == num[i])
				puts(tem[i]);
		cout << endl;
	}
	return 0;
}
posted @ 2014-11-20 17:52  NYNU_ACM  阅读(222)  评论(0编辑  收藏  举报