Loading

AC自动机

一. 原理

AC自动机是在Trie树上构建的,比Trie树要多一个失配时的失败指针。
构建失败指针需要树的广度遍历。
每个节点(curr)的fail指针,是根据其父节点的fail指针构建的。

  1. 如果父节点(father)失败指针指向的节点(father.fail)的孩子里有当前节点(curr)的字符,则curr的失败指针指向father.fail的与curr字符匹配的孩子
  2. 如果父节点(father)失败指针指向的节点(father.fail)的孩子里没有当前节点(curr)的字符,则继续沿着father.fail.fail指针指向的方向移动,重复检查新节点的孩子里是否有与curr对应的字符,直到要检查的节点变为根。(变为根后,若根的孩子里有该字符,curr的失败指针就指向根的这个孩子;否则curr的失败指针指向根)
  3. 如果curr是root的子节点(不是孙节点/重孙节点等),则curr的失败指针直接指向根。

构建好后,AC自动机就有两种状态,两种转移:
状态:

  • 根节点(起始状态)
  • 非根节点

转移:

  • 当前字符匹配(沿child指针转移)
  • 当前字符不匹配(沿fail指针转移)

二. 代码

//trie tree
#include <iostream>
#include <queue>
#include <stdexcept>
#include <string>
#include <vector>

using namespace std;

// One node of the trie / Aho-Corasick automaton over the alphabet 'a'..'z'.
struct TrieT_node{
    // True when a pattern passed to TrieT::insert ends exactly at this node.
    bool mark;

    // Failure link: the node reached when the next text character has no
    // matching child here. NULL until TrieT::build() has computed it.
    TrieT_node* final;

    // Parent node. A freshly constructed node (and therefore the root) is its
    // own parent.
    TrieT_node* father;

    // Children indexed by (letter - 'a'); NULL where no child exists.
    TrieT_node* childs[26];

    // Index (0..25) of the edge leading from `father` to this node, or -1 for
    // a node that has no incoming edge (the root).
    int p;

    // Creates a childless, unmarked node with every member initialised, so
    // that no field is ever read in an indeterminate state.
    TrieT_node() : mark(false), final(NULL), father(this), p(-1) {
        for (int i = 0; i < 26; i++) {
            childs[i] = NULL;
        }
    }
};

// Trie of lowercase patterns that can be turned into an Aho-Corasick automaton.
//
// Typical use: insert() every pattern, call build(), then call solve() on each
// text. solve() (re)builds the failure links on demand, so a forgotten build()
// or an insert() after build() is handled safely.
//
// The object owns every non-root node it allocates and frees them in its
// destructor; copying is disabled because nodes hold pointers into `root`.
struct TrieT{
    TrieT_node root;

    // Creates an empty automaton whose root fails to itself.
    TrieT() : built(false) {
        root.final = &root;
    }

    // Frees every heap-allocated node (iteratively, so that very long patterns
    // cannot overflow the call stack).
    ~TrieT(){
        std::vector<TrieT_node*> pending;
        for (int i = 0; i < 26; i++) {
            if (root.childs[i] != NULL) {
                pending.push_back(root.childs[i]);
            }
        }
        while (!pending.empty()) {
            TrieT_node* node = pending.back();
            pending.pop_back();
            for (int i = 0; i < 26; i++) {
                if (node->childs[i] != NULL) {
                    pending.push_back(node->childs[i]);
                }
            }
            delete node;
        }
    }

    // Adds pattern `s` to the trie.
    //
    // Throws std::invalid_argument if `s` contains a character outside
    // 'a'..'z'; in that case the trie is left unchanged. Inserting the empty
    // string only marks the root, which solve() never reports as a match.
    void insert(const std::string& s){
        // Validate first so that a rejected pattern leaves no partial path.
        for (std::string::size_type i = 0; i < s.size(); ++i) {
            if (index_of(s[i]) < 0) {
                throw std::invalid_argument(
                    "TrieT::insert: pattern must contain only 'a'..'z'");
            }
        }

        TrieT_node* curr = &root;
        for (std::string::size_type i = 0; i < s.size(); ++i) {
            const int p = index_of(s[i]);
            if (curr->childs[p] == NULL) {
                TrieT_node* child = new TrieT_node();
                child->father = curr;
                child->p = p;
                curr->childs[p] = child;
                // A new node changes the structure, so existing failure links
                // may be stale and the new node has none yet.
                built = false;
            }
            curr = curr->childs[p];
        }
        curr->mark = true;
    }

    // Returns true iff `s` is exactly one of the inserted patterns.
    // A string containing a character outside 'a'..'z' is never a pattern.
    bool search(const std::string &s){
        TrieT_node* curr = &root;
        for (std::string::size_type i = 0; i < s.size(); ++i) {
            const int p = index_of(s[i]);
            if (p < 0 || curr->childs[p] == NULL) {
                return false;
            }
            curr = curr->childs[p];
        }
        return curr->mark;
    }

    // Computes the failure link (`final`) of every node by breadth-first
    // traversal. All links are recomputed from scratch, so calling build()
    // again after further insert() calls is safe.
    void build(){
        root.final = &root;
        std::queue<TrieT_node*> pending;

        // Depth-1 nodes always fail to the root.
        for (int i = 0; i < 26; i++) {
            TrieT_node* child = root.childs[i];
            if (child != NULL) {
                child->final = &root;
                pending.push(child);
            }
        }

        // BFS guarantees that curr->final (a strictly shallower node) is
        // already final when the links of curr's children are derived from it.
        while (!pending.empty()) {
            TrieT_node* curr = pending.front();
            pending.pop();

            for (int i = 0; i < 26; i++) {
                TrieT_node* child = curr->childs[i];
                if (child == NULL) {
                    continue;
                }
                // Walk up the parent's failure chain until some node has an
                // outgoing edge for letter i, or the root is reached.
                TrieT_node* cand = curr->final;
                while (cand != &root && cand->childs[i] == NULL) {
                    cand = cand->final;
                }
                child->final = (cand->childs[i] != NULL) ? cand->childs[i] : &root;
                pending.push(child);
            }
        }
        built = true;
    }

    // Returns true iff at least one inserted (non-empty) pattern occurs as a
    // substring of `s`.
    //
    // Characters outside 'a'..'z' cannot be part of any pattern; they reset
    // the automaton to the root. After each step the failure chain of the
    // current state is scanned for marked nodes, so a pattern that is a proper
    // suffix of the current trie path (e.g. "bc" while standing on "abc") is
    // reported too. That scan costs up to the trie depth per character.
    bool solve(const std::string& s){
        if (!built) {
            build();
        }

        TrieT_node* curr = &root;
        for (std::string::size_type i = 0; i < s.size(); ++i) {
            const int p = index_of(s[i]);
            if (p < 0) {
                curr = &root;
                continue;
            }
            // Follow failure links until the character can be consumed.
            while (curr != &root && curr->childs[p] == NULL) {
                curr = curr->final;
            }
            if (curr->childs[p] != NULL) {
                curr = curr->childs[p];
            }
            // Every node on the failure chain spells a suffix of the text read
            // so far; any marked one is a pattern ending at position i.
            for (TrieT_node* hit = curr; hit != &root; hit = hit->final) {
                if (hit->mark) {
                    return true;
                }
            }
        }
        return false;
    }

private:
    // True while the failure links are consistent with the trie structure.
    bool built;

    // Not copyable: nodes hold raw pointers to `root` and to each other.
    // Declared but intentionally not defined.
    TrieT(const TrieT&);
    TrieT& operator=(const TrieT&);

    // Maps 'a'..'z' to 0..25 and every other character to -1.
    static int index_of(char c){
        return (c >= 'a' && c <= 'z') ? (c - 'a') : -1;
    }
};

int main(){

    TrieT t;
    string s = "abc";
    t.insert(s);
    t.insert("bcf");
    t.insert("abcfg");

    t.build();

    cout<<t.solve("abc")<<endl;
    cout<<t.solve("saweq")<<endl;

    cout<<t.solve("bcf")<<endl;
    cout<<t.solve("ababc")<<endl;
    cout<<t.solve("ab")<<endl;


//    cout<<t.solve("ab")<<endl;
    return 0;
}
posted @ 2020-12-24 15:09  有人找你  阅读(43)  评论(0编辑  收藏  举报