huyc

导航

字符串搜索的自动机

用于字符串搜索的自动机实现:调用 MachineCreate 创建一个状态机,然后将字符流逐个字符送入 MachineCheck 即可(返回非零表示刚好匹配到完整的目标字符串)。代码实现如下:

/* Opaque handle to a state machine; the functions in this file cast it
 * back to MachineState* before use. */
typedef void *StateMac_t;

/* One row of the transition table: the transitions taken on a single
 * input character. */
typedef struct {
    char c;     /* the input character this row applies to */
    int *map;   /* heap array indexed by current state; holds the next state */
}StateTable;

/* A string-search automaton: header followed by one StateTable row per
 * distinct character of the pattern, allocated in a single block. */
typedef struct {
    int lastState;      /* accepting state; set to the pattern length */
    int curState;       /* state reached after the characters fed so far */
    int tabSize;        /* number of valid rows in table[] */
    StateTable table[]; /* C99 flexible array member (was the GNU [0] extension) */
} MachineState;

/*
 * Collects the distinct characters of str[0..len) into buf in ascending
 * order (plain char comparison).
 *
 * buf must have room for at least len characters and must not overlap str.
 * The result is not NUL-terminated.
 *
 * Returns the number of distinct characters written to buf; 0 when len is 0,
 * in which case buf is left untouched.
 */
static int FitchUniqueList(char *buf, const char *str, size_t len){
    size_t count = 0;   /* distinct characters stored so far */
    size_t i;
    for(i = 0; i < len; ++i){
        const char ch = str[i];
        size_t pos = 0;
        size_t k;
        /* find the first stored character that is not smaller than ch */
        while(pos < count && buf[pos] < ch){
            ++pos;
        }
        if(pos < count && buf[pos] == ch){
            continue;   /* already present: skip the duplicate */
        }
        /* open a gap at pos; count <= i here, so buf[count] is in range */
        for(k = count; k > pos; --k){
            buf[k] = buf[k-1];
        }
        buf[pos] = ch;
        ++count;
    }
    return (int)count;
}
//创建状态机
StateMac_t MachineCreate(const char *str){
    size_t slen = strlen(str);
    char buf[slen];
    MachineState *ms = (MachineState*)malloc(sizeof(MachineState)+slen*sizeof(StateTable));
    ms->lastState = slen;
    ms->curState = 0;
    ms->tabSize = FitchUniqueList(buf, str, slen);
    //init;
    int i;
    for(i = 0; i < ms->tabSize; ++i){
        ms->table[i].c = buf[i];
        ms->table[i].map = malloc(sizeof(int)*(slen+1));
        memset(ms->table[i].map, 0, sizeof(int)*(slen+1));
    }
    memcpy(buf, str, slen);
    //complier
    for(i = 0; i < ms->tabSize; ++i){
        //input
        StateTable *sTab = ms->table+i;
        int s = 0;//state
        while(s <= ms->lastState){
            char bkc = buf[s];
            int its;
            buf[s] = sTab->c;
            for(its = 1; its <= s+1; ++its){
                if(strncmp(buf+s+1-its, str, its) == 0){
                    sTab->map[s] = its;
                }
            }
            buf[s] = bkc;
            ++s;
        }
    }
    return ms;
}
/*
 * Destroys a state machine: frees every row's transition array and then the
 * machine itself. The handle must not be used (or freed) afterwards.
 * Passing NULL is a no-op.
 */
void MachineDestroy(StateMac_t mp){
    MachineState *ms = (MachineState*)mp;
    int i;
    if(ms == NULL){
        return;
    }
    /* only the first tabSize rows own a map; lastState is the pattern
     * length and exceeds tabSize when the pattern repeats characters */
    for(i = 0; i < ms->tabSize; ++i){
        free(ms->table[i].map);
    }
    free(ms);
}
/*
 * Feeds one character into the machine and advances its state.
 * A character that has no row in the table sends the machine to state 0.
 * Returns 1 when the new state equals lastState, otherwise 0.
 */
int MachineCheck(StateMac_t mp, char c){
    MachineState *machine = (MachineState*)mp;
    const int prev = machine->curState;
    int next = 0;   /* default when c has no row in the table */
    int row = 0;
    while(row < machine->tabSize){
        if(machine->table[row].c == c){
            next = machine->table[row].map[prev];
        }
        ++row;
    }
    machine->curState = next;
    return machine->curState == machine->lastState;
}
/* Returns the machine's current state (its curState field). */
int MachineCurrentState(StateMac_t mp){
    const MachineState *machine = (MachineState*)mp;
    return machine->curState;
}
/* Resets the machine to its start state (state 0). */
void MachineReset(StateMac_t mp){
    ((MachineState*)mp)->curState = 0;
}

posted on 2012-05-27 14:35  huyc  阅读(306)  评论(0)  编辑  收藏  举报