用 C++ 后缀自动机实现最长公共子串(Longest Common Substring)算法,并封装成 Python 库
后缀自动机的C++代码转自https://e-maxx.ru/algo/suffix_automata,其余封装为自写。
在 C++ 源文件(sam_lcs.cpp)的同级目录建立 setup.py 文件,代码如下:
#!/usr/bin/env python
"""Build script for the ``sam`` C++ extension module.

Compiles ``sam_lcs.cpp`` (located next to this file) into an importable
extension named ``sam``.
"""
# NOTE(review): distutils is deprecated (PEP 632) and was removed from the
# standard library in Python 3.12. On newer interpreters, import ``setup`` and
# ``Extension`` from ``setuptools`` instead; the call below stays the same.
from distutils.core import setup, Extension

# Used both as the distribution name and as the extension name. The extension
# name has to match the ``PyInit_sam`` entry point defined in sam_lcs.cpp.
mod = "sam"

setup(name=mod, ext_modules=[Extension(mod, sources=['sam_lcs.cpp'])])
封装完后缀自动机的源码后,命令行编译、安装、卸载,安装后即可在Python里import调用:
# Compile the extension; the build products go under ./build
python setup.py build
# Install through pip so the installed files are recorded and can be removed later
python -m pip install .
# setup.py has no "uninstall" command; let pip remove what it installed
python -m pip uninstall sam
包装模块的C++函数编写如下:
#include <map> #include <string> #include <Python.h> using namespace std; struct state { int len, link; map<char, int> next; }; const int MAXLEN = 100000; state st[MAXLEN * 2]; int sz, last; void sa_init() { sz = last = 0; st[0].len = 0; st[0].link = -1; ++sz; // 清除状态: for (int i = 0; i < MAXLEN * 2; ++i) st[i].next.clear(); }; void sa_extend(char c) { int cur = sz++; st[cur].len = st[last].len + 1; int p; for (p = last; p != -1 && !st[p].next.count(c); p = st[p].link) st[p].next[c] = cur; if (p == -1) st[cur].link = 0; else { int q = st[p].next[c]; if (st[p].len + 1 == st[q].len) st[cur].link = q; else { int clone = sz++; st[clone].len = st[p].len + 1; st[clone].next = st[q].next; st[clone].link = st[q].link; for (; p != -1 && st[p].next[c] == q; p = st[p].link) st[p].next[c] = clone; st[q].link = st[cur].link = clone; } } last = cur; }; string lcs(string s, string t) { sa_init(); for (int i = 0; i < (int)s.length(); ++i) sa_extend(s[i]); int v = 0, l = 0, best = 0, bestpos = 0; for (int i = 0; i < (int)t.length(); ++i) { while (v && !st[v].next.count(t[i])) { v = st[v].link; l = st[v].len; } if (st[v].next.count(t[i])) { v = st[v].next[t[i]]; ++l; } if (l > best) best = l, bestpos = i; } return t.substr(bestpos - best + 1, best); }; static PyObject *sam_lcs(PyObject *self, PyObject *args) { char *stmp, *ttmp; string s, t; if (!PyArg_ParseTuple(args, "ss", &stmp, &ttmp)) return NULL; s = stmp; t = ttmp; return PyUnicode_FromString(lcs(s, t).c_str()); }; static PyMethodDef sam_lcs_Methods[] = { {"lcs", sam_lcs, METH_VARARGS, "Get a longest common string of two strings with SAM"}, {NULL, NULL, 0, NULL}}; static struct PyModuleDef sam = { PyModuleDef_HEAD_INIT, "sam", "SAM", -1, sam_lcs_Methods}; PyMODINIT_FUNC PyInit_sam(void) { return PyModule_Create(&sam); };
编译安装完成后,就可以在 Python 里调用了,例如:import sam 之后,sam.lcs('abcde', 'xbcdy') 返回 'bcd'。