字符串板
kmp,exkmp,ac自动机字符串下标从$0$开始, 其他都从$1$开始
1. kmp
$f_i$表示$s[0...i-1]$的$border$长度
如果$n-f_n$能整除$n$, 那么最小循环节为$n-f_n$, 否则不是周期串
/*
 * Build the KMP failure function of pattern s (0-indexed).
 *
 * On return f[i] holds the border length of s[0..i-1] for every
 * 0 <= i <= strlen(s). f[0] and f[1] are always written, so f needs room
 * for max(2, strlen(s) + 1) entries.
 */
void init(const char *s, int *f)
{
    int n = (int)strlen(s);

    f[0] = f[1] = 0;
    for (int i = 1, j = 0; i < n; ++i) {
        /* Fall back along borders until s[i] can extend one of them. */
        while (j && s[i] != s[j])
            j = f[j];
        if (s[i] == s[j])
            ++j;
        f[i + 1] = j;
    }
}

/*
 * Count the occurrences of pattern s inside text t.
 *
 * f must be the failure table produced by init(s, f). After a full match
 * the state falls back to f[m], so overlapping occurrences are counted.
 */
int kmp(const char *s, const char *t, const int *f)
{
    int n = (int)strlen(t);
    int m = (int)strlen(s);
    int ans = 0;

    for (int i = 0, j = 0; i < n; ++i) {
        while (j && s[j] != t[i])
            j = f[j];
        if (s[j] == t[i])
            ++j;
        if (j == m) {
            ++ans;
            j = f[j];
        }
    }
    return ans;
}
2. ac自动机
$ac$自动机每个状态$x$可以看成从根到$x$的字符串, $fa$数组指向这个串最长合法后缀对应状态
求每个模板串在文本串中出现次数, 只要先匹配求出每个状态匹配的次数, 然后每个模板串出现次数就为$fail$树中子树匹配次数和
求所有模板串在文本串中出现次数和, 那么每匹配到一个状态, 答案加上这个状态到根的路径中模板串的个数即可
const int N = 1e6+10;

/*
 * Aho-Corasick automaton over the lowercase alphabet 'a'..'z'.
 *
 * State 1 is the root and 0 means "no state". Each state corresponds to the
 * string spelled on the path from the root. fa[x] is the fail link of x.
 *
 * Usage: init(), ins() every pattern, build(), then query().
 * build() overwrites missing edges with fallback targets, so ins() must not
 * be called after build() without a fresh init().
 */
struct AC_automaton {
    int rt, cnt, ch[N][26], fa[N];

    /* Reset the automaton to a single root state. */
    void init() {
        rt = cnt = 1;
        memset(ch[1], 0, sizeof ch[1]);
        fa[1] = 0;
    }

    /* Allocate a cleared state and return its index. */
    int newnode() {
        ++cnt;
        memset(ch[cnt], 0, sizeof ch[cnt]);
        fa[cnt] = 0;
        return cnt;
    }

    /* Insert one pattern (0-indexed, characters in 'a'..'z'). */
    void ins(const char *s) {
        int now = rt, n = (int)strlen(s);
        for (int i = 0; i < n; ++i) {
            int c = s[i] - 'a';
            if (!ch[now][c])
                ch[now][c] = newnode();
            now = ch[now][c];
        }
    }

    /*
     * Compute fail links in BFS order and turn the trie into a complete
     * transition table: a missing edge of u is redirected to the edge of
     * fa[u], and a missing edge of the root loops back to the root.
     */
    void build() {
        queue<int> q;
        for (int i = 0; i < 26; ++i) {
            if (ch[rt][i]) {
                fa[ch[rt][i]] = rt;
                q.push(ch[rt][i]);
            } else {
                ch[rt][i] = rt;
            }
        }
        while (q.size()) {
            int u = q.front();
            q.pop();
            for (int i = 0; i < 26; ++i) {
                int &v = ch[u][i];
                if (v) {
                    fa[v] = ch[fa[u]][i];
                    q.push(v);
                } else {
                    v = ch[fa[u]][i];
                }
            }
        }
    }

    /*
     * Run text s through the automaton. This is a skeleton: put the
     * per-position work at the marked line.
     */
    void query(const char *s) {
        int now = rt, n = (int)strlen(s);
        for (int i = 0; i < n; ++i) {
            now = ch[now][s[i] - 'a'];
            /* now is the state matched after reading s[0..i] */
        }
    }
} ac;
3. hash
经典双模数hash
const int N = 1e6+10;
const int P1 = 876756319, B1 = 991;
const int P2 = 799898821, B2 = 2333;

/*
 * fac1[k] / fac2[k] : B1^k mod P1 / B2^k mod P2 (filled in main).
 * f1[i]   / f2[i]   : hash of the prefix s[1..i] under each modulus.
 */
int fac1[N], fac2[N], f1[N], f2[N];

/* Compute both prefix hashes of the 1-indexed string s (s[0] is skipped). */
void init(char *s)
{
    int n = strlen(s+1);

    for (int i = 1; i <= n; ++i) {
        /* Map 'a' to 1 so that the digit for 'a' is non-zero. */
        long long digit = s[i] - 'a' + 1;

        f1[i] = ((long long)f1[i-1] * B1 + digit) % P1;
        f2[i] = ((long long)f2[i-1] * B2 + digit) % P2;
    }
}

/*
 * Return the pair of hashes of s[l..r] (1-indexed, inclusive).
 * Requires init() to have been called and the power tables to be filled.
 */
pair<int,int> Hash(int l, int r)
{
    int len = r - l + 1;
    long long x = (f1[r] - (long long)f1[l-1] * fac1[len]) % P1;
    long long y = (f2[r] - (long long)f2[l-1] * fac2[len]) % P2;

    /* The remainder of a negative value is negative; shift it into [0, P). */
    if (x < 0)
        x += P1;
    if (y < 0)
        y += P2;
    return pair<int,int>((int)x, (int)y);
}

/* Precompute the powers of both bases that Hash() relies on. */
int main()
{
    fac1[0] = fac2[0] = 1;
    for (int i = 1; i < N; ++i) {
        fac1[i] = (long long)fac1[i-1] * B1 % P1;
        fac2[i] = (long long)fac2[i-1] * B2 % P2;
    }
}
4. manacher
$S$是辅助串, $rad_i$表示串$S$中第$i$位的回文半径
判断$s[i...j]$是否为回文, 若$rad[i+j]\ge j-i+1$则为回文, 否则不为回文
const int N = 1e6+10;

/*
 * S   : auxiliary string "#s1#s2#...#sn#" stored at S[1..2n+1], with
 *       sentinels at S[0] and S[2n+2]; it needs 2n+3 cells, hence N*2.
 * rad : rad[i] is the palindrome radius of S around position i, i.e.
 *       S[i-rad[i] .. i+rad[i]] is the longest palindrome centred at i.
 */
char S[N * 2];
int rad[N * 2];

/*
 * Manacher's algorithm on the 1-indexed string s (s[0] is skipped).
 *
 * Fills rad[1..2n+1]. Afterwards s[i..j] is a palindrome exactly when
 * rad[i+j] >= j-i+1. s must not contain '#' or '$', which are used as the
 * separator and the right sentinel.
 */
void manechar(const char *s)
{
    int n = (int)strlen(s + 1);

    S[0] = '\0';                    /* left sentinel, differs from '$' */
    S[1] = '#';
    for (int i = 1; i <= n; ++i) {
        S[i * 2] = s[i];
        S[i * 2 + 1] = '#';
    }
    n = 2 * n + 1;
    S[n + 1] = '$';                 /* right sentinel */

    /* j carries the radius already known to match at the next centre i. */
    for (int i = 1, j = 0, k; i <= n; i += k) {
        while (S[i - j - 1] == S[i + j + 1])
            ++j;
        rad[i] = j;
        /*
         * Copy radii from the mirrored positions while they are fully
         * determined; stop at the first centre that may extend further.
         */
        for (k = 1; k <= rad[i] && rad[i - k] != rad[i] - k; ++k)
            rad[i + k] = min(rad[i - k], rad[i] - k);
        j = max(j - k, 0);
    }
}
5. 回文树
回文树每个状态表示一个回文串, $fa$数组指向这个串的最长回文后缀
状态0为偶回文的根, 状态1为奇回文的根
//tot为当前插入字符总数, cnt为当前状态数, cnt-1为本质回文串数 struct PalindromicTree { int tot,cnt,last,len[N],fa[N],ch[N][26]; char s[N]; void init() { s[0] = '#', last = 0, cnt = 1, tot = 0; fa[0] = 1, len[0] = 0, len[1] = -1; memset(ch[0],0,sizeof ch[0]); memset(ch[1],0,sizeof ch[0]); } int newnode() { ++cnt; memset(ch[cnt],0,sizeof ch[0]); fa[cnt] = len[cnt] = 0; return cnt; } int getfail(int x) { while (s[tot-len[x]-1]!=s[tot]) x=fa[x]; return x; } void ins(int c) { s[++tot] = c; int p = getfail(last); if (!ch[p][c]) { int q = newnode(); len[q] = len[p]+2; fa[q] = ch[getfail(fa[p])][c]; ch[p][c] = q; } last = ch[p][c]; } };
6. exkmp
$z_i$表示$s[i...n-1]$与$s$的$lcp$长度, $z_0$无意义
/*
 * Z-function (exkmp) of the 0-indexed string s[0..n-1].
 *
 * On return z[i] is the length of the longest common prefix of s and
 * s[i..n-1] for 1 <= i < n. z[0] is not written.
 *
 * The extension loop is bounded by n, so s does not need a terminator at
 * s[n] and n may be shorter than the underlying buffer.
 */
void init(const char *s, int *z, int n)
{
    /* [l, mx) is the match window found so far that reaches furthest right. */
    int mx = 0, l = 0;

    for (int i = 1; i < n; ++i) {
        int k = 0;

        if (i < mx) {
            /* Reuse the mirrored value, clipped to the known window. */
            k = mx - i;
            if (z[i - l] < k)
                k = z[i - l];
        }
        while (i + k < n && s[k] == s[i + k])
            ++k;
        z[i] = k;
        if (i + k > mx) {
            mx = i + k;
            l = i;
        }
    }
}
7. SA
${rk}_i$表示后缀$i$的排名, ${sa}_i$表示排名为$i$的后缀位置
$h_i$表示第$i$小后缀和第$i-1$小后缀的$lcp$长度, 排名从$1$开始, 因此$h_1$无意义
$f$是$h$的$st$表, 用来求任意两后缀$lcp$长度
struct SA { int c[N],rk[N],h[N],sa[N],a[N],f[N][20]; void build(int *a, int m) { a[n+1] = rk[n+1] = h[n+1] = 0; int i,*x=rk,*y=h; for(i=1;i<=m;i++) c[i]=0; for(i=1;i<=n;i++) c[x[i]=a[i]]++; for(i=1;i<=m;i++) c[i]+=c[i-1]; for(i=n;i;i--) sa[c[x[i]]--]=i; for(int k=1,p;k<=n;k<<=1) { p=0; for(i=n-k+1;i<=n;i++) y[++p]=i; for(i=1;i<=n;i++) if(sa[i]>k) y[++p]=sa[i]-k; for(i=1;i<=m;i++) c[i]=0; for(i=1;i<=n;i++) c[x[y[i]]]++; for(i=1;i<=m;i++) c[i]+=c[i-1]; for(i=n;i;i--) sa[c[x[y[i]]]--]=y[i]; swap(x,y); x[sa[1]]=1; p=1; for(i=2;i<=n;i++) x[sa[i]]=(y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k])?p:++p; if(p==n) break; m=p; } for(i=1;i<=n;i++) rk[sa[i]]=i; for(int i=1,j,k=0;i<=n;i++) if (rk[i]!=1) { if(k) k--; j=sa[rk[i]-1]; while(a[i+k]==a[j+k]) k++; h[rk[i]] = k; } } void init(int *a) { build(a,100); for (int i=1; i<=n; ++i) f[i][0] = h[i]; for (int j=1; j<=19; ++j) { for (int i=0; i+(1<<j-1)-1<=n; ++i) { f[i][j] = min(f[i][j-1], f[i+(1<<j-1)][j-1]); } } } int lcp(int l, int r) { if (l==r) return n-l+1; l = rk[l], r = rk[r]; if (l>r) swap(l,r); ++l; int t = Log[r-l+1]; return min(f[l][t], f[r-(1<<t)+1][t]); } };
8. SAM
$SAM$就是把字符串按$endpos$等价类划分, 每个状态表示一个子串集合, 集合内所有子串$endpos$集合都相等, 并且每个子串都对应一条从根到该状态的路径
对于每个状态$x$, $x$所有子串是一系列连续后缀, 子串逐渐减短时$endpos$集合会变大, 得到一个新状态$y$, ${fa}_x$就指向这个状态$y$
对于每条后缀链接${fa}_y=x$ (记作$x\leftarrow y$), $x$的所有子串都是$y$所有子串的后缀, 并且$y$的$endpos$是$x$的$endpos$的子集
struct SuffixAutomaton { int last, cnt; int ch[N << 1][26], fa[N << 1], len[N << 1], pos[N << 1], sz[N << 1]; void init() { last = cnt = 1; memset(ch[1], 0, sizeof ch[1]); fa[1] = len[1] = 0; } int newnode(int idx) { ++cnt; memset(ch[cnt], 0, sizeof ch[cnt]); fa[cnt] = len[cnt] = 0; pos[cnt] = idx; return cnt; } int ins(int c) { int p = last , np = newnode(pos[last] + 1); last = np, len[np] = len[p] + 1; for(; p && !ch[p][c]; p = fa[p]) ch[p][c] = np; if(!p) fa[np] = 1; else { int q = ch[p][c]; if(len[p] + 1 == len[q]) fa[np] = q; else { int nq = newnode(pos[p] + 1); len[nq] = len[p] + 1; memcpy(ch[nq], ch[q], sizeof ch[q]); fa[nq] = fa[q], fa[q] = fa[np] = nq; for(; ch[p][c] == q; p = fa[p]) ch[p][c] = nq; } } sz[np] = 1; return np; } };