字符串板

kmp,exkmp,ac自动机字符串下标从$0$开始, 其他都从$1$开始

1. kmp

$f_i$表示$s[0...i-1]$的最长$border$长度

如果$n-f_n$能整除$n$, 那么最小循环节为$n-f_n$, 否则不是周期串

// Build the KMP failure function of the 0-indexed string s.
// f[k] receives the length of the longest proper border of s[0..k-1];
// f needs room for strlen(s)+1 entries (and at least 2).
void init(char *s, int *f) {
    const int len = strlen(s);
    f[0] = 0;
    f[1] = 0;
    int border = 0;
    for (int pos = 1; pos < len; ++pos) {
        // fall back through shorter borders until s[pos] can extend one
        while (border != 0 && s[pos] != s[border]) {
            border = f[border];
        }
        if (s[pos] == s[border]) {
            ++border;
        }
        f[pos + 1] = border;
    }
}
// Count the occurrences (overlaps included) of pattern s inside text t.
// f must already hold the failure function of s, as filled by init(s, f).
int kmp(char *s, char *t, int *f) {
    const int textLen = strlen(t);
    const int patLen = strlen(s);
    int hits = 0;
    int matched = 0;  // number of pattern characters currently matched
    for (int pos = 0; pos < textLen; ++pos) {
        while (matched != 0 && s[matched] != t[pos]) {
            matched = f[matched];
        }
        if (s[matched] == t[pos]) {
            ++matched;
        }
        if (matched == patLen) {
            ++hits;
            // continue from the longest border so overlapping matches are found
            matched = f[matched];
        }
    }
    return hits;
}
View Code

 

2. ac自动机

$ac$自动机每个状态$x$可以看成从根到$x$的字符串, $fa$数组指向这个串最长合法后缀对应状态

求每个模板串在文本串中出现次数, 只要先匹配求出每个状态匹配的次数, 然后每个模板串出现次数就为$fail$树中子树匹配次数和

求所有模板串在文本串中出现次数和, 那么每匹配到一个状态, 答案加上这个状态到根的路径中模板串的个数即可

const int N = 1e6+10;
// Aho-Corasick automaton over the lowercase letters 'a'..'z'.
// Node indices start at 1 (the root); 0 means "no child" until build() runs.
struct AC_automaton {
    int rt, cnt, ch[N][26], fa[N];
    // Reset to a trie that contains only the root.
    void init() {
        cnt = 1;
        rt = 1;
        fa[1] = 0;
        memset(ch[1], 0, sizeof ch[1]);
    }
    // Allocate a cleared node and return its index.
    int newnode() {
        const int id = ++cnt;
        fa[id] = 0;
        memset(ch[id], 0, sizeof ch[id]);
        return id;
    }
    // Insert one pattern string (0-indexed).
    void ins(char *s) {
        const int len = strlen(s);
        int cur = rt;
        for (int i = 0; i < len; ++i) {
            int &next = ch[cur][s[i] - 'a'];
            if (next == 0) {
                next = newnode();
            }
            cur = next;
        }
    }
    // Turn the trie into the automaton: fill fa[] breadth-first and replace
    // every missing child by the transition of the fail state.
    void build() {
        queue<int> pending;
        for (int c = 0; c < 26; ++c) {
            const int child = ch[rt][c];
            if (child == 0) {
                ch[rt][c] = rt;
                continue;
            }
            fa[child] = rt;
            pending.push(child);
        }
        while (!pending.empty()) {
            const int u = pending.front();
            pending.pop();
            // transitions of u's fail state are final by the time u is dequeued
            const int *fallback = ch[fa[u]];
            for (int c = 0; c < 26; ++c) {
                if (ch[u][c] == 0) {
                    ch[u][c] = fallback[c];
                } else {
                    fa[ch[u][c]] = fallback[c];
                    pending.push(ch[u][c]);
                }
            }
        }
    }
    // Run the text s through the automaton.
    void query(char *s) {
        const int len = strlen(s);
        int cur = rt;
        for (int i = 0; i < len; ++i) {
            cur = ch[cur][s[i] - 'a'];
            // cur is now the state reached after reading s[0...i]
        }
    }
} ac;
View Code

 

3. hash

经典双模数hash

const int N = 1e6+10;
// Two independent (modulus, base) pairs; a substring hash is the pair of residues.
const int P1 = 876756319, B1 = 991;
const int P2 = 799898821, B2 = 2333;
// fac1[k] / fac2[k]: k-th power of B1 / B2 modulo P1 / P2 (filled in main).
// f1[i] / f2[i]: polynomial hash of the prefix s[1..i].
int fac1[N], fac2[N], f1[N], f2[N];

// Compute both prefix-hash tables for the 1-indexed string s (s[0] is skipped).
void init(char *s) {
    const int len = strlen(s + 1);
    for (int i = 1; i <= len; ++i) {
        const int code = s[i] - 'a' + 1;  // 'a' maps to 1
        f1[i] = ((long long)f1[i - 1] * B1 + code) % P1;
        f2[i] = ((long long)f2[i - 1] * B2 + code) % P2;
    }
}

// Hash of s[l..r] (1-indexed, inclusive); both components are non-negative.
pair<int,int> Hash(int l, int r) {
    const int span = r - l + 1;
    long long first = (f1[r] - (long long)f1[l - 1] * fac1[span]) % P1;
    long long second = (f2[r] - (long long)f2[l - 1] * fac2[span]) % P2;
    // % keeps the sign of the dividend, so shift negative residues up
    if (first < 0) {
        first += P1;
    }
    if (second < 0) {
        second += P2;
    }
    return pair<int,int>((int)first, (int)second);
}

// Precompute the powers of both hash bases; Hash() reads fac1/fac2.
int main() {
    fac1[0] = 1;
    fac2[0] = 1;
    for (int k = 1; k < N; ++k) {
        fac1[k] = (long long)fac1[k - 1] * B1 % P1;
        fac2[k] = (long long)fac2[k - 1] * B2 % P2;
    }
}
View Code

 

4. manacher

$S$是辅助串, $rad_i$表示串$S$中第$i$位的回文半径

判断$s[i...j]$是否为回文, 若$rad[i+j]\ge j-i+1$则为回文, 否则不为回文

const int N = 1e6+10;
// Auxiliary string, 1-indexed: '#' s[1] '#' s[2] ... s[n] '#', then '$'.
// An input of length n uses indices 0..2n+2, hence twice the capacity.
char S[N << 1];
// rad[i]: palindromic radius at position i of S, i.e. how many positions
// match on each side of i.
int rad[N << 1];

// Manacher's algorithm on the 1-indexed string s (s[0] is skipped).
// Fills rad[1..2n+1] for the auxiliary string S.
// Assumes the input contains neither '#' nor '$'.
void manechar(char *s) {
    int n = strlen(s+1);
    // Left sentinel set explicitly: NUL cannot occur inside the input and
    // differs from the right sentinel '$', so expansion always stops.
    S[0] = '\0';
    S[1] = '#';
    for (int i=1; i<=n; ++i) {
        S[i*2] = s[i];
        S[i*2+1] = '#';
    }
    n = 2*n+1, S[n+1] = '$';
    // i: current centre, j: radius already known to match at i,
    // k: how many following centres were resolved by mirroring.
    for (int i=1,j=0,k; i<=n; i+=k) {
        while (S[i-j-1]==S[i+j+1]) ++j;
        rad[i] = j;
        // copy mirrored radii while they are fully determined by rad[i]
        for (k=1; k<=rad[i]&&rad[i-k]!=rad[i]-k; ++k) {
            rad[i+k] = std::min(rad[i-k], rad[i]-k);
        }
        // the next centre i+k already matches at least j-k positions
        j = std::max(j-k, 0);
    }
}
View Code

 

5. 回文树

回文树每个状态表示一个回文串, $fa$数组指向这个串的最长回文后缀 

状态0为偶回文的根, 状态1为奇回文的根 

// Palindromic tree over symbols 0..25.
// tot: number of characters inserted so far; cnt: index of the newest node.
// Nodes 0 and 1 are the two roots, so cnt-1 distinct palindromes are stored.
struct PalindromicTree {
    int tot,cnt,last,len[N],fa[N],ch[N][26];
    char s[N];
    // Reset to the two roots: node 0 (length 0, even root) and
    // node 1 (length -1, odd root). s[0] is a sentinel ('#' == 35) that
    // differs from every symbol 0..25.
    void init() {
        s[0] = '#', last = 0, cnt = 1, tot = 0;
        fa[0] = 1, len[0] = 0, len[1] = -1;
        // ins() evaluates getfail(fa[p]) with p == 1 whenever it creates a
        // single-symbol node, so fa[1] must be defined after every reset
        // instead of depending on zero-initialised storage.
        fa[1] = 0;
        memset(ch[0],0,sizeof ch[0]);
        memset(ch[1],0,sizeof ch[1]);
    }
    // Allocate a cleared node and return its index.
    int newnode() {
        ++cnt;
        memset(ch[cnt],0,sizeof ch[cnt]);
        fa[cnt] = len[cnt] = 0;
        return cnt;
    }
    // Follow fa links from x until the palindrome of the reached node can be
    // extended on both sides by the newest character s[tot].
    // For node 1 (len -1) the comparison is s[tot] against itself, so the
    // walk always terminates there.
    int getfail(int x) {
        while (s[tot-len[x]-1]!=s[tot]) x=fa[x];
        return x;
    }
    // Append symbol c (an index 0..25, not a letter) to the string.
    void ins(int c) {
        s[++tot] = c;
        int p = getfail(last);
        if (!ch[p][c]) {
            int q = newnode();
            len[q] = len[p]+2;
            // the link is looked up before ch[p][c] is set, so a
            // single-symbol node resolves to node 0
            fa[q] = ch[getfail(fa[p])][c];
            ch[p][c] = q;
        }
        last = ch[p][c];
    }
};
View Code

 

6. exkmp

$z_i$表示$s[i...n-1]$与$s$的$lcp$长度, $z_0$无意义

// Z-function (exkmp) of the 0-indexed string s[0..n-1].
// For 1 <= i < n, z[i] is the length of the longest common prefix of s and
// s[i..n-1]. z[0] is not written.
// Only s[0..n-1] is read, so n may be shorter than the underlying buffer and
// no terminator is required.
void init(char *s, int *z, int n) {
    // [l, mx) is the match window that reaches furthest to the right so far
    int mx = 0, l = 0;
    for (int i=1; i<n; ++i) {
        // reuse the value mirrored inside the window, capped at its end
        z[i] = i<mx ? std::min(mx-i, z[i-l]) : 0;
        // bounded by n explicitly rather than by a NUL at s[n]
        while (i+z[i]<n && s[z[i]]==s[i+z[i]]) ++z[i];
        if (i+z[i]>mx) mx=i+z[i],l=i;
    }
}
View Code

 

7. SA

${rk}_i$表示后缀$i$的排名, ${sa}_i$表示排名为$i$的后缀位置

$h_i$表示第$i$小后缀和第$i-1$小后缀的$lcp$长度, $h_1$无意义

$f$是$h$的$st$表, 用来求任意两后缀$lcp$长度

// Suffix array with height array and a sparse table for LCP queries.
// All indices are 1-based. NOTE(review): n, N and Log are not declared in
// this snippet; presumably n is the global string length, N the array
// capacity and Log[x] = floor(log2(x)) -- confirm against the full program.
struct SA {
    // c: counting-sort buckets; rk[i]: rank of suffix i; sa[r]: start of the
    // suffix with rank r; h[r]: LCP of the suffixes ranked r and r-1;
    // f[i][j]: minimum of h over [i, i+2^j-1].
    // The member a is shadowed by the parameter a in build() and init().
    int c[N],rk[N],h[N],sa[N],a[N],f[N][20];
    // Build sa, rk and h for a[1..n] by prefix doubling with counting sort.
    // m is the largest value in a[1..n]; values are used as bucket indices
    // starting from 1.
    void build(int *a, int m) {
        // Index n+1 is zeroed in the caller's array and in both work buffers;
        // it is read as the key "past the end" below.
        a[n+1] = rk[n+1] = h[n+1] = 0;
        // x holds the current ranks, y the order by second key; rk and h
        // serve as the two buffers and are swapped every round.
        int i,*x=rk,*y=h;
        // initial sort by single character
        for(i=1;i<=m;i++) c[i]=0;
        for(i=1;i<=n;i++) c[x[i]=a[i]]++;
        for(i=1;i<=m;i++) c[i]+=c[i-1];
        for(i=n;i;i--) sa[c[x[i]]--]=i;
        for(int k=1,p;k<=n;k<<=1) {
            p=0;
            // suffixes with no second half come first in second-key order
            for(i=n-k+1;i<=n;i++) y[++p]=i;
            // the rest follow in the order of their second half
            for(i=1;i<=n;i++) if(sa[i]>k) y[++p]=sa[i]-k;
            // stable counting sort by first key, visiting in second-key order
            for(i=1;i<=m;i++) c[i]=0;
            for(i=1;i<=n;i++) c[x[y[i]]]++;
            for(i=1;i<=m;i++) c[i]+=c[i-1];
            for(i=n;i;i--) sa[c[x[y[i]]]--]=y[i];
            // y now holds the old ranks; new ranks are written into x
            swap(x,y); x[sa[1]]=1; p=1;
            for(i=2;i<=n;i++)
                x[sa[i]]=(y[sa[i-1]]==y[sa[i]] && y[sa[i-1]+k]==y[sa[i]+k])?p:++p;
            // stop once all ranks are distinct; otherwise p is the new key range
            if(p==n) break; m=p;
        }
        for(i=1;i<=n;i++) rk[sa[i]]=i;
        // Height array: k carries over between consecutive i, decremented by
        // one. h[1] is not assigned here and keeps whatever the sorting
        // rounds left in that buffer.
        for(int i=1,j,k=0;i<=n;i++) if (rk[i]!=1) {
            if(k) k--;
            j=sa[rk[i]-1];
            // comparison stops at the zero written to a[n+1] at the latest
            while(a[i+k]==a[j+k]) k++;
            h[rk[i]] = k;
        }
    }
    // Build the suffix array for a[1..n] with a fixed key range of 100 and
    // fill the sparse table f over h.
    void init(int *a) {
        build(a,100);
        for (int i=1; i<=n; ++i) f[i][0] = h[i];
        for (int j=1; j<=19; ++j) {
            // 1<<j-1 parses as 1<<(j-1)
            for (int i=0; i+(1<<j-1)-1<=n; ++i) {
                f[i][j] = min(f[i][j-1], f[i+(1<<j-1)][j-1]);
            }
        }
    }
    // LCP length of the suffixes starting at positions l and r.
    int lcp(int l, int r) {
        // identical suffix: the whole suffix is the common prefix
        if (l==r) return n-l+1;
        l = rk[l], r = rk[r];
        // minimum of h over the ranks (l, r], i.e. [l+1, r]
        if (l>r) swap(l,r); ++l;
        int t = Log[r-l+1];
        return min(f[l][t], f[r-(1<<t)+1][t]);
    }
};
View Code

 

8. SAM

$SAM$就是把字符串按$endpos$等价类划分, 每个状态表示一个子串集合, 集合内所有子串$endpos$集合都相等, 并且每个子串都对应一条从根到该状态的路径

对于每个状态$x$, $x$所有子串是一系列连续后缀, 子串逐渐减短时$endpos$集合会变大, 得到一个新状态$y$, ${fa}_x$就指向这个状态$y$

对于每个转移$x\leftarrow y$, $x$的所有子串都是$y$所有子串的后缀, 并且$y$的$endpos$是$x$的$endpos$的子集

// Suffix automaton over symbols 0..25. State 1 is the root and 0 means
// "no state". Arrays are sized N << 1 because clone states are created in
// addition to one state per inserted character.
struct SuffixAutomaton {
    int last, cnt;
    // ch: transitions; fa: suffix link; len: longest string of the state;
    // pos: position value given to the state at creation;
    // sz: 1 for states created directly by ins(), 0 for the root and clones.
    int ch[N << 1][26], fa[N << 1], len[N << 1], pos[N << 1], sz[N << 1];
    // Reset to an automaton that contains only the root.
    void init() {
        last = cnt = 1;
        memset(ch[1], 0, sizeof ch[1]);
        fa[1] = len[1] = 0;
        // ins() reads pos[last] for the root, and sz[1] should not survive
        // from an earlier use of the object
        pos[1] = sz[1] = 0;
    }
    // Allocate a cleared state with pos set to idx and return its index.
    int newnode(int idx) {
        ++cnt;
        memset(ch[cnt], 0, sizeof ch[cnt]);
        fa[cnt] = len[cnt] = 0;
        // Clear sz as well: a clone never has sz assigned, so without this
        // it would keep a stale 1 when the index is reused after init().
        sz[cnt] = 0;
        pos[cnt] = idx;
        return cnt;
    }
    // Append symbol c (an index 0..25) and return the state that represents
    // the whole string inserted so far.
    int ins(int c) {
        int p = last , np = newnode(pos[last] + 1);
        last = np, len[np] = len[p] + 1;
        // add the transition on c along the suffix links that lack it
        for(; p && !ch[p][c]; p = fa[p]) ch[p][c] = np;
        if(!p) fa[np] = 1;
        else {
            int q = ch[p][c];
            if(len[p] + 1 == len[q]) fa[np] = q;
            else
            {
                // q also holds longer strings: split off a clone of length
                // len[p]+1 that takes over q's transitions and suffix link.
                // NOTE(review): the clone gets pos[p] + 1; whether that is
                // the intended position value is not evident here -- confirm.
                int nq = newnode(pos[p] + 1);
                len[nq] = len[p] + 1;
                memcpy(ch[nq], ch[q], sizeof ch[q]);
                fa[nq] = fa[q], fa[q] = fa[np] = nq;
                // stop at p == 0 explicitly rather than relying on ch[0]
                // being all zero
                for(; p && ch[p][c] == q; p = fa[p]) ch[p][c] = nq;
            }
        }
        sz[np] = 1;
        return np;
    }
};
View Code

 

posted @ 2020-10-14 18:23  dz8gk0j  阅读(184)  评论(0编辑  收藏  举报