字符串

//普通
//Fill the global ne[] with the KMP failure table of s.
//ne[0] is -1; for 1 <= i <= strlen(s), ne[i] is the length of the longest
//proper prefix of s[0..i-1] that is also its suffix.
void getnext1(char *s)
{
    const int len = strlen(s);
    int pos = 0,border = -1;
    ne[0] = -1;
    while(pos < len)
    {
        //Mismatch: fall back to the next shorter border and retry.
        if(border != -1 && s[pos] != s[border])
        {
            border = ne[border];
            continue;
        }
        ++pos;
        ++border;
        ne[pos] = border;
    }
}

//加快预处理
//Variant of the failure table with the usual "skip equal character" shortcut:
//when s[i] equals s[border], a mismatch at i would mismatch at border too,
//so ne[i] jumps directly to ne[border]. Results go to the global ne[].
void getnext2(char *s)
{
    const int len = strlen(s);
    int pos = 0,border = -1;
    ne[0] = -1;
    while(pos < len)
    {
        if(border != -1 && s[pos] != s[border])
        {
            border = ne[border];
            continue;
        }
        ++pos;
        ++border;
        //s[len] is the terminating NUL, so reading s[pos] here stays in bounds.
        ne[pos] = (s[pos] == s[border]) ? ne[border] : border;
    }
}

//返回x在y中出现的次数,可以重叠
//Count the occurrences of pattern x inside text y; occurrences may overlap.
//Rebuilds the global ne[] for x via getnext1 on every call.
int kmp(char *x,char *y)
{
    getnext1(x);
    const int leny = strlen(y),lenx = strlen(x);
    int matched = 0,total = 0;
    for(int pos = 0;pos < leny;)
    {
        //Mismatch: shorten the matched prefix and compare again at the same text position.
        if(matched != -1 && x[matched] != y[pos])
        {
            matched = ne[matched];
            continue;
        }
        ++pos;
        ++matched;
        if(matched == lenx)
        {
            ++total;
            //Continue from the longest border so overlapping matches are found.
            matched = ne[matched];
        }
    }
    return total;
}
KMP模版

//求浮动匹配
//l[i]表示i结点之前(包括i结点),小于a[i]的个数
//le[i]表示i结点之前(包括i结点),小于等于a[i]的个数
//l[i] == l[j] && le[i] == le[j],则匹配
#include<bits/stdc++.h>
using namespace std;

//n: number of values in a, k: number of values in b, s: largest position touched by update();
//a: text, b: pattern, ne: failure table of b, tree: Fenwick tree over positions 1..s,
//l[i]/le[i]: counts of values < b[i] / <= b[i] among b[0..i], filled in main.
int n,k,s,a[100005],b[25005],ne[25005],tree[30],l[100005],le[100005];

//Return the value of the lowest set bit of x (0 when x is 0).
inline int lowbit(int x)
{
    const int negated = -x;
    return x & negated;
}

//Fenwick-tree point update: add x to position pos and to every node covering it, up to position s.
//pos must be positive, otherwise lowbit(pos) is 0 and the loop never advances.
void update(int pos,int x)
{
    for(int idx = pos;idx <= s;idx += lowbit(idx))  tree[idx] += x;
}

//Fenwick-tree prefix query: sum of the values stored at positions 1..pos.
int getsum(int pos)
{
    int total = 0;
    for(int idx = pos;idx != 0;idx -= lowbit(idx))  total += tree[idx];
    return total;
}

//Failure table of pattern b for order-preserving matching.
//Position pos extends a border when, inside the values currently held in the
//Fenwick tree, the counts of values < b[pos] and <= b[pos] equal l[border] and le[border].
void getnext1()
{
    memset(tree,0,sizeof(tree));
    ne[0] = -1;
    int pos = 0,border = -1;
    while(pos < k)
    {
        const bool extend = border == -1 || (getsum(b[pos]-1) == l[border] && getsum(b[pos]) == le[border]);
        if(extend)
        {
            ++pos;
            ++border;
            ne[pos] = border;
            if(pos < k) update(b[pos],1);
        }
        else
        {
            //Shrinking the border drops elements from the front of the window; take them out of the tree.
            const int stop = pos-ne[border];
            for(int t = pos-border;t < stop;t++)    update(b[t],-1);
            border = ne[border];
        }
    }
}

vector<int> kmp()
{
    getnext1();
    memset(tree,0,sizeof(tree));
    vector<int> ans;
    int i = 0,j = 0;
    update(a[0],1);
    while(i < n)
    {
        if(j == -1 || getsum(a[i]-1) == l[j] && getsum(a[i]) == le[j])
        {
            i++,j++;
            if(i < n)   update(a[i],1);
            if(j == k)
            {
                ans.push_back(i-k+1);
                for(int t = i-j;t < i-ne[j];t++)  update(a[t],-1);
                j = ne[j];
            }
        }
        else
        {
            for(int t = i-j;t < i-ne[j];t++)  update(a[t],-1);
            j = ne[j];
        }
    }
    return ans;
}

//Reads test cases "n k s", then the n text values and the k pattern values,
//and prints the number of order-preserving matches followed by their 1-based start positions.
int main()
{
    //Stop on EOF and also on malformed input (a short read would otherwise loop forever).
    while(scanf("%d%d%d",&n,&k,&s) == 3)
    {
        memset(tree,0,sizeof(tree));
        memset(l,0,sizeof(l));
        memset(le,0,sizeof(le));
        memset(ne,0,sizeof(ne));
        for(int i = 0;i < n;i++)    scanf("%d",&a[i]);
        for(int i = 0;i < k;i++)
        {
            scanf("%d",&b[i]);
            //After inserting b[i], record how many of b[0..i] are < b[i] and <= b[i].
            update(b[i],1);
            l[i] = getsum(b[i]-1);
            le[i] = getsum(b[i]);
        }
        vector<int> v = kmp();
        //size() returns size_t; convert once so that it matches the %d conversion.
        const int total = static_cast<int>(v.size());
        printf("%d\n",total);
        for(int i = 0;i < total;i++)    printf("%d\n",v[i]);
    }
    return 0;
}
浮动匹配

//ne[i]:x[i...m-1]与x[0...m-1]的最长公共前缀
//ex[i]:y[i...n-1]与x[0...m-1]的最长公共前缀
//Extended-KMP (Z-function) table: ne[i] = length of the longest common prefix
//of s[i..] and s itself, stored in the global ne[]. ne[1] is written even for
//strings shorter than two characters.
void getnext(char *s)
{
    const int len = strlen(s);
    ne[0] = len;
    int run = 0;
    while(run+1 < len && s[run] == s[run+1])    run++;
    ne[1] = run;
    //box: start of the match that currently reaches furthest to the right.
    int box = 1;
    for(int i = 2;i < len;i++)
    {
        if(ne[i-box]+i < ne[box]+box)
        {
            //Entirely inside the known match: copy the mirrored value.
            ne[i] = ne[i-box];
            continue;
        }
        int j = max(0,ne[box]+box-i);
        while(i+j < len && s[i+j] == s[j])  j++;
        ne[i] = j;
        box = i;
    }
}

//Extended KMP: ex[i] = length of the longest common prefix of y[i..] and x.
//Builds ne[] for x first; results are written to the global ex[].
void ekmp(char *x,char *y)
{
    getnext(x);
    const int lenx = strlen(x),leny = strlen(y);
    int j = 0;
    while(j < lenx && j < leny && x[j] == y[j]) j++;
    ex[0] = j;
    //box: start in y of the match that currently reaches furthest to the right.
    int box = 0;
    for(int i = 1;i < leny;i++)
    {
        if(ne[i-box]+i < ex[box]+box)
        {
            ex[i] = ne[i-box];
            continue;
        }
        j = max(0,ex[box]+box-i);
        while(i+j < leny && j < lenx && y[i+j] == x[j]) j++;
        ex[i] = j;
        box = i;
    }
}
扩展KMP

//abaa
//i:    0 1 2 3 4 5 6 7 8 9
//a[i]: $ # a # b # a # a #
//p[i]: 1 1 2 1 4 1 2 3 2 1
#include<bits/stdc++.h>
using namespace std;

//s: input string; a: s with '$' at index 0 and '#' around every character (built in main).
char s[100005],a[200005];
//p[i]: value computed by manacher() for centre i of a.
int p[200005];

//Manacher over the transformed string a[0..len-1]; fills p[1..len-1].
//Relies on a[0] being a character that occurs nowhere else and on a[len]
//differing from the compared characters, so the expansion loop stops on its own.
void manacher(int len)
{
    int mx = 0,id = 0;
    for(int i = 1;i < len;i++)
    {
        //Start from the mirrored radius when i lies inside the rightmost palindrome.
        int radius = 1;
        if(mx > i)  radius = min(p[2*id-i],mx-i);
        while(a[i+radius] == a[i-radius])   radius++;
        p[i] = radius;
        if(radius+i > mx)
        {
            mx = radius+i;
            id = i;
        }
    }
}
//Reads one word, interleaves it with '#' behind a leading '$', runs manacher
//and prints the largest p value minus one.
int main()
{
    scanf("%s",s);
    int len = 0;
    a[len++] = '$';
    a[len++] = '#';
    for(const char *c = s;*c;++c)
    {
        a[len++] = *c;
        a[len++] = '#';
    }
    manacher(len);
    int best = 0;
    for(int i = 0;i < len;i++)
    {
        if(p[i] > best) best = p[i];
    }
    printf("%d\n",best-1);
    return 0;
}
最长回文子串Manacher

//最小表示:一个环选一个起点使字典序最小
//返回最小表示坐标
//Minimal representation: treat s as a ring and return the start index of its
//lexicographically smallest rotation (0 for an empty string).
//The ring is read with modular indexing, so s is left untouched and needs no
//extra room behind its terminator.
int minpre(char *s)
{
    const int len = strlen(s);
    int i = 0,j = 1;
    while(i < len && j < len)
    {
        //k = length of the common prefix of the rotations starting at i and j.
        int k = 0;
        while(k < len && s[(i+k)%len] == s[(j+k)%len])  k++;
        if(k == len)    break;          //both rotations are identical
        //The larger rotation, and every start inside the compared span, cannot be the minimum.
        if(s[(i+k)%len] > s[(j+k)%len]) i += k+1;
        else    j += k+1;
        if(i == j)  j++;
    }
    return i < j ? i : j;
}
最小表示

//添加
//Insert the lowercase word s into the trie (globals ch, cnt, sz) and add one
//to cnt of every node on its path, i.e. to the count of each prefix of s.
//x is accepted for interface compatibility and is not used.
void add(char *s,int x)
{
    (void)x;
    //Measure once: calling strlen in the loop condition rescans the word on every step.
    const int len = strlen(s);
    int now = 0;
    for(int i = 0;i < len;i++)
    {
        const int c = s[i]-'a';
        if(!ch[now][c])
        {
            ch[now][c] = ++sz;
            cnt[sz] = 0;
        }
        now = ch[now][c];
        cnt[now]++;
    }
}

//查找数量
//Return cnt of the trie node reached by spelling s, or 0 when the path does not exist.
int getnum(char *s)
{
    //Measure once: calling strlen in the loop condition rescans the word on every step.
    const int len = strlen(s);
    int now = 0;
    for(int i = 0;i < len;i++)
    {
        const int c = s[i]-'a';
        if(!ch[now][c]) return 0;
        now = ch[now][c];
    }
    return cnt[now];
}
trie树

//用n个模式串建立自动机
//求目标中出现了几个模式串

//Aho-Corasick automaton over the letters 'a'..'z'.
//Usage: init(), insert() every pattern, build(), then query() a text.
struct Trie
{
    int next[500000][26],fail[500005],num[500005],root,cnt;

    //Allocate a node without children or patterns and return its index.
    int newnode()
    {
        const int id = cnt++;
        for(int c = 0;c < 26;c++)   next[id][c] = -1;
        num[id] = 0;
        return id;
    }

    //Reset the automaton to a single root node.
    void init()
    {
        cnt = 0;
        root = newnode();
    }

    //Add pattern s; num of its final node counts how many times it was inserted.
    void insert(char *s)
    {
        int cur = root;
        for(const char *p = s;*p;++p)
        {
            const int c = *p-'a';
            if(next[cur][c] == -1)  next[cur][c] = newnode();
            cur = next[cur][c];
        }
        num[cur]++;
    }

    //Breadth-first computation of the fail links; missing transitions are
    //replaced by the transition of the fail state, so afterwards every
    //next[][] entry of a reachable node is a valid node index.
    void build()
    {
        std::queue<int> pending;
        fail[root] = root;
        for(int c = 0;c < 26;c++)
        {
            const int child = next[root][c];
            if(child == -1)
            {
                next[root][c] = root;
                continue;
            }
            fail[child] = root;
            pending.push(child);
        }
        while(!pending.empty())
        {
            const int cur = pending.front();
            pending.pop();
            for(int c = 0;c < 26;c++)
            {
                const int viaFail = next[fail[cur]][c];
                const int child = next[cur][c];
                if(child == -1)
                {
                    next[cur][c] = viaFail;
                    continue;
                }
                fail[child] = viaFail;
                pending.push(child);
            }
        }
    }

    //Number of inserted patterns that occur in s. Every counted node is
    //zeroed, so a pattern is counted once and a second query starts from the
    //remaining counts.
    int query(char *s)
    {
        int cur = root,found = 0;
        for(const char *p = s;*p;++p)
        {
            cur = next[cur][*p-'a'];
            for(int t = cur;t != root;t = fail[t])
            {
                found += num[t];
                num[t] = 0;
            }
        }
        return found;
    }

    //Print every node with its fail link, pattern count and transitions.
    void debug()
    {
        for(int id = 0;id < cnt;id++)
        {
            printf("id = %3d,fail = %3d,num = %3d,chi = [",id,fail[id],num[id]);
            for(int c = 0;c < 26;c++)   printf("%2d",next[id][c]);
            printf("]\n");
        }
    }
};
//n: number of patterns of the current test case (counted down while reading them in main).
int n;
//s: input buffer, used for each pattern and then for the text.
char s[1000001];
//ac: the automaton; kept global because of the size of its arrays.
Trie ac;

//For each of T test cases: read n patterns, build the automaton, read the
//text and print how many patterns occur in it.
int main()
{
    int T;
    scanf("%d",&T);
    for(;T--;)
    {
        scanf("%d",&n);
        ac.init();
        for(;n--;)
        {
            scanf("%s",s);
            ac.insert(s);
        }
        ac.build();
        scanf("%s",s);
        const int found = ac.query(s);
        printf("%d\n",found);
    }
    return 0;
}
AC自动机

//O(nlogn)
//待排序数组长度n,放在0~n中,最后补0
//sa[i]:每个后缀串从小到大排第i小的位置
//rank[i]:i位置的从小到大排序位置
//height[i]:sa[i]和sa[i-1]对应后缀的最长公共前缀
//n = 8
//num[i]:   1 1 2 1 1 1 1 2 0       num[8]加0
//sa[i]:      8 3 4 5 0 6 1 7 2       num[0~n]有效
//rank[i]:  4 6 8 1 2 3 5 7 0       num[0~n-1]有效
//height[i]:0 0 3 2 3 1 2 0 1       num[2~n]有效

//Scratch (t1, t2, c) and result (sa, rk, height) arrays for da(); N is the capacity
//and must be defined before this line.
//The rank array is called rk because main and lcp refer to it by that name
//(NOTE(review): a global named `rank` can also clash with std::rank under `using namespace std`).
int t1[N],t2[N],c[N],sa[N],rk[N],height[N];
//mm: log2 table, best: sparse table of indices into rmq, rmq: the values whose
//range minima initrmq/askrmq answer.
int mm[200005],best[20][200005],rmq[200005];

//Reads one line and builds the suffix array, rank and height arrays of it.
int main()
{
    //gets() was removed in C++14 and cannot limit the read; fgets stops at the
    //buffer size. NOTE(review): sizeof(s) assumes s is declared as a char array.
    if(!fgets(s,sizeof(s),stdin))   return 0;
    int len = strlen(s);
    //fgets keeps the line break that gets() used to drop.
    if(len > 0 && s[len-1] == '\n') s[--len] = 0;
    for(int i = 0;i < len;i++)  r[i] = s[i];
    da(r,sa,rk,height,len,128);
    return 0;
}

//True when positions a and b agree in r both at the position itself and l further on.
bool cmp(int *r,int a,int b,int l)
{
    if(r[a] != r[b])    return false;
    return r[a+l] == r[b+l];
}

//Suffix array by prefix doubling with counting sort.
//r: input of length n; r[n] is overwritten with 0 and sorted along as a sentinel,
//   so r needs room for n+1 elements. Values are used as counting-sort keys
//   (presumably they must lie in [1,m) so that the sentinel is the unique minimum).
//sa: receives the sorted suffix start positions for indices 0..n.
//rank: receives the position of every suffix inside sa.
//height: height[rank[i]] receives the common prefix length of suffix i and
//   the suffix placed directly before it in sa.
//Uses the global scratch arrays t1, t2 and c.
void da(int *r,int *sa,int *rank,int *height,int n,int m)
{
    r[n] = 0;
    n++;
    //x holds the current key of every position, y the order by second key.
    int *x = t1,*y = t2;
    //Initial counting sort by single character.
    for(int i = 0;i < m;i++)    c[i] = 0;
    for(int i = 0;i < n;i++)    c[x[i] = r[i]]++;
    for(int i = 1;i < m;i++)    c[i] += c[i-1];
    for(int i = n-1;i >= 0;i--) sa[--c[x[i]]] = i;
    for(int j = 1;j <= n;j <<= 1)
    {
        int p = 0;
        //Positions whose second half starts past the end come first in second-key order.
        for(int i = n-j;i < n;i++)  y[p++] = i;
        //The rest follow in the order of the suffix j positions further on.
        for(int i = 0;i < n;i++)
        {
            if(sa[i] >= j)  y[p++] = sa[i]-j;
        }
        //Stable counting sort of y by first key.
        for(int i = 0;i < m;i++)    c[i] = 0;
        for(int i = 0;i < n;i++)    c[x[y[i]]]++;
        for(int i = 1;i < m;i++)    c[i] += c[i-1];
        for(int i = n-1;i >= 0;i--) sa[--c[x[y[i]]]] = y[i];
        //y now holds the old keys; write the new keys into x.
        swap(x,y);
        p = 1;
        x[sa[0]] = 0;
        for(int i = 1;i < n;i++)    x[sa[i]] = cmp(y,sa[i-1],sa[i],j)?p-1:p++;
        //All keys distinct: the order is final.
        if(p >= n)  break;
        m = p;
    }
    int k = 0;
    n--;
    for(int i = 0;i <= n;i++)   rank[sa[i]] = i;
    //Height computation: k carries over from suffix i-1, reduced by one.
    for(int i = 0;i < n;i++)
    {
        if(k)   k--;
        int t = sa[rank[i]-1];
        while(r[i+k] == r[t+k]) k++;
        height[rank[i]] = k;
    }
}


//Prepare range-minimum queries over rmq[1..n-1].
//mm[i] becomes floor(log2(i)) (mm[0] = -1); best[i][j] becomes the index of
//the minimum of rmq[j..j+2^i-1], the later index winning ties.
void initrmq(int n)
{
    mm[0] = -1;
    //i is a power of two exactly when (i&(i-1)) is 0. The inner parentheses
    //are required: == binds tighter than &, so without them the test is
    //i & ((i-1) == 0) and only i == 1 is recognised.
    for(int i = 1;i < n;i++)    mm[i] = ((i&(i-1)) == 0)?mm[i-1]+1:mm[i-1];
    for(int i = 1;i < n;i++)    best[0][i] = i;
    for(int i = 1;i <= mm[n-1];i++)
    {
        for(int j = 1;j+(1<<i)-1 < n;j++)
        {
            //Combine the two halves of length 2^(i-1).
            int a = best[i-1][j],b = best[i-1][j+(1<<(i-1))];
            best[i][j] = rmq[a] < rmq[b]?a:b;
        }
    }
}
//Index of the minimum of rmq[a..b] (a <= b), answered from the tables built by initrmq.
int askrmq(int a,int b)
{
    const int lvl = mm[b-a+1];
    //Two blocks of length 2^lvl, one anchored at each end, cover the range.
    const int left = best[lvl][a];
    const int right = best[lvl][b-(1<<lvl)+1];
    return rmq[left] < rmq[right]?left:right;
}

//求a,b位置开始的后缀的最长公共前缀
int lcp(int a,int b)
{
    a = rk[a];
    b = rk[b];
    if(a > b)   swap(a,b);
    return height[askrmq(a+1,b)];
}
后缀数组DA

//O(n)
//所有数组开3倍
#include<bits/stdc++.h>
//F(x): x/3 when x%3 == 1, otherwise x/3 + tb. Used by dc3 to place a suffix
//position into the reduced string; expands a variable named tb from the
//calling scope.
#define F(x) ((x)/3+((x)%3 == 1?0:tb))
//G(x): inverse direction: x*3+1 for x < tb, otherwise (x-tb)*3+2.
//Also expands tb from the calling scope.
#define G(x) ((x) < tb?(x)*3+1:((x)-tb)*3+2)
#define N 2005
using namespace std;

//wa, wb, wv, wss: scratch arrays of dc3/sort/c12; r: integer input string;
//sa, rk, height: result arrays (presumably the ones passed to da by the caller).
//Everything is sized 3*N as the header comment of this section requires.
int wa[3*N],wb[3*N],wv[3*N],wss[3*N],r[3*N],sa[3*N],rk[3*N],height[3*N];
//s: raw character input.
char s[3*N];

int c0(int *r,int a,int b)
{
    return r[a] == r[b] && r[a+1] == r[b+1] && r[a+2] == r[b+2];
}

//Merge comparison used by dc3: non-zero when the suffix at a sorts before the
//suffix at b. The first value decides; on a tie k == 2 compares one position
//further with k == 1, and otherwise the entries wv[a+1] and wv[b+1] decide.
int c12(int k,int *r,int a,int b)
{
    if(r[a] != r[b])    return r[a] < r[b];
    if(k == 2)  return c12(1,r,a+1,b+1);
    return wv[a+1] < wv[b+1];
}

//Stable counting sort: writes the n indices of a into b ordered by key r[a[i]].
//Keys must lie in [0,m). Uses the global scratch arrays wv (keys) and wss (counters).
void sort(int *r,int *a,int *b,int n,int m)
{
    int i;
    for(i = 0;i < n;i++)    wv[i] = r[a[i]];
    for(i = 0;i < m;i++)    wss[i] = 0;
    for(i = 0;i < n;i++)    ++wss[wv[i]];
    for(i = 1;i < m;i++)    wss[i] += wss[i-1];
    //Walk backwards so that equal keys keep their relative order.
    i = n;
    while(i-- > 0)
    {
        const int slot = --wss[wv[i]];
        b[slot] = a[i];
    }
}

//DC3 suffix sorting of r[0..n-1] with keys in [0,m); writes the order to sa.
//The recursion works in place behind the current input (rn = r+n, san = sa+n),
//which is why the arrays have to be larger than the input. Uses the global
//scratch arrays wa, wb and wv and the macros F and G (both read the local tb).
void dc3(int *r,int *sa,int n,int m)
{
    int *rn = r+n,*san = sa+n,ta = 0,tb = (n+1)/3,tbc = 0,i,j,p;
    //Two zero entries so that triples read near the end are defined.
    r[n]=r[n+1]=0;
    //Collect the positions with i%3 != 0.
    for(i = 0;i < n;i++)
    {
        if(i%3) wa[tbc++] = i;
    }
    //Radix sort of those positions by their triple, last value first.
    sort(r+2,wa,wb,tbc,m);
    sort(r+1,wb,wa,tbc,m);
    sort(r,wa,wb,tbc,m);
    //Name the triples; equal triples share a name. p ends as the number of names.
    for(p = 1,rn[F(wb[0])] = 0,i = 1;i < tbc;i++)
    {
        rn[F(wb[i])] = c0(r,wb[i-1],wb[i])?p-1:p++;
    }
    //Repeated names: sort the reduced string recursively; otherwise the names are the order.
    if(p < tbc) dc3(rn,san,tbc,p);
    else
    {
        for(i = 0;i < tbc;i++)  san[rn[i]] = i;
    }
    //Positions with i%3 == 0, listed in the order of the suffix that follows them.
    for(i = 0;i < tbc;i++)
    {
        if(san[i] < tb) wb[ta++] = san[i]*3;
    }
    if(n%3 == 1)    wb[ta++] = n-1;
    sort(r,wb,wa,ta,m);
    //Map the reduced order back to positions (G) and store each one's place in wv for c12.
    for(i = 0;i < tbc;i++)  wv[wb[i] = G(san[i])]=i;
    //Merge the two sorted groups.
    for(i = 0,j = 0,p = 0;i < ta && j < tbc;p++)    sa[p] = c12(wb[j]%3,r,wa[i],wb[j])?wa[i++]:wb[j++];
    for(;i < ta;p++)    sa[p] = wa[i++];
    for(;j < tbc;p++)   sa[p] = wb[j++];
}

//Suffix array via dc3 plus rank and height arrays.
//r must provide 3*n elements; everything from index n on is zeroed here and
//acts as the sentinel. m bounds the key values passed on to dc3.
void da(int *r,int *sa,int *rank,int *height,int n,int m)
{
    for(int i = n;i < n*3;i++)  r[i] = 0;
    //Sort n+1 suffixes: the real ones and the sentinel at position n.
    dc3(r,sa,n+1,m);
    for(int i = 0;i <= n;i++)   rank[sa[i]] = i;
    //h carries over from the previous suffix, reduced by one.
    int h = 0;
    for(int i = 0;i < n;i++)
    {
        if(h != 0)  h--;
        const int prev = sa[rank[i]-1];
        while(r[i+h] == r[prev+h])  h++;
        height[rank[i]] = h;
    }
}
后缀数组DC3

后缀自动机:

1.所有的子串都能够由root走到。

2.所有走到终止状态的路径都是后缀。

3.每个状态s代表的串的长度是区间(lenpre,lens]。

4.对于每个状态s,它代表的所有串在原串中出现次数和每次出现的右端点相同。

5.在后缀自动机的Parent树中,每个状态的right集合都是其父状态right集合的子集。

6.后缀自动机的Parent树是原串的反向前缀树。

7.两个串的最长公共后缀,位于这两个串对应状态在Parent树上的最近公共祖先状态。

//One suffix-automaton state.
//len: value set to st[p].len+1 on creation; right: occurrence counter (1 for
//states created for a new character, 0 for clones); pre: suffix link (-1 for
//none); sum: slot filled by the programs using the automaton;
//next: transitions per letter, -1 when absent.
struct samnode
{
    int len,right,pre,sum,next[26];
    //Reset to an empty state without transitions or suffix link.
    void clear()
    {
        memset(next,-1,sizeof(next));
        pre = -1;
        len = right = sum = 0;
    }
}st[2*N];
//sz: index of the most recently allocated state; root: initial state (0);
//last: state created by the latest samadd call; n is not used by the automaton routines shown here.
int n,sz,root,last;

//Reset the automaton to the single empty root state.
void saminit()
{
    sz = 0;
    root = 0;
    last = root;
    st[root].clear();
}

//Append letter w (0..25) to the string represented by the automaton.
void samadd(int w)
{
    int p = last;
    const int cur = ++sz;
    last = cur;
    st[cur].clear();
    st[cur].len = st[p].len+1;
    st[cur].right = 1;
    //Give every state on the suffix-link chain that lacks a w-transition one to cur.
    for(;p != -1 && st[p].next[w] == -1;p = st[p].pre)  st[p].next[w] = cur;
    if(p == -1)
    {
        st[cur].pre = root;
        return;
    }
    const int q = st[p].next[w];
    if(st[q].len == st[p].len+1)
    {
        st[cur].pre = q;
        return;
    }
    //q also stands for longer strings: split off a clone of length len(p)+1.
    //The clone keeps right == 0 from clear().
    const int clone = ++sz;
    st[clone].clear();
    memcpy(st[clone].next,st[q].next,sizeof(st[q].next));
    st[clone].len = st[p].len+1;
    st[clone].pre = st[q].pre;
    st[q].pre = clone;
    st[cur].pre = clone;
    //Redirect the transitions that led to q from p and its suffix-link ancestors.
    for(;p != -1 && st[p].next[w] == q;p = st[p].pre)   st[p].next[w] = clone;
}

//Build the suffix automaton of the lowercase string s from scratch.
void sambuild(char *s)
{
    saminit();
    for(const char *p = s;*p;++p)   samadd(*p-'a');
}
后缀自动机模版
int main()
{
    scanf("%s",s);
    sambuild(s);
    scanf("%s",s);
    int len = strlen(s),ans = 0;
    int p = root,l = 0;
    for(int i = 0;i < len;i++)
    {
        int c = s[i]-'a';
        if(st[p].next[c] != -1)
        {
            l++;
            p = st[p].next[c];
        }
        else
        {
            while(p != -1 &&st[p].next[c] == -1)    p = st[p].pre;
            if(p == -1)
            {
                l = 0;
                p = root;
            }
            else
            {
                l = st[p].len+1;
                p = st[p].next[c];
            }
        }
        ans = max(ans,l);
    }
    printf("%d\n",ans);
    return 0;
}
求最长公共子串
//Minimal rotation with a suffix automaton: build the automaton of s+s, follow
//the smallest available letter len times and print the 1-based start position.
int main()
{
    int T;
    scanf("%d",&T);
    while(T--)
    {
        scanf("%s",s);
        const int len = strlen(s);
        for(int i = 0;i < len;i++)  s[len+i] = s[i];
        s[2*len] = 0;
        sambuild(s);
        int now = root;
        for(int step = 0;step < len;step++)
        {
            //Smallest letter that has a transition from the current state.
            int c = 0;
            while(c < 26 && st[now].next[c] == -1)  c++;
            if(c < 26)  now = st[now].next[c];
        }
        printf("%d\n",st[now].len-len+1);
    }
}
求最小表示
//Build the suffix automaton of the lowercase string s from scratch.
void sambuild(char *s)
{
    saminit();
    const char *p = s;
    while(*p)
    {
        samadd(*p-'a');
        ++p;
    }
}

//s: input string.
char s[N];
//k: occurrence threshold read from input; num: counting-sort buckets indexed by state len;
//top[1..sz]: states ordered by increasing len.
int k,num[N],top[2*N];

//For each test case reads k and a word and prints the number of distinct
//substrings that occur at least k times.
int main()
{
    int T;
    scanf("%d",&T);
    while(T--)
    {
        scanf("%d%s",&k,s);
        sambuild(s);
        const int len = strlen(s);
        memset(num,0,sizeof(num));
        //Counting sort of the states 1..sz by len into top[1..sz].
        for(int v = 1;v <= sz;v++)  ++num[st[v].len];
        for(int d = 1;d <= len;d++) num[d] += num[d-1];
        for(int v = sz;v >= 1;v--)
        {
            top[num[st[v].len]] = v;
            --num[st[v].len];
        }
        //Longest states first: push the occurrence counts up the suffix links.
        for(int idx = sz;idx >= 1;idx--)
        {
            const int v = top[idx];
            const int parent = st[v].pre;
            if(parent != -1)    st[parent].right += st[v].right;
        }
        long long total = 0;
        for(int v = 1;v <= sz;v++)
        {
            if(st[v].right < k) continue;
            //A state stands for len(v)-len(pre(v)) different substrings.
            total += st[v].len-st[st[v].pre].len;
        }
        printf("%lld\n",total);
    }
    return 0;
}
求出现次数不少于k的子串数量
char s[N];
//t: read from input but not used by the code below; k: rank of the wanted substring;
//num: counting-sort buckets by state len; top[1..sz]: states ordered by increasing len
//(top[0] is never assigned and stays 0, the root).
int t,k,num[N] = {0},top[2*N];

//Print the rest of the k-th smallest substring, starting from state now.
//st[now].right substrings end at this state; st[child].sum are reachable through a child.
void dfs(int now,int k)
{
    if(k <= st[now].right)  return;
    int rest = k-st[now].right;
    for(int c = 0;c < 26;c++)
    {
        const int child = st[now].next[c];
        if(child == -1) continue;
        if(rest > st[child].sum)
        {
            //The answer is not below this letter: skip its whole subtree.
            rest -= st[child].sum;
            continue;
        }
        printf("%c",c+'a');
        dfs(child,rest);
        return;
    }
}

//k-th smallest substring where equal substrings count once: every state except
//the root weighs 1, sum is the number of substrings reachable from a state.
int main()
{
    scanf("%s%d%d",s,&t,&k);
    sambuild(s);
    const int len = strlen(s);
    //Counting sort of the states 1..sz by len into top[1..sz].
    for(int v = 1;v <= sz;v++)  ++num[st[v].len];
    for(int d = 1;d <= len;d++) num[d] += num[d-1];
    for(int v = sz;v >= 1;v--)
    {
        top[num[st[v].len]] = v;
        --num[st[v].len];
    }
    st[0].right = 0;
    for(int v = 1;v <= sz;v++)  st[v].right = 1;
    //Longest states first, so every transition target is finished before its sources.
    for(int idx = sz;idx >= 0;idx--)
    {
        const int v = top[idx];
        int total = st[v].right;
        for(int c = 0;c < 26;c++)
        {
            const int child = st[v].next[c];
            if(child != -1) total += st[child].sum;
        }
        st[v].sum = total;
    }
    if(k > st[0].sum)
    {
        printf("-1\n");
        return 0;
    }
    dfs(0,k);
    printf("\n");
    return 0;
}
求第k小的子串(重复算一个)
char s[N];
//t: read from input but not used by the code below; k: rank of the wanted substring;
//num: counting-sort buckets by state len; top[1..sz]: states ordered by increasing len
//(top[0] is never assigned and stays 0, the root).
int t,k,num[N] = {0},top[2*N];

//Print the rest of the k-th smallest substring, starting from state now.
//st[now].right counts the substrings ending at this state, st[child].sum those reachable through a child.
void dfs(int now,int k)
{
    const int here = st[now].right;
    if(k <= here)   return;
    int rest = k-here;
    for(int c = 0;c < 26;c++)
    {
        const int child = st[now].next[c];
        if(child == -1) continue;
        if(rest > st[child].sum)
        {
            //The answer is not below this letter: skip its whole subtree.
            rest -= st[child].sum;
            continue;
        }
        printf("%c",c+'a');
        dfs(child,rest);
        return;
    }
}

//k-th smallest substring where every occurrence counts separately: a state
//weighs as much as its number of occurrences (right), sum is the total weight
//reachable from a state.
int main()
{
    scanf("%s%d%d",s,&t,&k);
    sambuild(s);
    int len = strlen(s);
    //Counting sort of the states 1..sz by len into top[1..sz].
    for(int i = 1;i <= sz;i++)  num[st[i].len]++;
    for(int i = 1;i <= len;i++) num[i] += num[i-1];
    for(int i = sz;i >= 1;i--)  top[num[st[i].len]--] = i;
    //The loop over sum below also visits index 0; make it the root explicitly
    //instead of relying on the zero-initialised global.
    top[0] = 0;
    //Longest states first: push the occurrence counts up the suffix links.
    for(int i = sz;i >= 1;i--)
    {
        int p = top[i];
        if(st[p].pre != -1) st[st[p].pre].right += st[p].right;
    }
    //The propagation also adds to the root, but the root stands for the empty
    //string, which is not ranked. Without this reset dfs(0,k) stops right away
    //for small k and the -1 check counts too many substrings.
    st[0].right = 0;
    for(int i = sz;i >= 0;i--)
    {
        int p = top[i];
        st[p].sum = st[p].right;
        for(int j = 0;j < 26;j++)
        {
            int pp = st[p].next[j];
            if(pp == -1)    continue;
            st[p].sum += st[pp].sum;
        }
    }
    if(k > st[0].sum)   printf("-1\n");
    else
    {
        dfs(0,k);
        printf("\n");
    }
    return 0;
}
求第k小的子串(重复算多个)

#include<bits/stdc++.h>
using namespace std;


const int HASH = 10007;     //number of buckets
const int N = 2010;         //capacity: input length and maximum number of stored keys
const int SEED = 13331;     //multiplier of the polynomial string hash
char s[N];

//Chained hash table from 64-bit keys to an int id.
//Holds at most N keys; insert does not check the capacity.
struct HASHMAP
{
    //head has one entry per bucket (indexed by x%HASH, so it needs HASH entries);
    //next links the entries of a bucket, -1 ends a chain.
    int head[HASH],next[N],size,f[N];
    unsigned long long state[N];
    //Empty the table. Every bucket must start as -1, the end-of-chain marker
    //that insert() tests for.
    void init()
    {
        size = 0;
        memset(head,-1,sizeof(head));
        memset(state,-1,sizeof(state));
    }
    //Return the id stored for x when x is present; otherwise store (x,id) and return 0.
    int insert(unsigned long long x,int id)
    {
        const int h = x%HASH;
        for(int i = head[h];i != -1;i = next[i])
        {
            if(x == state[i])   return f[i];
        }
        f[size] = id;
        state[size] = x;
        next[size] = head[h];
        head[h] = size++;
        return 0;
    }
};

//Reads one line and computes its polynomial hash t (base SEED, arithmetic modulo 2^64).
int main()
{
    //gets() was removed in C++14 and cannot limit the read; fgets stops at the buffer size.
    if(!fgets(s,sizeof(s),stdin))   return 0;
    int len = strlen(s);
    //fgets keeps the line break that gets() used to drop.
    if(len > 0 && s[len-1] == '\n') s[--len] = 0;
    unsigned long long t = 0;
    for(int i = 1;i <= len;i++) t = t*SEED+s[i-1];
    (void)t;    //template only: the hash is not used further here
    return 0;
}
字符串hash

 
posted @ 2017-09-02 22:38  zzzzzzzzhu  阅读(213)  评论(0编辑  收藏  举报