后缀数组小结

后缀数组又被称为字符串处理神器;

http://blog.csdn.net/xymscau/article/details/8798046 这里讲的非常好

实现rank排名时用到了倍增法和一个比较神奇的计数排序(基数排序),时间复杂度是O(n log n)

height[i]存放的是排名第i的后缀与排名第i-1的后缀的最长公共前缀的长度,

sa[i]存的是排名第i的后缀是第几位开头的

rk[i]存放第i个位置开头的后缀的字典序排名

Poj 2774,Poj1743,Poj3294,Poj3261,Poj2758

1.poj2774(后缀数组水题)

题意:给你两串字符,要你找出在这两串字符中都出现过的最长子串.........

思路:先用个分隔符将两个字符串连接起来,再用后缀数组求出height数组的值,找出一个height值最大并且i与i-1的sa值分别在两串字符中就好.....

正确性证明:令一个后缀是i,与它拥有最长公共前缀的后缀是j,我们知道i和j的排名一定是相邻的。

那么我们将两个字符串用空格连接起来之后,假设答案出现在i和j这两个位置,如果这两个位置的rank不相邻,设它们中间有一个后缀k,那么显然i与k,或者j与k是一个不更差的解(公共前缀至少一样长),所以只需要检查排名相邻的后缀。

#include<iostream>
#include<string.h>
#include<stdio.h>
using namespace std;

// rep(i,n): counting loop with a fresh int i running over 0 .. n-1.
// The bound is parenthesized so that expression arguments expand safely.
#define rep(i,n) for(int i = 0;i < (n); i++)
using namespace std;
// size: capacity of every work array below.  INF: a large sentinel value.
const int size  = 200005,INF = 1<<30;
// Since C++17 the standard library declares std::size, so with the
// using-directive above an unqualified `size` is ambiguous and the array
// declarations fail to compile.  `::size` names the constant defined here.
//   rk[i]     - rank of the suffix starting at position i
//   sa[i]     - start position of the suffix with rank i
//   height[i] - common-prefix length of the suffixes ranked i and i-1
//   w         - integer copy of the text being indexed
//   wa, res   - scratch buffers used by getSa()
int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
// Builds the suffix array of the integer sequence w[0..len-1] into sa[] by
// prefix doubling, using a counting sort in every round.
//   len - number of symbols of w[] to sort.
//   up  - number of counting-sort buckets cleared for the first pass; the
//         caller in this file passes (largest symbol + 1).
// rk[], height[], res[] and wa[] are used as scratch space here, so their
// contents afterwards are not meaningful until getHeight() rebuilds them.
void getSa (int len,int up) {
    // k: current sort key of each position, id: positions ordered by their
    // second key, r: spare key buffer, cnt: bucket counters.
    int *k = rk,*id = height,*r = res, *cnt = wa;
    // First pass: counting sort of all positions by their single symbol.
    rep(i,up) cnt[i] = 0;
    rep(i,len) cnt[k[i] = w[i]]++;
    // NOTE(review): this prefix sum also updates cnt[up], one slot past the
    // range cleared above; presumably every key is < up so that slot is never
    // used as a bucket - confirm against the callers.
    rep(i,up) cnt[i+1] += cnt[i];
    // Filling from the back keeps positions with equal keys in index order.
    for(int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }
    // d is the half-length already sorted; each round sorts by 2*d symbols.
    int d = 1,p = 0;
    // p is 0 every time this condition is evaluated, so for len > 0 the loop
    // is left through the return statement below.
    while(p < len){
        // Positions whose second half would start at or beyond len come first.
        for(int i = len - d; i < len; i++) id[p++] = i;
        // The remaining positions follow in the order of their second half,
        // which is the current sa[] order shifted back by d.
        rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
        // r[i] is the first-half key of the i-th position in second-key order.
        rep(i,len) r[i] = k[id[i]];
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[r[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        // Counting sort by first-half key, walking id[] from the back.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }
        // After the swap r holds the previous per-position keys and k is the
        // buffer that receives the new ones.
        swap(k,r);
        p = 0;
        k[sa[0]] = p++;
        rep(i,len-1) {
            // Neighbours in sa[] share a key only if both second halves start
            // inside the text and both halves carry equal previous keys.
            if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                k[sa[i+1]] = p - 1;
            else k[sa[i+1]] = p++;
        }
        // len distinct keys means every suffix has its final place in sa[].
        if(p >= len) return ;
        // Next round: double the half-length; keys now lie in 0 .. p-1.
        d *= 2,up = p, p = 0;
    }
}
// Rebuilds rk[] as the inverse of sa[] and fills height[]: height[r] is the
// number of leading symbols of w[] shared by the suffix of rank r and the
// suffix of rank r-1; height[0] is set to 0.
//   len - number of entries in sa[] (and symbols in w[]).
// The suffix starting at len-1 is skipped; for every other start the code
// reads sa[rk[start]-1], so it assumes those suffixes have rank >= 1
// (presumably guaranteed by a unique smallest last symbol - TODO confirm).
void getHeight(int len) {
    for (int order = 0; order < len; ++order) {
        rk[sa[order]] = order;
    }
    height[0] = 0;

    // Number of symbols already known to match, carried from one start
    // position to the next.
    int matched = 0;
    for (int start = 0; start + 1 < len; ++start) {
        const int order = rk[start];
        const int prev = sa[order - 1];
        while (start + matched < len && prev + matched < len
               && w[start + matched] == w[prev + matched]) {
            ++matched;
        }
        height[order] = matched;
        // The next start position keeps all but the first matched symbol.
        if (matched > 0) {
            --matched;
        }
    }
}
// Indexes the NUL-terminated string s: copies it into w[], appends a 0
// symbol after the last character, then builds sa[] (getSa) and rk[] /
// height[] (getHeight) for the extended sequence.
// Returns the length of the extended sequence, i.e. strlen(s) + 1, which is
// also the number of entries in sa[] and height[].
int getSuffix(char s[]) {
    int len = strlen(s),up = 0;
    for(int i = 0; i < len; i++) {
        // Go through unsigned char: where plain char is signed, a byte
        // >= 0x80 would otherwise become a negative bucket index in getSa().
        w[i] = static_cast<unsigned char>(s[i]);
        if(w[i] > up) up = w[i];
    }
    w[len++] = 0;
    getSa(len,up+1);
    getHeight(len);
    return len;
}

// Room for two strings plus the separator main() writes between them and the
// terminating NUL.  Assuming each string has at most 100000 characters (what
// the 100000*2 suggests), that takes 2*100000 + 2 bytes; the previous "+1"
// was one byte short.
const int maxa = 100000*2+2;
char str[maxa];
// Reads pairs of whitespace-delimited strings until end of input and prints,
// for each pair, the length of the longest substring occurring in both.
int main(){
    while(scanf("%s", str)!=EOF){
        const int l = strlen(str);
        // Join the two strings with a blank; "%s" never reads a blank, so the
        // separator cannot occur inside either string.
        str[l] = ' ';
        // Stop on a missing second string instead of indexing stale bytes.
        if(scanf("%s", str+l+1) != 1) break;
        // getSuffix() returns the number of ranked suffixes (text length + 1,
        // because it appends a 0 symbol), so valid ranks are 0 .. total-1.
        const int total = getSuffix(str);
        int ans = 0;
        // Visit every pair of adjacent ranks.  The old bound, strlen(str),
        // left out the last pair, e.g. "zz" / "zz" printed 1 instead of 2.
        for(int i = 1; i < total; i++){
            // Only pairs with one suffix starting before the separator and
            // the other starting after it count.
            if((sa[i-1] < l && sa[i] > l) || (sa[i-1] > l && sa[i] < l)){
                if(height[i] > ans) ans = height[i];
            }
        }
        printf("%d\n", ans);
    }
    return 0;
}
/*
abcde
bcde
*/
View Code

 

2.poj1743

题意:给一串数字,求变化相同,且不重叠的最长字符串

变化相同就是将字符串s[i]变成s[i]-s[i-1]

对差分后的数组求后缀数组,height[i]对应的是原序列中两段长度为height[i]+1、变化相同的旋律;差分数组中两个位置的间距是n,它们在原序列中的间距也是n,所以如果一组后缀的height最小值是n,它们起始位置的间距至少要是n+1,才能保证原序列中的两段不重叠。

二分答案的方法这里讲的很好http://blog.sina.com.cn/s/blog_6635898a0102e0me.html

#include<iostream>
#include<string.h>
#include<stdio.h>
using namespace std;

// rep(i,n): counting loop with a fresh int i running over 0 .. n-1.
// The bound is parenthesized so that expression arguments expand safely.
#define rep(i,n) for(int i = 0;i < (n); i++)
using namespace std;
// size: capacity of every work array below.  INF: a large sentinel value.
const int size  = 200005,INF = 1<<30;
// Since C++17 the standard library declares std::size, so with the
// using-directive above an unqualified `size` is ambiguous and the array
// declarations fail to compile.  `::size` names the constant defined here.
//   rk[i]     - rank of the suffix starting at position i
//   sa[i]     - start position of the suffix with rank i
//   height[i] - common-prefix length of the suffixes ranked i and i-1
//   w         - integer copy of the sequence being indexed
//   wa, res   - scratch buffers used by getSa()
int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
// Builds the suffix array of the integer sequence w[0..len-1] into sa[] by
// prefix doubling, using a counting sort in every round.
//   len - number of symbols of w[] to sort.
//   up  - number of counting-sort buckets cleared for the first pass; the
//         caller in this file passes (largest symbol + 1).
// rk[], height[], res[] and wa[] are used as scratch space here, so their
// contents afterwards are not meaningful until getHeight() rebuilds them.
void getSa (int len,int up) {
    // k: current sort key of each position, id: positions ordered by their
    // second key, r: spare key buffer, cnt: bucket counters.
    int *k = rk,*id = height,*r = res, *cnt = wa;
    // First pass: counting sort of all positions by their single symbol.
    rep(i,up) cnt[i] = 0;
    rep(i,len) cnt[k[i] = w[i]]++;
    // NOTE(review): this prefix sum also updates cnt[up], one slot past the
    // range cleared above; presumably every key is < up so that slot is never
    // used as a bucket - confirm against the callers.
    rep(i,up) cnt[i+1] += cnt[i];
    // Filling from the back keeps positions with equal keys in index order.
    for(int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }
    // d is the half-length already sorted; each round sorts by 2*d symbols.
    int d = 1,p = 0;
    // p is 0 every time this condition is evaluated, so for len > 0 the loop
    // is left through the return statement below.
    while(p < len){
        // Positions whose second half would start at or beyond len come first.
        for(int i = len - d; i < len; i++) id[p++] = i;
        // The remaining positions follow in the order of their second half,
        // which is the current sa[] order shifted back by d.
        rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
        // r[i] is the first-half key of the i-th position in second-key order.
        rep(i,len) r[i] = k[id[i]];
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[r[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        // Counting sort by first-half key, walking id[] from the back.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }
        // After the swap r holds the previous per-position keys and k is the
        // buffer that receives the new ones.
        swap(k,r);
        p = 0;
        k[sa[0]] = p++;
        rep(i,len-1) {
            // Neighbours in sa[] share a key only if both second halves start
            // inside the text and both halves carry equal previous keys.
            if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                k[sa[i+1]] = p - 1;
            else k[sa[i+1]] = p++;
        }
        // len distinct keys means every suffix has its final place in sa[].
        if(p >= len) return ;
        // Next round: double the half-length; keys now lie in 0 .. p-1.
        d *= 2,up = p, p = 0;
    }
}
// Rebuilds rk[] as the inverse of sa[] and fills height[]: height[r] is the
// number of leading symbols of w[] shared by the suffix of rank r and the
// suffix of rank r-1; height[0] is set to 0.
//   len - number of entries in sa[] (and symbols in w[]).
// The suffix starting at len-1 is skipped; for every other start the code
// reads sa[rk[start]-1], so it assumes those suffixes have rank >= 1
// (presumably guaranteed by a unique smallest last symbol - TODO confirm).
void getHeight(int len) {
    for (int order = 0; order < len; ++order) {
        rk[sa[order]] = order;
    }
    height[0] = 0;

    // Number of symbols already known to match, carried from one start
    // position to the next.
    int matched = 0;
    for (int start = 0; start + 1 < len; ++start) {
        const int order = rk[start];
        const int prev = sa[order - 1];
        while (start + matched < len && prev + matched < len
               && w[start + matched] == w[prev + matched]) {
            ++matched;
        }
        height[order] = matched;
        // The next start position keeps all but the first matched symbol.
        if (matched > 0) {
            --matched;
        }
    }
}
// Indexes the first n values of s: copies them into w[], stores a 0 symbol
// at w[n], then runs getSa() and getHeight() on the n+1 symbols.
// Returns n + 1, the number of entries written to sa[] and height[].
int getSuffix(int s[], int n) {
    int largest = 0;
    for (int pos = 0; pos < n; ++pos) {
        w[pos] = s[pos];
        if (w[pos] > largest) {
            largest = w[pos];
        }
    }
    w[n] = 0;
    const int total = n + 1;
    getSa(total, largest + 1);
    getHeight(total);
    return total;
}

// Capacity of the two buffers below.
const int maxa = 100000*2+1;
// str: sequence handed to getSuffix(); a: raw input values read by main().
int str[maxa];
int a[maxa];
// Scans ranks 0..n and tracks, over each run of consecutive ranks whose
// height[] is at least ans, the smallest and largest start position seen.
// Returns 1 as soon as those two positions lie more than ans apart,
// otherwise 0.  A rank with height[] below ans restarts the run at that rank.
int judge(int ans, int n){
    int lowest = sa[0];
    int highest = sa[0];
    for (int order = 0; order <= n; ++order) {
        const int pos = sa[order];
        if (height[order] < ans) {
            lowest = highest = pos;
            continue;
        }
        if (pos < lowest) {
            lowest = pos;
        }
        if (pos > highest) {
            highest = pos;
        }
        if (highest - lowest > ans) {
            return 1;
        }
    }
    return 0;
}
// For every test case (a count n followed by n integers; a count of 0 ends
// the program) builds the suffix array of the shifted difference sequence,
// binary-searches with judge() and prints the result, or 0 when the result
// is below 5.
int main(){
    int n;
    for (;;) {
        if (scanf("%d", &n) == EOF) {
            break;
        }
        if (n == 0) {
            return 0;
        }
        for (int idx = 0; idx < n; ++idx) {
            scanf("%d", &a[idx]);
        }
        // Differences of neighbouring values, shifted by 100.
        const int diffs = n - 1;
        for (int idx = 0; idx < diffs; ++idx) {
            str[idx] = a[idx + 1] - a[idx] + 100;
        }
        str[diffs] = 0;
        getSuffix(str, diffs);

        // Find the smallest value in [0, n-1] that judge() rejects.
        int lo = 0;
        int hi = n - 1;
        while (lo < hi) {
            const int mid = (lo + hi) / 2;
            if (judge(mid, diffs)) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }
        if (lo >= 5) {
            printf("%d\n", lo);
        } else {
            printf("0\n");
        }
    }
}
/*
abcde
bcde
*/
View Code

 

3.poj3261

题意:在一个序列中,找出出现次数不小于K次(可以重叠)的最长子串的长度:

依旧是二分:

#include<iostream>
#include<string.h>
#include<vector>
#include<map>
#include<set>
#include<stdio.h>
#include<algorithm>
using namespace std;

// rep(i,n): counting loop with a fresh int i running over 0 .. n-1.
// The bound is parenthesized so that expression arguments expand safely.
#define rep(i,n) for(int i = 0;i < (n); i++)
using namespace std;
// size: capacity of every work array below.  INF: a large sentinel value.
const int size  = 200005,INF = 1<<30;
// Since C++17 the standard library declares std::size, so with the
// using-directive above an unqualified `size` is ambiguous and the array
// declarations fail to compile.  `::size` names the constant defined here.
//   rk[i]     - rank of the suffix starting at position i
//   sa[i]     - start position of the suffix with rank i
//   height[i] - common-prefix length of the suffixes ranked i and i-1
//   w         - integer copy of the sequence being indexed
//   wa, res   - scratch buffers used by getSa()
int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
// Builds the suffix array of the integer sequence w[0..len-1] into sa[] by
// prefix doubling, using a counting sort in every round.
//   len - number of symbols of w[] to sort.
//   up  - number of counting-sort buckets cleared for the first pass; the
//         caller in this file passes (largest symbol + 1).
// rk[], height[], res[] and wa[] are used as scratch space here, so their
// contents afterwards are not meaningful until getHeight() rebuilds them.
void getSa (int len,int up) {
    // k: current sort key of each position, id: positions ordered by their
    // second key, r: spare key buffer, cnt: bucket counters.
    int *k = rk,*id = height,*r = res, *cnt = wa;
    // First pass: counting sort of all positions by their single symbol.
    rep(i,up) cnt[i] = 0;
    rep(i,len) cnt[k[i] = w[i]]++;
    // NOTE(review): this prefix sum also updates cnt[up], one slot past the
    // range cleared above; presumably every key is < up so that slot is never
    // used as a bucket - confirm against the callers.
    rep(i,up) cnt[i+1] += cnt[i];
    // Filling from the back keeps positions with equal keys in index order.
    for(int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }
    // d is the half-length already sorted; each round sorts by 2*d symbols.
    int d = 1,p = 0;
    // p is 0 every time this condition is evaluated, so for len > 0 the loop
    // is left through the return statement below.
    while(p < len){
        // Positions whose second half would start at or beyond len come first.
        for(int i = len - d; i < len; i++) id[p++] = i;
        // The remaining positions follow in the order of their second half,
        // which is the current sa[] order shifted back by d.
        rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
        // r[i] is the first-half key of the i-th position in second-key order.
        rep(i,len) r[i] = k[id[i]];
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[r[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        // Counting sort by first-half key, walking id[] from the back.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }
        // After the swap r holds the previous per-position keys and k is the
        // buffer that receives the new ones.
        swap(k,r);
        p = 0;
        k[sa[0]] = p++;
        rep(i,len-1) {
            // Neighbours in sa[] share a key only if both second halves start
            // inside the text and both halves carry equal previous keys.
            if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                k[sa[i+1]] = p - 1;
            else k[sa[i+1]] = p++;
        }
        // len distinct keys means every suffix has its final place in sa[].
        if(p >= len) return ;
        // Next round: double the half-length; keys now lie in 0 .. p-1.
        d *= 2,up = p, p = 0;
    }
}
// Rebuilds rk[] as the inverse of sa[] and fills height[]: height[r] is the
// number of leading symbols of w[] shared by the suffix of rank r and the
// suffix of rank r-1; height[0] is set to 0.
//   len - number of entries in sa[] (and symbols in w[]).
// The suffix starting at len-1 is skipped; for every other start the code
// reads sa[rk[start]-1], so it assumes those suffixes have rank >= 1
// (presumably guaranteed by a unique smallest last symbol - TODO confirm).
void getHeight(int len) {
    for (int order = 0; order < len; ++order) {
        rk[sa[order]] = order;
    }
    height[0] = 0;

    // Number of symbols already known to match, carried from one start
    // position to the next.
    int matched = 0;
    for (int start = 0; start + 1 < len; ++start) {
        const int order = rk[start];
        const int prev = sa[order - 1];
        while (start + matched < len && prev + matched < len
               && w[start + matched] == w[prev + matched]) {
            ++matched;
        }
        height[order] = matched;
        // The next start position keeps all but the first matched symbol.
        if (matched > 0) {
            --matched;
        }
    }
}
// Indexes the first n values of s: copies them into w[], stores a 0 symbol
// at w[n], then runs getSa() and getHeight() on the n+1 symbols.
// Returns n + 1, the number of entries written to sa[] and height[].
int getSuffix(int s[], int n) {
    int biggest = 0;
    int count = 0;
    while (count < n) {
        const int symbol = s[count];
        w[count] = symbol;
        biggest = (w[count] > biggest) ? w[count] : biggest;
        ++count;
    }
    // Terminating symbol; count now equals max(n, 0) == n for n >= 0.
    w[n] = 0;
    const int extended = n + 1;
    getSa(extended, biggest + 1);
    getHeight(extended);
    return extended;
}
// Capacity of the buffers declared below.
const int maxa = 100000*2+5;
// Input values; main() replaces them in place with small ids taken from mp1
// before handing the array to getSuffix().
int num[maxa];
// NOTE(review): str, mp, vis, que, qq and o are not referenced by judge() or
// main() of this program; they look like leftovers from another solution.
char str[maxa];
int mp[maxa];
int vis[200];
int que[maxa], qq[maxa];
int o;
// Walks ranks 0..n counting runs: a rank whose height[] is at least mid
// extends the current run, any other rank closes it and starts a new run of
// size 1.  Returns 1 if some run (including the final one) reaches size K,
// otherwise 0.
int judge(int n, int mid, int K){
    int run = 0;
    for (int order = 0; order <= n; ++order) {
        if (height[order] >= mid) {
            ++run;
            continue;
        }
        // The run ending just before this rank is complete; test it, then
        // start over with this rank as the only member.
        if (run >= K) {
            return 1;
        }
        run = 1;
    }
    return (run >= K) ? 1 : 0;
}
// snum: copy of the input values that main() sorts.
int snum[maxa];
// mp1: maps an input value to the small positive id main() substitutes for it.
map<int,int>mp1;
// For every test case (n, K, then n integers) replaces the values by small
// ids, builds the suffix array and binary-searches with judge() for the
// largest length it accepts, which is printed on its own line.
int main(){
    int n, K;
    // Require both numbers; the old "!= EOF" test kept looping on malformed
    // input, where scanf returns 0 or 1 without consuming anything.
    while(scanf("%d%d", &n, &K) == 2){
        mp1.clear();
        for(int i = 0; i < n; i++){
            scanf("%d", &num[i]);
            snum[i] = num[i];
        }
        // Coordinate compression.  The ids must start at 1 because
        // getSuffix() appends 0 as the terminating symbol, and they must stay
        // small because getSa() uses them as bucket indices.
        std::sort(snum, snum+n);
        for(int i = 0; i < n; i++){
            // Take the id from the SORTED copy so ids follow value order.
            // The old code indexed num[] here, which left the sort unused and
            // handed out ids by last position instead.
            mp1[snum[i]] = i+1;
        }
        for(int i = 0; i < n; i++){
            num[i] = mp1[num[i]];
        }
        getSuffix(num, n);
        // Smallest length judge() rejects, searched in [0, n+10].
        int high = n + 10;
        int low = 0;
        while(low < high){
            const int mid = (low + high) / 2;
            if(judge(n, mid, K)) low = mid+1;
            else high = mid;
        }
        printf("%d\n", low -1);
    }
    return 0;
}
View Code

 

 

4.poj3294

题意:给n个字符串,找出在超过一半的字符串中都出现过的最长子串,如果有多个,按字典序全部输出

没看到按字典序输出卡了两天啊啊啊啊啊啊啊啊啊啊啊

用一些互不相同的字符去连接所有字符串,二分答案的长度,找出height连续不小于该长度的各段位置,如果某一段中的后缀来自超过一半的不同字符串就成立

#include<iostream>
#include<string.h>
#include<vector>
#include<set>
#include<stdio.h>
using namespace std;

// rep(i,n): counting loop with a fresh int i running over 0 .. n-1.
// The bound is parenthesized so that expression arguments expand safely.
#define rep(i,n) for(int i = 0;i < (n); i++)
using namespace std;
// size: capacity of every work array below.  INF: a large sentinel value.
const int size  = 200005,INF = 1<<30;
// Since C++17 the standard library declares std::size, so with the
// using-directive above an unqualified `size` is ambiguous and the array
// declarations fail to compile.  `::size` names the constant defined here.
//   rk[i]     - rank of the suffix starting at position i
//   sa[i]     - start position of the suffix with rank i
//   height[i] - common-prefix length of the suffixes ranked i and i-1
//   w         - integer copy of the sequence being indexed
//   wa, res   - scratch buffers used by getSa()
int rk[::size],sa[::size],height[::size],w[::size],wa[::size],res[::size];
// Builds the suffix array of the integer sequence w[0..len-1] into sa[] by
// prefix doubling, using a counting sort in every round.
//   len - number of symbols of w[] to sort.
//   up  - number of counting-sort buckets cleared for the first pass; the
//         caller in this file passes (largest symbol + 1).
// rk[], height[], res[] and wa[] are used as scratch space here, so their
// contents afterwards are not meaningful until getHeight() rebuilds them.
void getSa (int len,int up) {
    // k: current sort key of each position, id: positions ordered by their
    // second key, r: spare key buffer, cnt: bucket counters.
    int *k = rk,*id = height,*r = res, *cnt = wa;
    // First pass: counting sort of all positions by their single symbol.
    rep(i,up) cnt[i] = 0;
    rep(i,len) cnt[k[i] = w[i]]++;
    // NOTE(review): this prefix sum also updates cnt[up], one slot past the
    // range cleared above; presumably every key is < up so that slot is never
    // used as a bucket - confirm against the callers.
    rep(i,up) cnt[i+1] += cnt[i];
    // Filling from the back keeps positions with equal keys in index order.
    for(int i = len - 1; i >= 0; i--) {
        sa[--cnt[k[i]]] = i;
    }
    // d is the half-length already sorted; each round sorts by 2*d symbols.
    int d = 1,p = 0;
    // p is 0 every time this condition is evaluated, so for len > 0 the loop
    // is left through the return statement below.
    while(p < len){
        // Positions whose second half would start at or beyond len come first.
        for(int i = len - d; i < len; i++) id[p++] = i;
        // The remaining positions follow in the order of their second half,
        // which is the current sa[] order shifted back by d.
        rep(i,len)    if(sa[i] >= d) id[p++] = sa[i] - d;
        // r[i] is the first-half key of the i-th position in second-key order.
        rep(i,len) r[i] = k[id[i]];
        rep(i,up) cnt[i] = 0;
        rep(i,len) cnt[r[i]]++;
        rep(i,up) cnt[i+1] += cnt[i];
        // Counting sort by first-half key, walking id[] from the back.
        for(int i = len - 1; i >= 0; i--) {
            sa[--cnt[r[i]]] = id[i];
        }
        // After the swap r holds the previous per-position keys and k is the
        // buffer that receives the new ones.
        swap(k,r);
        p = 0;
        k[sa[0]] = p++;
        rep(i,len-1) {
            // Neighbours in sa[] share a key only if both second halves start
            // inside the text and both halves carry equal previous keys.
            if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
                k[sa[i+1]] = p - 1;
            else k[sa[i+1]] = p++;
        }
        // len distinct keys means every suffix has its final place in sa[].
        if(p >= len) return ;
        // Next round: double the half-length; keys now lie in 0 .. p-1.
        d *= 2,up = p, p = 0;
    }
}
// Rebuilds rk[] as the inverse of sa[] and fills height[]: height[r] is the
// number of leading symbols of w[] shared by the suffix of rank r and the
// suffix of rank r-1; height[0] is set to 0.
//   len - number of entries in sa[] (and symbols in w[]).
// The suffix starting at len-1 is skipped; for every other start the code
// reads sa[rk[start]-1], so it assumes those suffixes have rank >= 1
// (presumably guaranteed by a unique smallest last symbol - TODO confirm).
void getHeight(int len) {
    for (int order = 0; order < len; ++order) {
        rk[sa[order]] = order;
    }
    height[0] = 0;

    // Number of symbols already known to match, carried from one start
    // position to the next.
    int matched = 0;
    for (int start = 0; start + 1 < len; ++start) {
        const int order = rk[start];
        const int prev = sa[order - 1];
        while (start + matched < len && prev + matched < len
               && w[start + matched] == w[prev + matched]) {
            ++matched;
        }
        height[order] = matched;
        // The next start position keeps all but the first matched symbol.
        if (matched > 0) {
            --matched;
        }
    }
}
// Indexes the first n values of s: copies them into w[], stores a 0 symbol
// at w[n], then runs getSa() and getHeight() on the n+1 symbols.
// Returns n + 1, the number of entries written to sa[] and height[].
int getSuffix(int s[], int n) {
    int top = 0;
    for (int at = 0; at < n; ++at) {
        w[at] = s[at];
        if (top < w[at]) {
            top = w[at];
        }
    }
    w[n] = 0;
    const int symbols = n + 1;
    getSa(symbols, top + 1);
    getHeight(symbols);
    return symbols;
}
// Capacity of the buffers declared below.
const int maxa = 100000*2+5;
// num: the text handed to getSuffix(); main() fills it with the characters
// of every input string, each string followed by its own value 500+index.
int num[maxa];
// str: buffer main() reads one input string into.
char str[maxa];
// mp[pos]: index of the input string that position pos of num[] came from.
int mp[maxa];
// vis[s]: flag used by judge() to count each input string once per run.
int vis[200];
// qq: start positions collected during the current judge() call;
// que: copy of qq from the most recent judge() call that returned 1.
int que[maxa], qq[maxa];
// o: number of valid entries in que.
int o;
int judge(int n, int mid, int K){
    int ok = 0;
    int siz = 0;
    int oo = 0;
    int last = sa[0];
    memset(vis, 0, sizeof(vis));
    for(int i = 0; i <= n; i++){
       // printf("%d == hight[i] %d == sa[i] %d == mp\n", height[i], sa[i], mp[sa[i]]);
        if(height[i] >= mid){
            if(vis[mp[sa[i]]] == 0){
                vis[mp[sa[i]]] = 1;
                siz ++;
                last = sa[i];
                //printf("--%d %d\n", sa[i], mp[sa[i]]);
            }
        }else{
            memset(vis, 0, sizeof(vis));
            if(siz > K){
                qq[oo++] = last;
                ok = 1;
            }
            siz = 1;
            memset(vis, 0, sizeof(vis));
            vis[mp[sa[i]]] = 1;
        }
           // printf("%d == weizhi %d == size\n", sa[i], siz);
    }
           // printf("%d == size\n", siz);
    if(siz > K){
        qq[oo++] = last;
        ok = 1;
    }

    if(ok == 1){
        o = oo;
        for(int i = 0;i  < oo; i++){
            que[i] = qq[i];
        }
        return 1;
    }return 0;
}
// For every test case (a count n followed by n strings; a count of 0 ends
// the input) prints the longest substrings found by judge() to occur in more
// than n/2 of the strings, one per line in suffix-array order, or "?" if
// there is none.  Test cases are separated by an empty line.
int main(){
    int n;
    int first = 0;
    // Stop on end of input as well as on the terminating 0.  The old
    // comma-expression ignored scanf's result, so a missing 0 left n
    // unchanged and the loop never ended.
    while(scanf("%d", &n) == 1 && n){
        int l = 0;
        for(int i = 0; i < n; i++){
            scanf("%s", str);
            for(int k = 0; str[k]; k++){
                num[l] = str[k];
                mp[l] = i;
                l++;
            }
            // Close each string with its own value (500+i) so that a common
            // prefix can never run across a string boundary.
            mp[l] = i;
            num[l++] = 500+i;
        }
        getSuffix(num, l);

        // Smallest length judge() rejects, searched in [0, 1005].  Each call
        // that returns 1 refreshes que[]/o, and the accepted lengths only
        // grow during the search, so afterwards que[] belongs to low-1.
        int high = 1005;
        int low = 0;
        while(low < high){
            const int mid = (low + high) / 2;
            if(judge(l, mid, n/2)) low = mid+1;
            else high = mid;
        }

        // Empty line between consecutive test cases.
        if(first == 0) first = 1;
        else puts("");

        // A single string is its own answer.
        if(n == 1){
            printf("%s\n", str);
            continue;
        }
        if(low > 1){
            for(int i = 0; i < o; i++){
                for(int k = que[i]; k < que[i]+low-1; k++){
                    printf("%c", num[k]);
                }
                puts("");
            }
        }else
            puts("?");
    }
    return 0;
}
View Code

 

posted @ 2015-09-04 17:41  icodefive  阅读(246)  评论(0编辑  收藏  举报