poj_1743 后缀数组
题目大意
给定一串数字,长度为N。定义数字中的某个连续的子串为一个"theme",只要子串满足:
(1)长度 >= 5
(2)和该子串相同或者该子串的“变种串”在整串数字中出现次数大于1
(3)假设整串中有k个该子串及其“变种串”,那么其中至少有两个不相重叠
求满足要求的 "theme" 串的最长长度。
题目分析
(1)首先考虑将“变种”串和原子串相互比较的问题,对字符串中所有索引大于等于1的字符都用该字符减去前一个字符,这样得到串的差串之后,原theme和其“变种”就一样了,此时只需要求差串中的最长相同子串,且这些子串之间不重叠。
求最长相同子串,可以考虑使用后缀数组和height数组。显然,height越大,则两个后缀的公共前缀越长,越有可能是最长相同子串。但是,题目对"theme"串的要求(3)是至少有两个不相重叠,因此需要在height[i]尽可能大的同时,保证SA[i]和SA[i-1]之间的差值大于height[i],以保证不重叠。
(2)然后,试图求解是否存在长度为M的"theme"串。
容易看出,后缀Suffix(j)和Suffix(k)(设rank[j] < rank[k])的最长公共前缀的长度为 height[rank[j]+1], height[rank[j]+2]...height[rank[k]]的最小值。i从1到N遍历,按height[i]>=M是否成立将后缀分成若干组:每组中任意两个后缀的公共前缀长度均大于等于M,而且可以肯定组A中的某后缀t1和组B中的某后缀t2的公共前缀长度小于M。若存在包含至少两个后缀的组,则可以确定找到了公共前缀长度大于等于M的子串,下一步需要确定这些子串不重叠。只需要在组内求 SA[i] 的最大值与最小值之差,看是否大于子串的长度,若大于则可以确定不重叠。
(3)最后,求解"theme"串长度M的最大值,用二分法对"theme"串的可能长度进行二分求解,长度范围为0到N。每次二分得到中值M,先判断能否找到长度为M的"theme"串,若不能,则减小M,否则增加M。直到找到长度M最大的"theme"串。
实现(c++)
/*
 * POJ 1743: longest repeated, non-overlapping, transposition-invariant
 * theme in a sequence of notes.
 *
 * The notes are replaced by the differences of adjacent notes, so a theme
 * and its transposed copies become identical substrings. A suffix array and
 * height (LCP) array are built over the difference sequence, and the answer
 * is found by binary search on the theme length.
 *
 * The file builds both as C (C99 or later) and as C++.
 */
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <string.h>
#ifndef __cplusplus
#include <stdbool.h> /* bool/true/false are keywords in C++ but not in C */
#endif

#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MAX_ARRAY_SIZE 20005
#define LETTERS 10000      /* exclusive upper bound on symbols stored in gStr */
#define MIN_THEME_LENGTH 5 /* shorter repeats do not count as a theme */

int gStrLen;                           /* symbols in gStr, sentinel included */
int gStr[MAX_ARRAY_SIZE];              /* sequence being indexed             */
int gCount[MAX_ARRAY_SIZE];            /* counting-sort buckets              */
int gSuffixArray[MAX_ARRAY_SIZE];      /* suffix start positions, sorted     */
int gRank[MAX_ARRAY_SIZE];             /* rank buffer / inverse suffix array */
int gOrderBySecondKey[MAX_ARRAY_SIZE]; /* second rank buffer                 */
int gFirstKeyArray[MAX_ARRAY_SIZE];    /* first keys in second-key order     */
int gHeight[MAX_ARRAY_SIZE];           /* LCP of suffixes ranked i-1 and i   */

/*
 * Returns true when positions a and b carry the same key pair
 * (arr[x], arr[x + step]).
 *
 * arr[a + step] is only read after arr[a] == arr[b] has been established.
 * The callers rely on the unique terminating sentinel to guarantee that two
 * distinct positions with equal first keys both have x + step inside the
 * sequence.
 */
bool Compare(const int *arr, int a, int b, int step)
{
    return arr[a] == arr[b] && arr[a + step] == arr[b + step];
}

/*
 * Loads a lowercase string into gStr ('a' -> 1, 'b' -> 2, ...) and appends
 * the 0 sentinel; gStrLen counts the sentinel. Not called by main().
 * Input longer than the buffer is truncated so the sentinel always fits.
 */
void GetStr(const char *str)
{
    size_t len = strlen(str);

    if (len > (size_t)(MAX_ARRAY_SIZE - 1)) {
        len = (size_t)(MAX_ARRAY_SIZE - 1);
    }

    memset(gStr, 0, sizeof(gStr));
    gStrLen = (int)len;
    for (int i = 0; i < gStrLen; i++) {
        gStr[i] = str[i] - 'a' + 1;
    }
    gStr[gStrLen++] = 0;
}

/*
 * Builds gSuffixArray for gStr[0 .. gStrLen-1] by prefix doubling, using a
 * counting sort in every round.
 *
 * Preconditions: every symbol is in [0, LETTERS), and the last symbol is a
 * sentinel strictly smaller than all others (see Compare()).
 *
 * On return the contents of gRank and gOrderBySecondKey are scratch data;
 * GetHeight() rebuilds gRank from the suffix array.
 */
void GetSuffixArray(void)
{
    int n = gStrLen;
    int classes = LETTERS; /* number of distinct ranks currently in use */
    int *rank = gRank;
    int *order_by_second_key = gOrderBySecondKey;

    /* Round 0: sort the suffixes by their first symbol. */
    memset(gCount, 0, sizeof(gCount));
    for (int i = 0; i < n; i++) {
        gRank[i] = gStr[i];
        gCount[gRank[i]]++;
    }
    for (int i = 1; i < LETTERS; i++) {
        gCount[i] += gCount[i - 1];
    }
    for (int i = n - 1; i >= 0; i--) {
        gSuffixArray[--gCount[gRank[i]]] = i;
    }

    for (int step = 1; step < n; step *= 2) {
        int filled = 0;

        /*
         * Order by second key (rank of the suffix starting `step` later).
         * Suffixes whose second half falls off the end have the smallest
         * second key and therefore come first.
         */
        for (int i = n - step; i < n; i++) {
            order_by_second_key[filled++] = i;
        }
        for (int i = 0; i < n; i++) {
            if (gSuffixArray[i] >= step) {
                order_by_second_key[filled++] = gSuffixArray[i] - step;
            }
        }

        /* Stable counting sort by first key on top of that order. */
        for (int i = 0; i < n; i++) {
            gFirstKeyArray[i] = rank[order_by_second_key[i]];
        }
        for (int i = 0; i < classes; i++) {
            gCount[i] = 0;
        }
        for (int i = 0; i < n; i++) {
            gCount[gFirstKeyArray[i]]++;
        }
        for (int i = 1; i < classes; i++) {
            gCount[i] += gCount[i - 1];
        }
        for (int i = n - 1; i >= 0; i--) {
            gSuffixArray[--gCount[gFirstKeyArray[i]]] = order_by_second_key[i];
        }

        /*
         * Swap the buffers: the old ranks now live in order_by_second_key
         * and the new ranks for prefixes of length 2 * step go into rank.
         */
        int *old_rank = rank;
        rank = order_by_second_key;
        order_by_second_key = old_rank;

        int last_rank = 0;
        rank[gSuffixArray[0]] = 0;
        for (int i = 1; i < n; i++) {
            if (!Compare(order_by_second_key, gSuffixArray[i],
                         gSuffixArray[i - 1], step)) {
                ++last_rank;
            }
            rank[gSuffixArray[i]] = last_rank;
        }
        classes = last_rank + 1;
    }
}

/*
 * Fills gRank with the inverse of gSuffixArray and gHeight with the length
 * of the longest common prefix of the suffixes ranked i-1 and i
 * (gHeight[0] is 0).
 *
 * Suffixes are visited in text order, so the match length carried over from
 * the previous position drops by at most one per step.
 */
void GetHeight(void)
{
    int n = gStrLen;
    int k = 0;

    for (int i = 0; i < n; i++) {
        gRank[gSuffixArray[i]] = i;
    }
    gHeight[0] = 0;

    for (int i = 0; i < n; i++) {
        int r = gRank[i];

        if (r == 0) {
            /* The smallest suffix has no predecessor to compare against. */
            k = 0;
            continue;
        }

        int j = gSuffixArray[r - 1];
        if (k) {
            k--;
        }
        while (i + k < n && j + k < n && gStr[i + k] == gStr[j + k]) {
            k++;
        }
        gHeight[r] = k;
    }
}

/*
 * Reports whether a theme of k notes occurs at two non-overlapping places.
 *
 * A theme of k notes is a run of k - 1 differences, so the suffix array is
 * scanned in maximal groups of consecutive suffixes with height >= k - 1.
 * A group qualifies when its smallest and largest start positions are at
 * least k apart, which keeps the two k-note windows disjoint.
 */
bool Find(int k)
{
    int end = 1;

    while (end < gStrLen) {
        int min_pos = gSuffixArray[end - 1];
        int max_pos = min_pos;

        while (end < gStrLen && gHeight[end] >= k - 1) {
            int pos = gSuffixArray[end];

            if (pos < min_pos) {
                min_pos = pos;
            }
            if (pos > max_pos) {
                max_pos = pos;
            }
            end++;
        }
        if (max_pos - min_pos >= k) {
            return true;
        }
        end++; /* step over the boundary; it seeds the next group */
    }
    return false;
}

/* Debug helper: prints the first n symbols of gStr. */
void printstr(int n)
{
    printf("string = \n");
    for (int i = 0; i < n; i++) {
        printf("%d ", gStr[i]);
    }
    printf("\n");
}

/* Debug helper: prints the first n entries of gSuffixArray. */
void printsuffix(int n)
{
    printf("suffix = \n");
    for (int i = 0; i < n; i++) {
        printf("%d ", gSuffixArray[i]);
    }
    printf("\n");
}

/* Debug helper: prints the first n entries of gHeight. */
void printheigt(int n)
{
    printf("height = \n");
    for (int i = 0; i < n; i++) {
        printf("%d ", gHeight[i]);
    }
    printf("\n");
}

/*
 * Reads test cases until a 0 count or end of input. Each case is a count n
 * followed by n notes; the output is the longest theme length, or 0 when no
 * theme of at least MIN_THEME_LENGTH notes exists.
 *
 * Returns 1 on malformed input: a count that does not fit the buffers, a
 * missing note, or a note outside [0, LETTERS / 2).
 */
int main(void)
{
    int n;

    while (scanf("%d", &n) == 1 && n != 0) {
        if (n < 0 || n >= MAX_ARRAY_SIZE) {
            return 1;
        }

        for (int i = 0; i < n; i++) {
            if (scanf("%d", &gStr[i]) != 1) {
                return 1;
            }
            /*
             * Notes below LETTERS / 2 keep every shifted difference
             * inside [1, LETTERS), the range the counting sort can index.
             */
            if (gStr[i] < 0 || gStr[i] >= LETTERS / 2) {
                return 1;
            }
        }

        /* Replace the notes by adjacent differences (n - 1 values). */
        int min_diff = 0;
        for (int i = 1; i < n; i++) {
            gStr[i - 1] = gStr[i] - gStr[i - 1];
            if (gStr[i - 1] < min_diff) {
                min_diff = gStr[i - 1];
            }
        }

        /* Shift so real symbols are >= 1; 0 is reserved for the sentinel. */
        for (int i = 0; i < n - 1; i++) {
            gStr[i] -= min_diff - 1;
        }
        gStr[n - 1] = 0;
        gStrLen = n;

        GetSuffixArray();
        GetHeight();

        /*
         * Find(k) is monotone in k, and two disjoint k-note themes need
         * 2 * k <= n, so search [MIN_THEME_LENGTH, n / 2] for the largest
         * feasible k. `best` stays 0 when nothing qualifies.
         */
        int best = 0;
        int lo = MIN_THEME_LENGTH;
        int hi = n / 2;
        while (lo <= hi) {
            int mid = lo + (hi - lo) / 2;

            if (Find(mid)) {
                best = mid;
                lo = mid + 1;
            } else {
                hi = mid - 1;
            }
        }
        printf("%d\n", best);
    }
    return 0;
}