poj_3261 后缀数组
题目大意
给出一个数字串,找出其中至少重复K次的最长的子串长度。
题目分析
直接用后缀数组来求解,限制height[i]的长度来对排好序的后缀进行分组(这种方法经常在字符串问题中被使用)。
先判断是否存在长度大于等于M且至少出现K次的子串:按 height[i] >= M 对排好序的后缀进行分组,若某一组内的后缀个数大于等于K,则存在这样的子串。
然后,用二分法得到最大的M。
实现(c++)
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <string.h>
#ifndef __cplusplus
#include <stdbool.h> // `bool` is built in for C++; C needs this header
#endif

// Size of the counting-sort bucket array for the first pass; every symbol
// stored in gStr must be strictly smaller than this.
#define LETTERS 1000005
#define MAX_ARRAY_SIZE 1000005

int gSuffixArray[MAX_ARRAY_SIZE];      // gSuffixArray[r] = start index of the suffix with rank r
int gCount[MAX_ARRAY_SIZE];            // counting-sort buckets
int gOrderBySecondKey[MAX_ARRAY_SIZE]; // suffixes ordered by second key; also the swap buffer for ranks
int gRank[MAX_ARRAY_SIZE];             // gRank[i] = rank of the suffix starting at i
int gFirstKeyArray[MAX_ARRAY_SIZE];    // first-key rank of each suffix, listed in second-key order
int gHeight[MAX_ARRAY_SIZE];           // gHeight[r] = LCP of the suffixes ranked r-1 and r
int gStr[MAX_ARRAY_SIZE];              // input symbols followed by a 0 sentinel
int gStrLen;                           // number of entries in gStr, sentinel included

// Returns true when positions a and b carry the same (first key, second key)
// pair in `arr`, i.e. arr[a] == arr[b] and arr[a + step] == arr[b + step].
bool Compare(int* arr, int a, int b, int step){
    return arr[a] == arr[b] && arr[a + step] == arr[b + step];
}

// Loads a NUL-terminated string into gStr, mapping 'a' to 1, 'b' to 2, ...
// and appending the 0 sentinel. gStrLen counts the sentinel.
// NOTE(review): presumably intended for lowercase input only; main() below
// fills gStr directly and does not call this.
void GetStr(char* str){
    memset(gStr, 0, sizeof(gStr));
    gStrLen = (int)strlen(str);
    for (int i = 0; i < gStrLen; i++){
        gStr[i] = str[i] - 'a' + 1;
    }
    gStr[gStrLen] = 0;
    gStrLen++;
}

// Build the suffix array of gStr[0 .. gStrLen-1] into gSuffixArray using
// prefix doubling with a two-key counting sort.
// The rank buffers are swapped every round, so gRank is not guaranteed to hold
// the final ranks on return; GetHeight() recomputes it from gSuffixArray.
void GetSuffixArray(){
    int n = gStrLen;

    // Round 0: counting sort of the suffixes by their first symbol.
    memset(gCount, 0, sizeof(gCount));
    for (int i = 0; i < n; i++){
        gRank[i] = gStr[i];
        gCount[gRank[i]]++;
    }
    int m = LETTERS;
    for (int i = 1; i < m; i++){
        gCount[i] += gCount[i - 1];
    }
    for (int i = n - 1; i >= 0; i--){
        gSuffixArray[--gCount[gRank[i]]] = i;
    }

    int step = 1;
    int *rank = gRank;
    int *order_by_second_key = gOrderBySecondKey;
    while (step < n){
        // Order by second key (the rank of suffix i + step). Suffixes whose
        // second key lies past the end have the smallest key and come first.
        int p = 0;
        for (int i = n - step; i < n; i++){
            order_by_second_key[p++] = i;
        }
        for (int i = 0; i < n; i++){
            if (gSuffixArray[i] >= step){
                order_by_second_key[p++] = gSuffixArray[i] - step;
            }
        }

        // Stable counting sort by first key, walking the second-key order.
        for (int i = 0; i < n; i++){
            gFirstKeyArray[i] = rank[order_by_second_key[i]];
        }
        for (int i = 0; i < m; i++){
            gCount[i] = 0;
        }
        for (int i = 0; i < n; i++){
            gCount[gFirstKeyArray[i]]++;
        }
        for (int i = 1; i < m; i++){
            gCount[i] += gCount[i - 1];
        }
        for (int i = n - 1; i >= 0; i--){
            gSuffixArray[--gCount[gFirstKeyArray[i]]] = order_by_second_key[i];
        }

        // Swap buffers: the old ranks are now read through order_by_second_key
        // and the new ranks are written through rank.
        int *tmp = rank;
        rank = order_by_second_key;
        order_by_second_key = tmp;

        rank[gSuffixArray[0]] = p = 0;
        for (int i = 1; i < n; i++){
            if (Compare(order_by_second_key, gSuffixArray[i], gSuffixArray[i - 1], step)){
                rank[gSuffixArray[i]] = p;
            }
            else{
                rank[gSuffixArray[i]] = ++p;
            }
        }
        m = p + 1; // number of distinct ranks = bucket count for the next round
        step *= 2;
    }
}

// Compute the height array: gHeight[r] is the length of the longest common
// prefix of the suffixes ranked r-1 and r. gHeight[0] is set to 0.
// Requires gSuffixArray to be filled; gRank is rebuilt here.
void GetHeight(){
    int n = gStrLen;
    for (int i = 0; i < n; i++){
        gRank[gSuffixArray[i]] = i;
    }
    int k = 0;
    for (int i = 0; i < n; i++){
        if (gRank[i] == 0){
            // The lowest-ranked suffix has no predecessor; reading
            // gSuffixArray[-1] here would be out of bounds.
            gHeight[0] = 0;
            k = 0;
            continue;
        }
        if (k){
            k--; // the LCP can shrink by at most one when moving from i-1 to i
        }
        int j = gSuffixArray[gRank[i] - 1];
        while (j + k < n && i + k < n && gStr[i + k] == gStr[j + k]){
            k++;
        }
        gHeight[gRank[i]] = k;
    }
}

// Returns true if some run of consecutive sorted suffixes, all linked by
// gHeight values >= len, contains at least k suffixes -- i.e. some substring
// of length >= len occurs at least k times.
bool Find(int k, int len){
    int end = 1;
    int count = 0;
    while (end < gStrLen){
        count = 1; // the suffix that opens the current group
        while (end < gStrLen && gHeight[end] >= len){
            count++;
            end++;
        }
        if (count >= k){
            return true;
        }
        end++;
    }
    return false;
}

// Reads "n k" followed by n integers from stdin and prints the length of the
// longest substring that occurs at least k times.
// Returns 1 without printing anything if the input is malformed or out of range.
int main(){
    int n, k;
    if (scanf("%d %d", &n, &k) != 2){
        return 1;
    }
    // gStr must hold n symbols plus the sentinel.
    if (n < 1 || n >= MAX_ARRAY_SIZE){
        return 1;
    }
    for (int i = 0; i < n; i++){
        if (scanf("%d", &gStr[i]) != 1){
            return 1;
        }
        // After the +1 shift the symbol indexes gCount, so it must stay below LETTERS.
        if (gStr[i] < 0 || gStr[i] >= LETTERS - 1){
            return 1;
        }
        gStr[i]++; // shift so that 0 is free to act as the sentinel
    }
    gStr[n] = 0;
    gStrLen = n + 1;

    if (k == 1){
        // The whole sequence occurs once.
        printf("%d\n", n);
        return 0;
    }

    GetSuffixArray();
    GetHeight();

    // Binary search for the first length at which Find() fails;
    // the answer is the length just below it.
    int beg = 0, end = n;
    while (beg < end){
        int mid = (beg + end) / 2;
        if (Find(k, mid)){
            beg = mid + 1;
        }
        else{
            end = mid;
        }
    }
    printf("%d\n", beg - 1);
    return 0;
}