poj_3261 后缀数组

题目大意

    给出一个数字串,找出其中至少重复K次的最长的子串长度。

题目分析

    直接用后缀数组来求解,限制height[i]的长度来对排好序的后缀进行分组(这种方法经常在字符串问题中被使用) 
    先判断是否存在一个长度为M的子串至少出现K次:按照height[i] >= M对排好序的后缀进行分组(相邻后缀的height不小于M则归入同一组),然后判断是否有某一组内的后缀个数大于等于K。 
    然后,用二分法得到最大的M。

实现(c++)

#define _CRT_SECURE_NO_WARNINGS
#include<stdio.h>
#include<string.h>
// Initial radix (number of counting-sort buckets) used for the first pass of
// the suffix-array construction.
#define LETTERS 1000005
// Capacity of every work array below.
#define MAX_ARRAY_SIZE 1000005
// gSuffixArray[r] = start index of the suffix at sorted position r.
int gSuffixArray[MAX_ARRAY_SIZE];
// Bucket counters for the counting sorts in GetSuffixArray.
int gCount[MAX_ARRAY_SIZE];
// Scratch buffer: suffix start indices ordered by second key; GetSuffixArray
// swaps its role with gRank on every doubling round.
int gOrderBySecondKey[MAX_ARRAY_SIZE];
// Rank buffer; GetHeight rewrites it so that gRank[gSuffixArray[r]] == r.
int gRank[MAX_ARRAY_SIZE];
// First-key rank of each suffix, listed in second-key order.
int gFirstKeyArray[MAX_ARRAY_SIZE];
// gHeight[r] = common-prefix length of the suffixes at sorted positions r and r - 1.
int gHeight[MAX_ARRAY_SIZE];

// Input sequence as integers, followed by a 0 sentinel.
int gStr[MAX_ARRAY_SIZE];
// Number of used entries in gStr, including the sentinel.
int gStrLen;

// Tells whether positions a and b hold the same key pair
// (arr[x], arr[x + step]). The second key is only inspected when the
// first keys already match.
bool Compare(int* arr, int a, int b, int step){
	if (arr[a] != arr[b]){
		return false;
	}
	const int *shifted = arr + step;
	return shifted[a] == shifted[b];
}

// Loads a NUL-terminated string into gStr: every character c is stored as
// c - 'a' + 1, a 0 sentinel is appended, and gStrLen is set to the string
// length plus one (the sentinel is counted).
void GetStr(char* str){
	memset(gStr, 0, sizeof(gStr));
	int len = strlen(str);
	int *out = gStr;
	for (const char *in = str; in < str + len; ++in){
		*out++ = *in - 'a' + 1;
	}
	*out = 0;
	gStrLen = len + 1;
}
// Build the suffix array of gStr[0 .. gStrLen-1] into gSuffixArray using
// prefix doubling: each round sorts the suffixes by their first 2*step symbols
// with a two-pass (second key, then first key) counting sort.
// Reads gStr/gStrLen; overwrites gSuffixArray, gCount, gRank,
// gOrderBySecondKey and gFirstKeyArray. Because the rank buffers are swapped
// every round, the final ranks may end up in either gRank or
// gOrderBySecondKey.
void GetSuffixArray(){
	int n = gStrLen;
	// Round 0: counting sort of the suffixes by their first symbol.
	memset(gCount, 0, sizeof(gCount));
	for (int i = 0; i < n; i++){
		gRank[i] = gStr[i];
		gCount[gRank[i]] ++;
	}
	// m is the number of buckets; symbols must be smaller than LETTERS.
	int m = LETTERS;
	for (int i = 1; i < m; i++){
		gCount[i] += gCount[i - 1];
	}
	// Fill from the back so that equal keys keep their relative order.
	for (int i = n - 1; i >= 0; i--){
		gSuffixArray[--gCount[gRank[i]]] = i;
	}

	int step = 1;
	int *rank = gRank, *order_by_second_key = gOrderBySecondKey;
	while (step < n){
		int p = 0;

		// Suffixes starting in the last `step` positions have no second half,
		// so they are placed first in second-key order.
		for (int i = n - step; i < n; i++){
			order_by_second_key[p++] = i;
		}
		// The remaining suffixes inherit the order of their second half,
		// which is the current suffix-array order shifted back by `step`.
		for (int i = 0; i < n; i++){
			if (gSuffixArray[i] >= step){
				order_by_second_key[p++] = gSuffixArray[i] - step;
			}
		}
		// First key (current rank) of each suffix, listed in second-key order.
		for (int i = 0; i < n; i++){
			gFirstKeyArray[i] = rank[order_by_second_key[i]];
		}
		// Stable counting sort by first key over m buckets.
		for (int i = 0; i < m; i++){
			gCount[i] = 0;
		}
		for (int i = 0; i < n; i++){
			gCount[gFirstKeyArray[i]] ++;
		}
		for (int i = 1; i < m; i++){
			gCount[i] += gCount[i - 1];
		}
		for (int i = n - 1; i >= 0; i--){
			gSuffixArray[--gCount[gFirstKeyArray[i]]] = order_by_second_key[i];
		}
		// Swap buffers: order_by_second_key now holds the previous ranks and
		// `rank` receives the ranks for prefixes of length 2*step.
		int* tmp = rank; rank = order_by_second_key; order_by_second_key = tmp;
		rank[gSuffixArray[0]] = p = 0;
		for (int i = 1; i < n; i++){
			// Neighbours with an identical (rank, rank at +step) pair share a rank.
			// NOTE(review): Compare reads index suffix + step; this presumably
			// stays inside the data because the 0 sentinel is unique - confirm.
			if (Compare(order_by_second_key, gSuffixArray[i], gSuffixArray[i - 1], step)){
				rank[gSuffixArray[i]] = p;
			}
			else{
				rank[gSuffixArray[i]] = ++p;
			}
		}
		// Number of distinct ranks becomes the bucket count of the next round.
		m = p + 1;
		step *= 2;
	}
}
// Build the height array from gSuffixArray: gHeight[r] is the length of the
// longest common prefix of the suffixes at sorted positions r and r - 1, and
// gHeight[0] is 0 because the first suffix has no predecessor.
// Also rewrites gRank so that gRank[gSuffixArray[r]] == r.
// Suffixes are visited in text order so the previous match length minus one
// can be reused as the starting point for the next suffix.
void GetHeight(){
	int n = gStrLen;
	for (int i = 0; i < n; i++){
		gRank[gSuffixArray[i]] = i;
	}
	int k = 0;
	for (int i = 0; i < n; i++){
		int r = gRank[i];
		if (r == 0){
			// No predecessor in sorted order: indexing gSuffixArray[r - 1]
			// would read before the start of the array.
			gHeight[0] = 0;
			k = 0;
			continue;
		}
		if (k){
			k--;
		}
		int j = gSuffixArray[r - 1];
		while (j + k < n && i + k < n && gStr[i + k] == gStr[j + k]){
			k++;
		}
		gHeight[r] = k;
	}
}

// Reports whether some group of consecutive sorted suffixes, linked by
// gHeight values of at least len, contains at least k suffixes.
// Scans gHeight[1 .. gStrLen-1] once; a group is evaluated when it is closed
// by a height below len or by the end of the array.
bool Find(int k, int len){
	int members = 1;
	for (int pos = 1; pos < gStrLen; ++pos){
		if (gHeight[pos] >= len){
			members++;
			continue;
		}
		if (members >= k){
			return true;
		}
		members = 1;
	}
	// Only a group still open at the end of the array (more than one member)
	// remains to be evaluated; a lone trailing suffix is never tested.
	return members > 1 && members >= k;
}

int main(){
	int n, k;
	scanf("%d %d", &n, &k);
	
	for (int i = 0; i < n; i++){
		scanf("%d", &gStr[i]); gStr[i]++;
	}
	gStr[n] = 0;
	gStrLen = n + 1;
	if (k == 1){
		printf("%d\n", n);
		return 0;
	}

	GetSuffixArray();
	GetHeight();
	int beg = 0, end = n;
	while (beg < end){
		int mid = (beg + end) / 2;
		if (Find(k, mid)){
			beg = mid + 1;
		}
		else{
			end = mid;
		}
	}
	printf("%d\n", beg-1);
	return 0;
}

 

posted @ 2015-09-24 15:51  农民伯伯-Coding  阅读(153)  评论(0)  编辑  收藏  举报