字符串专题

字符串Hash

主要思想

  • 把字符串看作一个F进制的数,并对MOD取模,用得到的整数(Hash值)来表示这个字符串

给出N个只有小写字母的字符串,求其中不同的字符串的个数

// Modulus and base of the polynomial hash. According to the original author,
// using these two values effectively avoids collisions.
const int MOD = 1000000007;
const int F = 10000019;
// Hash of every string read so far. Hash values are always < MOD, so they fit in int.
std::vector<int> ans;
std::string a, b;
int n;

// Computes a polynomial hash of a lowercase string.
//
// The string is read as a base-F number, most significant character first,
// and reduced modulo MOD after every step so the intermediate value stays
// well inside the range of long long.
//
// a:       string made of the letters 'a'..'z'.
// returns: hash value in [0, MOD); the empty string hashes to 0.
long long getHash(const std::string& a)
{
	long long H = 0;
	for (std::string::size_type i = 0; i < a.length(); i++)
	{
		// Letters map to 1..26 rather than 0..25: with a zero digit, leading
		// 'a's would not change the value and "a", "aa", "aaa" would collide.
		H = (H * F + (a[i] - 'a' + 1)) % MOD;
	}
	return H;
}
// Reads n strings from standard input, hashes each of them, and prints how
// many distinct hash values were seen.
int main()
{
	scanf("%d", &n);
	std::string word;
	int remaining = n;
	while (remaining-- > 0)
	{
		std::cin >> word;
		ans.push_back(getHash(word));
	}

	// After sorting, equal hashes sit next to each other.
	std::sort(ans.begin(), ans.end());
	if (ans.empty())
	{
		printf("%d", 0);
		return 0;
	}

	// The first element always starts a group; every later element that
	// differs from its predecessor starts another one.
	int distinct = 1;
	for (std::size_t k = 1; k < ans.size(); ++k)
	{
		if (ans[k - 1] != ans[k])
			++distinct;
	}
	printf("%d", distinct);
	return 0;
}

KMP

理解

  • pNext[j]表示当文本串的text[i]与模式串的pattern[j+1]失配时,j应该回退到的位置
  • pNext[i]表示在模式串pattern的子串p[0..i]中,使前缀p[0..k]与后缀p[i-k..i]相等的最大k(要求k<i);不存在这样的k时为-1

常规KMP

//KMP 
// Capacity of the failure table. Patterns handed to getNext/KMP must not be
// longer than max_n characters; longer patterns would write past pNext.
const int max_n = 100;
// pNext[i] is the index of the last character of the longest proper prefix of
// the pattern's first i+1 characters that is also their suffix, or -1 when no
// such prefix exists.
int pNext[max_n];

// Fills pNext for the first n characters of str.
//
// n:   pattern length; nothing is written when n <= 0. Must be <= max_n.
// str: the pattern.
void getNext(int n,char str[]) {
	if (n <= 0)
		return;
	int j = -1;
	// A single character has no proper prefix. The scan must start at i = 1:
	// starting at 0 compares the pattern with itself and yields pNext[i] == i,
	// which makes the fallback "j = pNext[j]" loop forever.
	pNext[0] = -1;
	for (int i = 1; i < n; i++)
	{
		// Fall back to shorter borders until one can be extended by str[i].
		while (j != -1 && str[j + 1] != str[i])
		{
			j = pNext[j];
		}
		if (str[j + 1] == str[i])
			j++;
		pNext[i] = j;
	}
}

// Reports whether the pattern pa occurs anywhere in the text str.
//
// n:   length of str.
// str: the text.
// n2:  length of pa (must be <= max_n).
// pa:  the pattern.
// returns: true when pa is a substring of str. An empty pattern (n2 <= 0)
//          is treated as always found.
bool KMP(int n, char str[], int n2, char pa[])
{
	if (n2 <= 0)
		return true;
	getNext(n2, pa);
	// j is the index of the last pattern character matched so far (-1 = none).
	int j = -1;
	for (int i = 0; i < n; i++)
	{
		while (j != -1 && str[i] != pa[j + 1])
		{
			j = pNext[j];
		}
		// Advance only when the next pattern character matches the text.
		if (str[i] == pa[j + 1])
		{
			j++;
		}
		if (j == n2 - 1)
			return true;
	}
	return false;
}

统计出现个数KMP

//KMP 
// Capacity of the failure table. Patterns handed to getNext/KMP must not be
// longer than max_n characters; longer patterns would write past pNext.
const int max_n = 100;
// pNext[i] is the index of the last character of the longest proper prefix of
// the pattern's first i+1 characters that is also their suffix, or -1 when no
// such prefix exists.
int pNext[max_n];

// Fills pNext for the first n characters of str.
//
// n:   pattern length; nothing is written when n <= 0. Must be <= max_n.
// str: the pattern.
void getNext(int n,char str[]) {
	if (n <= 0)
		return;
	int j = -1;
	// A single character has no proper prefix. The scan must start at i = 1:
	// starting at 0 compares the pattern with itself and yields pNext[i] == i,
	// which makes the fallback "j = pNext[j]" loop forever.
	pNext[0] = -1;
	for (int i = 1; i < n; i++)
	{
		// Fall back to shorter borders until one can be extended by str[i].
		while (j != -1 && str[j + 1] != str[i])
		{
			j = pNext[j];
		}
		if (str[j + 1] == str[i])
			j++;
		pNext[i] = j;
	}
}

// Counts the occurrences of the pattern pa in the text str. Occurrences may
// overlap: "aa" occurs 4 times in "aaaaa".
//
// n:   length of str.
// str: the text.
// n2:  length of pa (must be <= max_n).
// pa:  the pattern.
// returns: number of positions in str at which pa ends. An empty pattern
//          (n2 <= 0) is reported as 0 occurrences.
int KMP(int n, char str[], int n2, char pa[])
{
	if (n2 <= 0)
		return 0;
	getNext(n2, pa);
	// j is the index of the last pattern character matched so far (-1 = none).
	int j = -1;
	int ans = 0;
	for (int i = 0; i < n; i++)
	{
		while (j != -1 && str[i] != pa[j + 1])
		{
			j = pNext[j];
		}
		// Advance only when the next pattern character matches the text.
		if (str[i] == pa[j + 1])
		{
			j++;
		}
		if (j == n2 - 1)
		{
			ans++;
			// Continue from the longest border so overlapping matches are
			// found and pa[j + 1] stays inside the pattern.
			j = pNext[j];
		}
	}
	return ans;
}
posted @ 2021-09-09 22:48  小帆敲代码  阅读(26)  评论(0)  编辑  收藏  举报