字符串匹配算法
字符串暴力匹配算法
算法步骤:
用i指向原文本,j指向待匹配字符串。匹配时i,j下标递增,不匹配时待匹配字符串下标j置零,并使用待匹配字符串第0号元素循环匹配此时失配的文本字符直到匹配。
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
/*
 * Brute-force substring search.
 *
 * Prints one line for every occurrence of `mystring` inside `text`,
 * overlapping occurrences included.
 *
 * Every possible start position in the text is tried in turn and the pattern
 * is compared character by character from there.  When a comparison fails the
 * scan resumes at the next start position, so no occurrence is skipped.  The
 * worst-case cost is O(text_len * mystring_len).
 *
 * Parameters:
 *   text      NUL-terminated string to search in.
 *   mystring  NUL-terminated pattern to look for.
 *
 * Output format (one line per occurrence):
 *   "i = <end>,mystring_len = <len>,matched at <start>"
 * where <start> is the 0-based index of the first matched character and
 * <end> is the index just past the last matched character.
 *
 * An empty pattern, or a pattern longer than the text, prints nothing.
 */
void searchString(const char* text, const char* mystring){
    int mystring_len = (int)strlen(mystring);
    int text_len = (int)strlen(text);

    if (mystring_len == 0 || mystring_len > text_len) {
        return;
    }

    /* The last start position that still leaves room for the whole pattern. */
    int last_start = text_len - mystring_len;

    for (int start = 0; start <= last_start; start++) {
        int j = 0;
        while (j < mystring_len && text[start + j] == mystring[j]) {
            j++;
        }
        if (j == mystring_len) {
            int i = start + mystring_len; /* index just past the match */
            printf("i = %d,mystring_len = %d,matched at %d\n", i, mystring_len, i - mystring_len);
        }
    }
}
/*
 * Demo driver for the brute-force search: looks for a fixed pattern inside a
 * fixed sample text.  searchString prints each occurrence it finds.
 */
int main(){
    char needle[] = "ABABABCBACBA";
    char haystack[] = "ABAABAABASscscacsaABABABCBACBASCASFFSAFAFWDAABABABCBACBA";

    searchString(haystack, needle);

    return 0;
}
i = 30,mystring_len = 12,matched at 18
i = 56,mystring_len = 12,matched at 44
效率分析:
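每次失配时,文本字符都要与待匹配字符串的首个字符进行对比;整体来看,每个文本字符只会被比较常数次,因此这种失配后不回溯 i 的扫描时间复杂度为O(n),n为文本的长度。需要注意:不回溯 i 可能漏掉匹配(例如在文本"AAAB"中查找"AAB");标准的暴力匹配在失配时要把 i 回退到本次起点的下一个位置,其最坏时间复杂度为O(n·m),m为待匹配字符串的长度。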
KMP字符串匹配算法
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
/*
 * Builds the longest-prefix/suffix match table used by KMP.
 *
 * After the call, prefix[i] (for 0 <= i < n) is the length of the longest
 * proper prefix of pattern[0..i] that is also a suffix of pattern[0..i].
 *
 * Parameters:
 *   pattern  the pattern characters; only the first n are read.
 *   prefix   output table; must have room for at least n ints.
 *   n        number of pattern characters to process.
 *
 * When n <= 0 nothing is written, so a zero-length table is never touched.
 */
void prefix_table(const char pattern[], int prefix[], int n){
    if (n <= 0) {
        return;
    }

    prefix[0] = 0;   /* a single character has no proper prefix */
    int len = 0;     /* length of the border currently being extended */
    int i = 1;
    while (i < n) {
        if (pattern[i] == pattern[len]) {
            /* The current border grows by one character. */
            len++;
            prefix[i] = len;
            i++;
        } else if (len > 0) {
            /* Fall back to the next shorter border and retry the same i. */
            len = prefix[len - 1];
        } else {
            /* No border ends at position i. */
            prefix[i] = 0;
            i++;
        }
    }
}
/*
 * Shifts the prefix table one position to the right, in place.
 *
 * The last of the n entries is dropped, every other entry moves from index
 * i-1 to index i, and prefix[0] is set to -1.
 *
 * Parameters:
 *   prefix  table of n ints to shift.
 *   n       number of entries in the table.
 *
 * Only indices 0..n-1 are written.  When n <= 0 the table is left untouched.
 */
void move_prefix_table(int prefix[], int n){
    if (n <= 0) {
        return;
    }

    /* Walk from the back so each source entry is read before it is overwritten. */
    for (int i = n - 1; i > 0; i--) {
        prefix[i] = prefix[i - 1];
    }
    prefix[0] = -1;
}
/*
 * KMP search: prints the position of every occurrence of `pattern` in `text`.
 *
 * The prefix table of the pattern is built with prefix_table() and shifted
 * with move_prefix_table().  After the shift, prefix[j] is the pattern index
 * to fall back to when the comparison at pattern index j fails, and -1 means
 * there is nothing to fall back to, so the text index advances instead.
 *
 * Parameters:
 *   text     NUL-terminated string to search in.
 *   pattern  NUL-terminated pattern to look for.
 *
 * Output: one line "Found pattern at P" per occurrence, where P is i - j + 1,
 * i.e. the 0-based start index of the occurrence plus one.
 *
 * An empty pattern prints nothing.  If the table cannot be allocated, a
 * message is written to stderr and the function returns without searching.
 */
void kmp_search(char text[],char pattern[]){
    int n = (int)strlen(pattern);
    int m = (int)strlen(text);

    if (n == 0) {
        return;
    }

    /*
     * One entry more than the pattern length is allocated so that shifting
     * the table right by one position can never touch memory past the buffer.
     */
    int *prefix = malloc(((size_t)n + 1) * sizeof *prefix);
    if (prefix == NULL) {
        fprintf(stderr, "kmp_search: out of memory\n");
        return;
    }
    prefix_table(pattern, prefix, n);
    move_prefix_table(prefix, n);

    int i = 0;  /* index into text */
    int j = 0;  /* index into pattern */
    while (i < m) {
        if (j == n - 1 && text[i] == pattern[j]) {
            printf("Found pattern at %d\n", i - j + 1);
            /* Continue as if the last character had mismatched. */
            j = prefix[j];
            if (j == -1) {
                /*
                 * Only reachable for a one-character pattern: there is no
                 * fallback, so step to the next text character directly
                 * instead of indexing the pattern with -1.
                 */
                i++;
                j = 0;
                continue;
            }
        }
        if (text[i] == pattern[j]) {
            i++;
            j++;
        } else {
            j = prefix[j];
            if (j == -1) {
                /* Mismatch on the first pattern character: advance the text. */
                i++;
                j++;
            }
        }
    }

    free(prefix);
}
/*
 * Demo driver for the KMP search: looks for a fixed pattern inside a fixed
 * sample text.  kmp_search prints each occurrence it finds.
 *
 * Debugging aid: to inspect the shifted prefix table, fill an int array that
 * has one entry per pattern character (9 here) with prefix_table(), pass it
 * to move_prefix_table(), and print each entry with "%d\n".
 */
int main(){
    char haystack[] = "ABABAABABCABAABABAC";
    char needle[] = "ABABCABAA";

    kmp_search(haystack, needle);

    return 0;
}