最长公共子串
问题:
求字符串str1,str2的最长公共子串的长度。
1. 动态规划方法
算法:
定义二元函数f(m,n):分别以str1[m],str2[n]结尾的连续公共子串的长度
而对于f(m+1,n+1) 有以下两种情况
1.str1[m+1] != str2[n+1],则有f(m+1,n+1) =0
2.str1[m+1] == str2[n+1],则有f(m+1,n+1) = f(m,n) + 1
另外f(0,j) = 0(j>=0)
f(j,0) = 0 (j>=0)
算法的c++代码实现如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
// Returns the length of the longest common substring (a contiguous run of
// characters) shared by the NUL-terminated strings str1 and str2.
//
// Dynamic programming over a (len1+1) x (len2+1) table: pf[row][col] is the
// length of the longest common substring that ends exactly at str1[row-1]
// and str2[col-1]. Row 0 and column 0 stand for the empty prefix and stay 0.
//
//   str1[row-1] != str2[col-1]  ->  pf[row][col] = 0
//   str1[row-1] == str2[col-1]  ->  pf[row][col] = pf[row-1][col-1] + 1
//
// The answer is the largest value ever written to the table.
// Time and space are both O(len1 * len2).
//
// A null pointer is treated like an empty string, so the result is 0.
int commstr(char *str1, char *str2) {
    if (!str1 || !str2) {
        return 0;
    }

    const int len1 = static_cast<int>(strlen(str1));
    const int len2 = static_cast<int>(strlen(str2));

    // All len1+1 rows are allocated and zero-filled up front; the vector
    // releases the storage on every return path, so no manual clean-up loop
    // is needed.
    std::vector<std::vector<int> > pf(len1 + 1, std::vector<int>(len2 + 1, 0));

    int max = 0;
    for (int row = 1; row <= len1; ++row) {
        for (int col = 1; col <= len2; ++col) {
            if (str1[row - 1] == str2[col - 1]) {
                // Extend the common run that ended on the previous diagonal cell.
                pf[row][col] = pf[row - 1][col - 1] + 1;
                if (pf[row][col] > max) {
                    max = pf[row][col];
                }
            }
            // On a mismatch the cell keeps its initial value 0.
        }
    }
    return max;
}
优化:代码采用了一个(len1+1)*(len2+1)的二维数组来存储临时数据,然而每一行(row)的值只依赖于它的前一行,因此完全可以用两个长度为len2+1的一维数组交替滚动来代替二维数组,把空间开销从O(len1*len2)降到O(len2)。
2. 分治方法
算法:
1) getComab(stra, strb):求解stra与strb之间以stra[0]开头的最长公共子串coma;
2) getCom(stra, strb):求解stra, strb的最长公共子串coms;
stra,strb的最长公共子串 coms = maxlen(getComab(stra,strb), getComab(stra+1,strb), ......);
算法的c++代码实现如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
// Finds the longest common substring of stra and strb that starts at
// stra[0], i.e. the longest prefix of stra that occurs somewhere in strb.
//
// Returns the matched characters followed by a single terminating '\0', so
// size() is always (match length + 1) and &result[0] is a valid C string.
// When nothing matches (or either argument is null or empty) the result is
// just {'\0'}. Among equally long matches the characters are identical, so
// which occurrence in strb is used does not affect the result.
//
// The function writes nothing to stdout.
std::vector<char> getComab(char *stra, char *strb) {
    typedef std::vector<char>::size_type Size;

    Size bestLen = 0;
    if (stra && strb) {
        // Try every start position in strb and measure how far the prefix of
        // stra keeps matching from there.
        for (const char *start = strb; *start != '\0'; ++start) {
            Size len = 0;
            // start[len] == '\0' can never equal a non-NUL stra[len], so the
            // comparison also stops at the end of strb.
            while (stra[len] != '\0' && stra[len] == start[len]) {
                ++len;
            }
            if (len > bestLen) {
                bestLen = len;
            }
        }
    }

    std::vector<char> comab;
    if (bestLen > 0) {
        comab.assign(stra, stra + bestLen);
    }
    comab.push_back('\0');
    return comab;
}

// Finds the longest common substring of stra and strb.
//
// Calls getComab for every start position in stra and keeps the longest
// result; on ties the match that starts earliest in stra wins. The result is
// the matched characters followed by one terminating '\0'; it is {'\0'} when
// the strings share no character or either argument is null or empty.
std::vector<char> getCom(char *stra, char *strb) {
    std::vector<char> coms(1, '\0');
    if (!stra || !strb) {
        return coms;
    }

    for (char *p = stra; *p != '\0'; ++p) {
        std::vector<char> coma = getComab(p, strb);
        // Every candidate is NUL-terminated, so comparing sizes compares
        // match lengths.
        if (coma.size() > coms.size()) {
            coms.swap(coma);
        }
    }
    return coms;
}
优化: 这个算法用两重循环逐个位置进行匹配,存在较多的重复计算,因此方法2的效率要比方法1差得多。