最长公共子序列和字符串相似度
- 计算两个字符串的最长公共子序列(LCS),且公共子序列在字符串中不需要是连续的。
- 计算两个字符串的距离(即编辑距离):完全相同的字符串距离为0;可以通过修改一个字符、增加一个字符或删除一个字符三种操作来使两个字符串相同,每执行一次操作距离加1,两个字符串的距离定义为所需的最少操作次数。
1、思路:
详见http://zhedahht.blog.163.com/
#include <algorithm>
#include <cstddef>
#include <cstring>
#include <vector>

// Returns the length of the longest common subsequence (LCS) of two
// NUL-terminated strings. The common subsequence does not have to be
// contiguous in either string.
//
// str1, str2: input strings; a null pointer is treated as an empty string.
// Returns:    the LCS length (0 when either string is empty).
//
// Classic dynamic programming over prefixes:
//   dp[i][j] = dp[i-1][j-1] + 1                 if str1[i-1] == str2[j-1]
//   dp[i][j] = max(dp[i-1][j], dp[i][j-1])      otherwise
// Only the previous and the current row are ever read, so two rows are kept
// instead of the full (len1 + 1) x (len2 + 1) table.
int LCS(const char* str1, const char* str2)
{
    const std::size_t len1 = (str1 != nullptr) ? std::strlen(str1) : 0;
    const std::size_t len2 = (str2 != nullptr) ? std::strlen(str2) : 0;

    // Row 0 of the table: the LCS with an empty prefix of str1 is always 0.
    std::vector<int> prev(len2 + 1, 0);
    std::vector<int> curr(len2 + 1, 0);

    for (std::size_t i = 1; i <= len1; ++i)
    {
        // curr[0] is always 0 (empty prefix of str2); it is never written.
        for (std::size_t j = 1; j <= len2; ++j)
        {
            if (str1[i - 1] == str2[j - 1])
                curr[j] = prev[j - 1] + 1;
            else
                curr[j] = std::max(prev[j], curr[j - 1]);
        }
        // The row just computed becomes the "previous" row of the next step.
        prev.swap(curr);
    }

    return prev[len2];
}
2、思路:
详见http://www.cnblogs.com/yujunyong/articles/2004724.html
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <vector>
using namespace std;

// Returns the smallest of the three given values.
int minValue(int i, int j, int k)
{
    return std::min(i, std::min(j, k));
}

// Returns the edit distance between two NUL-terminated strings: the minimum
// number of single-character substitutions, insertions and deletions needed
// to make them equal. Identical strings have distance 0.
//
// str1, str2: input strings; a null pointer is treated as an empty string.
// Returns:    the edit distance.
//
// Dynamic programming over prefixes:
//   dp[i][0] = i, dp[0][j] = j
//   dp[i][j] = dp[i-1][j-1]                                   if str1[i-1] == str2[j-1]
//   dp[i][j] = min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1]) + 1  otherwise
// Only the previous and the current row are ever read, so two rows are kept
// instead of the full (len1 + 1) x (len2 + 1) table.
int CalStrDistance(const char* str1, const char* str2)
{
    const std::size_t len1 = (str1 != nullptr) ? std::strlen(str1) : 0;
    const std::size_t len2 = (str2 != nullptr) ? std::strlen(str2) : 0;

    // Row 0: turning the empty prefix of str1 into str2[0..j) takes j insertions.
    std::vector<int> prev(len2 + 1);
    for (std::size_t j = 0; j <= len2; ++j)
        prev[j] = static_cast<int>(j);
    std::vector<int> curr(len2 + 1, 0);

    for (std::size_t i = 1; i <= len1; ++i)
    {
        // Column 0: turning str1[0..i) into the empty string takes i deletions.
        curr[0] = static_cast<int>(i);
        for (std::size_t j = 1; j <= len2; ++j)
        {
            if (str1[i - 1] == str2[j - 1])
                curr[j] = prev[j - 1];
            else
                curr[j] = minValue(prev[j], curr[j - 1], prev[j - 1]) + 1;
        }
        // The row just computed becomes the "previous" row of the next step.
        prev.swap(curr);
    }

    return prev[len2];
}

// Demo: prints the edit distance between "abcd" and "adbd".
int main()
{
    // String literals are const; binding them to plain char* is ill-formed
    // since C++11.
    const char* A = "abcd";
    const char* B = "adbd";
    int r = CalStrDistance(A, B);
    std::printf("result:\t%d\n", r);
    return 0;
}