这是动态规划的经典问题之一,也有空间换时间的特点:储存计算过程中的信息,防止回溯及多余的遍历。
【公共连续子串】
我们知道,如果要求解“最长公共连续子串”问题,可以用下面这篇文章介绍的“矩阵填充法”:
http://acm.whu.edu.cn/blog/read.php?24
实际上不连续的子序列问题,也还是采用了类似的方法。
【最长公共子序列问题】
具体的解释和递推公式,可以看下面的参考文献。我的代码主要包含两个函数:第一个计算最长公共子序列的长度,第二个在矩阵中回溯,以找出这个子序列。回溯时采用了与填充矩阵时完全一致的判断规则。
//LCS
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
/**
 * Computes the length of the longest common subsequence (LCS) of a and b.
 *
 * On return, matrix[i][j] holds the LCS length of the first i characters of
 * a and the first j characters of b. The table is resized here to
 * (a.size()+1) x (b.size()+1) and zero-filled, so the caller may pass a
 * matrix of any size (including an empty one); its previous contents are
 * discarded.
 *
 * @param a       first string (not modified)
 * @param b       second string (not modified)
 * @param matrix  output DP table, overwritten by this call
 * @return        matrix[a.size()][b.size()], i.e. the LCS length
 */
int LCS_length( const std::string& a, const std::string& b, std::vector< std::vector<int> >& matrix )
{
    typedef std::string::size_type index_t;

    const index_t rows = a.size() + 1;
    const index_t cols = b.size() + 1;

    // Row 0 and column 0 represent an empty prefix and must stay zero; sizing
    // the table here removes the out-of-bounds risk of an undersized argument.
    matrix.assign( rows, std::vector<int>( cols, 0 ) );

    for( index_t i = 1; i < rows; ++i )
    {
        for( index_t j = 1; j < cols; ++j )
        {
            if( a[i-1] == b[j-1] )
            {
                // Matching characters extend the LCS of both shorter prefixes.
                matrix[i][j] = matrix[i-1][j-1] + 1;
            }
            else
            {
                // Otherwise drop one character from either string and keep the best.
                matrix[i][j] = std::max( matrix[i-1][j], matrix[i][j-1] );
            }
        }
    }

    // The bottom-right cell covers both complete strings.
    return matrix[rows-1][cols-1];
}
// Builds one longest common subsequence of a and b.
// Side effect: prints the LCS length to standard output.
// The DP table is filled by LCS_length() and then walked from the
// bottom-right cell back towards the origin, collecting matched characters.
std::string build_LCS( std::string& a, std::string& b )
{
    // One extra row and column (all zeros) represent the empty prefixes.
    std::vector< std::vector<int> > table( a.size() + 1, std::vector<int>( b.size() + 1, 0 ) );

    const int lcs_len = LCS_length( a, b, table );
    std::cout << "The length of longest common subsequence is :" << lcs_len << std::endl;

    // Start at the cell that covers both complete strings.
    int row = a.size();
    int col = b.size();

    std::string collected;   // matched characters, gathered back-to-front
    int remaining = lcs_len; // characters of the LCS still to be found

    while( remaining > 0 )
    {
        const int here = table[row][col];

        if( here == table[row-1][col] )
        {
            // Same value one row up: step up without taking a character.
            --row;
            continue;
        }
        if( here == table[row][col-1] )
        {
            // Same value one column left: step left without taking a character.
            --col;
            continue;
        }

        // Neither neighbour matches the current value, so this cell was
        // produced by the diagonal "+1" step: take the character and move
        // diagonally.
        collected.push_back( a[row-1] );
        --remaining;
        --row;
        --col;
    }

    // Characters were gathered from the end of the subsequence to its start.
    std::reverse( collected.begin(), collected.end() );
    return collected;
}
// Demo driver: computes and prints the LCS of two sample strings.
int main()
{
    std::string a = "hello world happy birthday";
    std::string b = "abcd happy birthday";

    // build_LCS() prints the LCS length itself. Call it as a separate
    // statement so that line always comes before the header below; when the
    // call sits inside the << chain, the relative order of the two lines
    // depends on the language standard (unspecified before C++17).
    const std::string lcs = build_LCS( a, b );

    std::cout << "The LCS is: " << '\n' << lcs << std::endl;
}
#include <iostream>
#include <vector>
#include <string>
#include <algorithm>
using namespace std;
/**
 * Computes the length of the longest common subsequence (LCS) of a and b.
 *
 * On return, matrix[i][j] holds the LCS length of the first i characters of
 * a and the first j characters of b. The table is resized here to
 * (a.size()+1) x (b.size()+1) and zero-filled, so the caller may pass a
 * matrix of any size (including an empty one); its previous contents are
 * discarded.
 *
 * @param a       first string (not modified)
 * @param b       second string (not modified)
 * @param matrix  output DP table, overwritten by this call
 * @return        matrix[a.size()][b.size()], i.e. the LCS length
 */
int LCS_length( const std::string& a, const std::string& b, std::vector< std::vector<int> >& matrix )
{
    typedef std::string::size_type index_t;

    const index_t rows = a.size() + 1;
    const index_t cols = b.size() + 1;

    // Row 0 and column 0 represent an empty prefix and must stay zero; sizing
    // the table here removes the out-of-bounds risk of an undersized argument.
    matrix.assign( rows, std::vector<int>( cols, 0 ) );

    for( index_t i = 1; i < rows; ++i )
    {
        for( index_t j = 1; j < cols; ++j )
        {
            if( a[i-1] == b[j-1] )
            {
                // Matching characters extend the LCS of both shorter prefixes.
                matrix[i][j] = matrix[i-1][j-1] + 1;
            }
            else
            {
                // Otherwise drop one character from either string and keep the best.
                matrix[i][j] = std::max( matrix[i-1][j], matrix[i][j-1] );
            }
        }
    }

    // The bottom-right cell covers both complete strings.
    return matrix[rows-1][cols-1];
}
// Builds one longest common subsequence of a and b.
// Side effect: prints the LCS length to standard output.
// The DP table is filled by LCS_length() and then walked from the
// bottom-right cell back towards the origin, collecting matched characters.
std::string build_LCS( std::string& a, std::string& b )
{
    // One extra row and column (all zeros) represent the empty prefixes.
    std::vector< std::vector<int> > table( a.size() + 1, std::vector<int>( b.size() + 1, 0 ) );

    const int lcs_len = LCS_length( a, b, table );
    std::cout << "The length of longest common subsequence is :" << lcs_len << std::endl;

    // Start at the cell that covers both complete strings.
    int row = a.size();
    int col = b.size();

    std::string collected;   // matched characters, gathered back-to-front
    int remaining = lcs_len; // characters of the LCS still to be found

    while( remaining > 0 )
    {
        const int here = table[row][col];

        if( here == table[row-1][col] )
        {
            // Same value one row up: step up without taking a character.
            --row;
            continue;
        }
        if( here == table[row][col-1] )
        {
            // Same value one column left: step left without taking a character.
            --col;
            continue;
        }

        // Neither neighbour matches the current value, so this cell was
        // produced by the diagonal "+1" step: take the character and move
        // diagonally.
        collected.push_back( a[row-1] );
        --remaining;
        --row;
        --col;
    }

    // Characters were gathered from the end of the subsequence to its start.
    std::reverse( collected.begin(), collected.end() );
    return collected;
}
// Demo driver: computes and prints the LCS of two sample strings.
int main()
{
    std::string a = "hello world happy birthday";
    std::string b = "abcd happy birthday";

    // build_LCS() prints the LCS length itself. Call it as a separate
    // statement so that line always comes before the header below; when the
    // call sits inside the << chain, the relative order of the two lines
    // depends on the language standard (unspecified before C++17).
    const std::string lcs = build_LCS( a, b );

    std::cout << "The LCS is: " << '\n' << lcs << std::endl;
}
【参考文献】
最长公共子序列(Longest Common Subsequence)
http://blog.csdn.net/hhygcy/archive/2009/03/02/3948969.aspx
这里还给出了《编程之美》中“计算字符串的相似度”问题(也是一个动态规划算法)的非递归解法。字符串的相似度问题又称“编辑距离”(英文叫 Levenshtein Distance 或 Edit Distance),是字符串处理的常用算法之一,像 Windows 下的 WinMerge 和 Linux 下的 diff 工具,就是基于这类字符串比较的。
代码如下:
// Edit_Distance.cpp : Defines the entry point for the console application.
#include <vector>
#include <iostream>
#include <algorithm>
/**
 * Computes the Levenshtein (edit) distance between two sequences: the minimum
 * number of single-element insertions, deletions and substitutions needed to
 * turn s1 into s2.
 *
 * T must provide size() and operator[], and its elements must be comparable
 * with ==.
 *
 * @param s1  source sequence
 * @param s2  target sequence
 * @return    the edit distance between s1 and s2
 */
template <class T> unsigned int edit_distance(const T& s1, const T& s2)
{
    const size_t len1 = s1.size(), len2 = s2.size();

    // d[i][j] = edit distance between the first i elements of s1 and the
    // first j elements of s2.
    std::vector<std::vector<unsigned int> > d(len1 + 1, std::vector<unsigned int>(len2 + 1));

    // Against an empty prefix the distance is simply the prefix length.
    for (size_t i = 1; i <= len1; ++i) d[i][0] = static_cast<unsigned int>(i);
    for (size_t j = 1; j <= len2; ++j) d[0][j] = static_cast<unsigned int>(j);

    for (size_t i = 1; i <= len1; ++i)
    {
        for (size_t j = 1; j <= len2; ++j)
        {
            const unsigned int deletion     = d[i - 1][j] + 1;
            const unsigned int insertion    = d[i][j - 1] + 1;
            // Substitution is free when the two elements already match.
            const unsigned int substitution = d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0u : 1u);
            d[i][j] = std::min(std::min(deletion, insertion), substitution);
        }
    }
    return d[len1][len2];
}
int main()
{
std::string s1("ABCBDAB");
std::string s2("BDCABA");
std::cout << "edit distance = " << edit_distance(s1, s2) << std::endl;;
return 0;
}
#include <vector>
#include <iostream>
#include <algorithm>
/**
 * Computes the Levenshtein (edit) distance between two sequences: the minimum
 * number of single-element insertions, deletions and substitutions needed to
 * turn s1 into s2.
 *
 * T must provide size() and operator[], and its elements must be comparable
 * with ==.
 *
 * @param s1  source sequence
 * @param s2  target sequence
 * @return    the edit distance between s1 and s2
 */
template <class T> unsigned int edit_distance(const T& s1, const T& s2)
{
    const size_t len1 = s1.size(), len2 = s2.size();

    // d[i][j] = edit distance between the first i elements of s1 and the
    // first j elements of s2.
    std::vector<std::vector<unsigned int> > d(len1 + 1, std::vector<unsigned int>(len2 + 1));

    // Against an empty prefix the distance is simply the prefix length.
    for (size_t i = 1; i <= len1; ++i) d[i][0] = static_cast<unsigned int>(i);
    for (size_t j = 1; j <= len2; ++j) d[0][j] = static_cast<unsigned int>(j);

    for (size_t i = 1; i <= len1; ++i)
    {
        for (size_t j = 1; j <= len2; ++j)
        {
            const unsigned int deletion     = d[i - 1][j] + 1;
            const unsigned int insertion    = d[i][j - 1] + 1;
            // Substitution is free when the two elements already match.
            const unsigned int substitution = d[i - 1][j - 1] + (s1[i - 1] == s2[j - 1] ? 0u : 1u);
            d[i][j] = std::min(std::min(deletion, insertion), substitution);
        }
    }
    return d[len1][len2];
}
int main()
{
std::string s1("ABCBDAB");
std::string s2("BDCABA");
std::cout << "edit distance = " << edit_distance(s1, s2) << std::endl;;
return 0;
}