动态规划法解最长公共子序列问题
问题描述
给定两个字符串,求解这两个字符串的最长公共子序列(Longest Common Sequence)。
输入序列“ ABCDGH”和“ AEDFHR”的LCS为长度3的“ ADH”。
输入序列“ AGGTAB”和“ GXTXAYB”的LCS为长度4的“ GTAB”。
最佳子结构:
假设输入序列分别为长度为m和n的X [0..m-1]和Y [0..n-1]。并令L(X [0..m-1],Y [0..n-1])为两个序列X和Y的LCS的长度。以下是L(X [0 ... m-1],Y [0..n-1])。
如果两个序列的最后一个字符匹配(或X [m-1] == Y [n-1]),则
L(X [0..m-1],Y [0..n-1])= 1 + L(X [0..m-2],Y [0..n-2])
如果两个序列的最后一个字符不匹配(或X [m-1]!= Y [n-1]),则
L(X [0..m-1],Y [0..n-1])= MAX(L(X [0..m-2],Y [0..n-1]),L(X [0..m-1],Y [0..n-2]))
示例:
1)考虑输入字符串“ AGGTAB”和“ GXTXAYB”。最后一个字符与字符串匹配。因此,LCS的长度可以写为:
L(“ AGGTAB”,“ GXTXAYB”)= 1 + L(“ AGGTA”,“ GXTXAY”)
2)考虑输入字符串“ ABCDGH”和“ AEDFHR”。字符串的最后一个字符不匹配。因此,LCS的长度可以写成:
L(“ ABCDGH”,“ AEDFHR”)= MAX(L(“ ABCDG”,“ AEDFH R ”),L(“ ABCDG H ”,“ AEDFH”))
因此,LCS问题具有最佳的子结构属性,因为可以使用子问题的解决方案来解决主要问题。
实现
以下是LCS问题的简单递归实现。该实现仅遵循上述递归结构。
c++
/* A Naive recursive implementation of LCS problem */
#include <bits/stdc++.h>
using namespace std;
int max(int a, int b);
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char *X, char *Y, int m, int n )
{
if (m == 0 || n == 0)
return 0;
if (X[m-1] == Y[n-1])
return 1 + lcs(X, Y, m-1, n-1);
else
return max(lcs(X, Y, m, n-1), lcs(X, Y, m-1, n));
}
/* Utility function to get max of 2 integers */
int max(int a, int b)
{
return (a > b)? a : b;
}
/* Driver code */
int main()
{
char X[] = "AGGTAB";
char Y[] = "GXTXAYB";
int m = strlen(X);
int n = strlen(Y);
cout<<"Length of LCS is "<< lcs( X, Y, m, n ) ;
return 0;
}
java版
/* A Naive recursive implementation of LCS problem in java*/
public class LongestCommonSubsequence
{
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char[] X, char[] Y, int m, int n )
{
if (m == 0 || n == 0)
return 0;
if (X[m-1] == Y[n-1])
return 1 + lcs(X, Y, m-1, n-1);
else
return max(lcs(X, Y, m, n-1), lcs(X, Y, m-1, n));
}
/* Utility function to get max of 2 integers */
int max(int a, int b)
{
return (a > b)? a : b;
}
public static void main(String[] args)
{
LongestCommonSubsequence lcs = new LongestCommonSubsequence();
String s1 = "AGGTAB";
String s2 = "GXTXAYB";
char[] X=s1.toCharArray();
char[] Y=s2.toCharArray();
int m = X.length;
int n = Y.length;
System.out.println("Length of LCS is" + " " +
lcs.lcs( X, Y, m, n ) );
}
}
python版
# A Naive recursive Python implementation of LCS problem
def lcs(X, Y, m, n):
if m == 0 or n == 0:
return 0;
elif X[m-1] == Y[n-1]:
return 1 + lcs(X, Y, m-1, n-1);
else:
return max(lcs(X, Y, m, n-1), lcs(X, Y, m-1, n));
# Driver program to test the above function
X = "AGGTAB"
Y = "GXTXAYB"
print "Length of LCS is ", lcs(X , Y, len(X), len(Y))
输出:
LCS的长度是4
上述递归方法的时间复杂度在最坏的情况下为O(2 ^ n),最坏的情况发生在X和Y的所有字符不匹配(即LCS的长度为0)时。
考虑到上述实现,以下是针对输入字符串“ AXYT”和“ AYZX”
lcs(“ AXYT”,“ AYZX”)
/
lcs(“ AXY”,“ AYZX”)lcs(“ AXYT”,“ AYZ”)
///
lcs(“ AX”,“ AYZX”)lcs(“ AXY”,“ AYZ”)lcs(“ AXY”,“ AYZ”)lcs(“ AXYT”,“ AY”)
在上面的部分递归树中,lcs(“ AXY”,“ AYZ”)被求解两次。如果我们绘制完整的递归树,则可以看到有很多子问题可以一次又一次地解决。因此,此问题具有“重叠子结构”属性,可以通过使用“记忆化”或“制表”来避免重新计算相同子问题。以下是LCS问题的列表实现。
/* Dynamic Programming C++ implementation of LCS problem */
#include<bits/stdc++.h>
using namespace std;
int max(int a, int b);
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char *X, char *Y, int m, int n )
{
int L[m + 1][n + 1];
int i, j;
/* Following steps build L[m+1][n+1] in
bottom up fashion. Note that L[i][j]
contains length of LCS of X[0..i-1]
and Y[0..j-1] */
for (i = 0; i <= m; i++)
{
for (j = 0; j <= n; j++)
{
if (i == 0 || j == 0)
L[i][j] = 0;
else if (X[i - 1] == Y[j - 1])
L[i][j] = L[i - 1][j - 1] + 1;
else
L[i][j] = max(L[i - 1][j], L[i][j - 1]);
}
}
/* L[m][n] contains length of LCS
for X[0..n-1] and Y[0..m-1] */
return L[m][n];
}
/* Utility function to get max of 2 integers */
int max(int a, int b)
{
return (a > b)? a : b;
}
// Driver Code
int main()
{
char X[] = "AGGTAB";
char Y[] = "GXTXAYB";
int m = strlen(X);
int n = strlen(Y);
cout << "Length of LCS is "
<< lcs( X, Y, m, n );
return 0;
}
/* Dynamic Programming Java implementation of LCS problem */
public class LongestCommonSubsequence
{
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char[] X, char[] Y, int m, int n )
{
int L[][] = new int[m+1][n+1];
/* Following steps build L[m+1][n+1] in bottom up fashion. Note
that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] */
for (int i=0; i<=m; i++)
{
for (int j=0; j<=n; j++)
{
if (i == 0 || j == 0)
L[i][j] = 0;
else if (X[i-1] == Y[j-1])
L[i][j] = L[i-1][j-1] + 1;
else
L[i][j] = max(L[i-1][j], L[i][j-1]);
}
}
return L[m][n];
}
/* Utility function to get max of 2 integers */
int max(int a, int b)
{
return (a > b)? a : b;
}
public static void main(String[] args)
{
LongestCommonSubsequence lcs = new LongestCommonSubsequence();
String s1 = "AGGTAB";
String s2 = "GXTXAYB";
char[] X=s1.toCharArray();
char[] Y=s2.toCharArray();
int m = X.length;
int n = Y.length;
System.out.println("Length of LCS is" + " " +
lcs.lcs( X, Y, m, n ) );
}
}
# Dynamic Programming implementation of LCS problem
def lcs(X , Y):
# find the length of the strings
m = len(X)
n = len(Y)
# declaring the array for storing the dp values
L = [[None]*(n+1) for i in xrange(m+1)]
"""Following steps build L[m+1][n+1] in bottom up fashion
Note: L[i][j] contains length of LCS of X[0..i-1]
and Y[0..j-1]"""
for i in range(m+1):
for j in range(n+1):
if i == 0 or j == 0 :
L[i][j] = 0
elif X[i-1] == Y[j-1]:
L[i][j] = L[i-1][j-1]+1
else:
L[i][j] = max(L[i-1][j] , L[i][j-1])
# L[m][n] contains the length of LCS of X[0..n-1] & Y[0..m-1]
return L[m][n]
#end of function lcs
# Driver program to test the above function
X = "AGGTAB"
Y = "GXTXAYB"
print "Length of LCS is ", lcs(X, Y)
打印最长的公共子序列
以下是打印LCS的详细算法。它使用相同的2D表L [] []。
1)构造L [m + 1] [n + 1] 。
2)值L [m] [n]包含LCS的长度。创建一个字符数组lcs [],其长度等于lcs的长度加1(用于存储\ 0的字符数组)。
3)从L [m] [n]开始遍历2D数组。对每个单元格L [i] [j]
..... a执行以下操作:a)如果对应于L [i] [j]的字符(在X和Y中)相同(或X [i-1] == Y [j- 1]),然后将此字符作为LCS的一部分。
….. b)否则比较L [i-1] [j]和L [i] [j-1]的值并朝更大的方向前进。
/* Dynamic Programming implementation of LCS problem */
#include<iostream>
#include<cstring>
#include<cstdlib>
using namespace std;
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
void lcs( char *X, char *Y, int m, int n )
{
int L[m+1][n+1];
/* Following steps build L[m+1][n+1] in bottom up fashion. Note
that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] */
for (int i=0; i<=m; i++)
{
for (int j=0; j<=n; j++)
{
if (i == 0 || j == 0)
L[i][j] = 0;
else if (X[i-1] == Y[j-1])
L[i][j] = L[i-1][j-1] + 1;
else
L[i][j] = max(L[i-1][j], L[i][j-1]);
}
}
// Following code is used to print LCS
int index = L[m][n];
// Create a character array to store the lcs string
char lcs[index+1];
lcs[index] = '\0'; // Set the terminating character
// Start from the right-most-bottom-most corner and
// one by one store characters in lcs[]
int i = m, j = n;
while (i > 0 && j > 0)
{
// If current character in X[] and Y are same, then
// current character is part of LCS
if (X[i-1] == Y[j-1])
{
lcs[index-1] = X[i-1]; // Put current character in result
i--; j--; index--; // reduce values of i, j and index
}
// If not same, then find the larger of two and
// go in the direction of larger value
else if (L[i-1][j] > L[i][j-1])
i--;
else
j--;
}
// Print the lcs
cout << "LCS of " << X << " and " << Y << " is " << lcs;
}
/* Driver program to test above function */
int main()
{
char X[] = "AGGTAB";
char Y[] = "GXTXAYB";
int m = strlen(X);
int n = strlen(Y);
lcs(X, Y, m, n);
return 0;
}
/* Dynamic Programming Java implementation of LCS problem */
public class LongestCommonSubsequence
{
/* Returns length of LCS for X[0..m-1], Y[0..n-1] */
int lcs( char[] X, char[] Y, int m, int n )
{
int L[][] = new int[m+1][n+1];
/* Following steps build L[m+1][n+1] in bottom up fashion. Note
that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1] */
for (int i=0; i<=m; i++)
{
for (int j=0; j<=n; j++)
{
if (i == 0 || j == 0)
L[i][j] = 0;
else if (X[i-1] == Y[j-1])
L[i][j] = L[i-1][j-1] + 1;
else
L[i][j] = max(L[i-1][j], L[i][j-1]);
}
}
return L[m][n];
}
/* Utility function to get max of 2 integers */
int max(int a, int b)
{
return (a > b)? a : b;
}
public static void main(String[] args)
{
LongestCommonSubsequence lcs = new LongestCommonSubsequence();
String s1 = "AGGTAB";
String s2 = "GXTXAYB";
char[] X=s1.toCharArray();
char[] Y=s2.toCharArray();
int m = X.length;
int n = Y.length;
System.out.println("Length of LCS is" + " " +
lcs.lcs( X, Y, m, n ) );
}
}
# Dynamic programming implementation of LCS problem
# Returns length of LCS for X[0..m-1], Y[0..n-1]
def lcs(X, Y, m, n):
L = [[0 for x in xrange(n+1)] for x in xrange(m+1)]
# Following steps build L[m+1][n+1] in bottom up fashion. Note
# that L[i][j] contains length of LCS of X[0..i-1] and Y[0..j-1]
for i in xrange(m+1):
for j in xrange(n+1):
if i == 0 or j == 0:
L[i][j] = 0
elif X[i-1] == Y[j-1]:
L[i][j] = L[i-1][j-1] + 1
else:
L[i][j] = max(L[i-1][j], L[i][j-1])
# Following code is used to print LCS
index = L[m][n]
# Create a character array to store the lcs string
lcs = [""] * (index+1)
lcs[index] = ""
# Start from the right-most-bottom-most corner and
# one by one store characters in lcs[]
i = m
j = n
while i > 0 and j > 0:
# If current character in X[] and Y are same, then
# current character is part of LCS
if X[i-1] == Y[j-1]:
lcs[index-1] = X[i-1]
i-=1
j-=1
index-=1
# If not same, then find the larger of two and
# go in the direction of larger value
elif L[i-1][j] > L[i][j-1]:
i-=1
else:
j-=1
print "LCS of " + X + " and " + Y + " is " + "".join(lcs)
# Driver program
X = "AGGTAB"
Y = "GXTXAYB"
m = len(X)
n = len(Y)
lcs(X, Y, m, n)