目标:

找出两个 字符串 X = 'CGATAATTGAGA' 和 Y = 'GTTCCTAATA' 的最长公共子序列。用动态规划求解(解不唯一?)。

首先建立一个表 table = table[row,col] = table[12+1,10+1];为什么多一格呢? 

先初始化每个单元格的值为0;

然后,遍历行和列:

当X[row] = Y[col]时,table[row,col] = table[row-1,col-1]+1;

当X[row] <> Y[col]时,table[row,col] = Math.max(table[row,col-1],table[row-1,col]);

这样就构建出一个下面的表:

        G  T  T  C  C  T  A  A  T  A       

   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]    

C [0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]    

G [0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]    

A [0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2]    

T [0, 1, 2, 2, 2, 2, 2, 2, 2, 3, 3]    

A [0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 4]    

A [0, 1, 2, 2, 2, 2, 2, 3, 4, 4, 4]    

T [0, 1, 2, 3, 3, 3, 3, 3, 4, 5, 5]    

T [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]    

G [0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5]    

A [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]    

G [0, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6]    

A [0, 1, 2, 3, 3, 3, 4, 5, 6, 6, 6]

通过这个表来找出LCS,LCS由表的下标给出,将这些下标放入一个数组L

从表的最后开始回溯:

row = 13, col = 11;

1、table[13,11]对应的行和列的字符相等,将(13,11)放入L,row-=1,col-=1;

2、table[12,10]对应的行和列的字符不相等,比较table[11,10]和table[12,9]

  2.1 table[11,10] > table[12,9],i -= 1;

  2.2 否则 j -= 1;

重复1、2

实现:

/*
  initial a row*col table
  return a table
*/
function initTable(row,col){
    var table = [];
    
    for(var i=0;i<row+1;i++){
	if(!table[i])table[i] = [];
	for(var j=0;j<col+1;j++){
	    if(!table[i][j])table[i][j] = [];
	    table[i][j] = 0;
	}
    }
    return table;
}
/*
  compute the path index of the table
  return array contain the index of s1 and s2
*/
function computeLCSIndex(s1,s2,table){
    var i = s1.length,
    j = s2.length,
    L1=[];
    
    while(i>0 && j>0){
	//console.log(i,j);
	if(s1[i-1] == s2[j-1]){
	    //console.log(i,j);
	    L1.push([i,j]);
	    j -=1;
	    i -=1;
	}else{
	    if(!table[i-1]){
		console.log(i);	    
		break;
	    }

	    if(table[i-1][j] > table[i][j-1]){
		i -= 1;
	    }else{
		j -= 1;
	    }
	}
    }

    return L1;
}
/*
  get LCS from s1,or s2 from LCS index
 */
function LCS(s1,s2,L1){
    //console.log(L1 = L1.reverse());
    L1 = L1.reverse();
    var LCS = "";
    for(var j=0;j<L1.length;j++){
	LCS+=(s2[L1[j][1]-1]);
    }
    //console.log(LCS);
    return LCS;
}

function diff(s1,s2){
    var len1 = s1.length,
    len2 = s2.length,
    table,
    L1;//LCS index of s1
    
    table  = initTable(len1,len2);

    for(k=1;k<len1+1;k++){
	for(l=1;l<len2+1;l++){
	    if(s1[k-1] === s2[l-1]){
		//console.log("k:"+k+",l:"+l+table[k-1][l-1]);
		table[k][l] = table[k-1][l-1] + 1;
	    }else{
		//console.log("k:"+k,"l:"+l);
		table[k][l] = Math.max(table[k][l-1],table[k-1][l]);
	    }
	}
    }
    /*
    table.forEach(function(i){
	console.log(i);
    });
    */

    L1 = computeLCSIndex(s1,s2,table);
    return L1;
}

var debug = true;
if(debug){
    var A = 'CGATAATTGAGA',
    B = 'GTTCCTAATA';
    var L = diff(A,B);
    
    var s = LCS(A,B,L);
    console.log(s);

    var X = ["Cqdf", "G23", "A", "T", "A", "A", "T", "T", "G", "A", "G", "A"],
    Y = ["G2", "Tsfj", "T", "C", "C", "T", "A", "A", "T", "A"];

    var L2 = diff(X,Y);
    var s2 = LCS(X,Y,L2);

    console.log(s2);
}

通常是以单个字符作为比较的“元”,若以多个字符作为比较的元,那么输入的参数为数组即可:

posted on 2011-06-03 15:11  wewe.Tom  阅读(594)  评论(0编辑  收藏  举报