最长公共子序列算法, 文本比较算法, longest common subsequence (LCS) algorithm
def mergeFiles(aPath, bPath, cPath):
    """Merge two configuration files line by line and write the result.

    aPath is the base file; content that was added in bPath (compared to
    aPath) is inserted into the output.  For example, 'bbb' is added content:

        a file content | b file content | merged file content
        111            | 111            | 111
        aaa            | bbb            | aaa
                       |                | bbb
        222            | 222            | 222

    Every line is stripped of leading and trailing whitespace before the
    comparison, so the merged file carries no indentation and each line is
    terminated by exactly one newline character.

    aPath -- path of the base file to read
    bPath -- path of the file whose added lines are merged in
    cPath -- path of the merged file to write (overwritten if it exists)
    """
    aLines = _readNormalizedLines(aPath)
    bLines = _readNormalizedLines(bPath)

    cLines = mergeSequences(aLines, bLines)

    with open(cPath, 'w') as f:
        f.writelines(cLines)


def _readNormalizedLines(path):
    """Return the lines of path, each stripped and ended with a newline."""
    with open(path, 'r') as f:
        return [line.strip() + '\n' for line in f]


def mergeSequences(aLines, bLines):
    """Merge two line sequences around their longest common subsequence.

    Between two consecutive common lines, the lines found only in aLines
    are emitted first, followed by the lines found only in bLines.

    Returns a new list; neither input is modified.
    """
    lcs = findLCS({}, aLines, 0, bLines, 0)
    currA = currB = 0
    merged = []
    for line, aI, bI in lcs:
        # Lines present only in a (slice is empty when nothing was skipped).
        merged.extend(aLines[currA:aI])
        # Lines present only in b.
        merged.extend(bLines[currB:bI])
        # The common line itself.
        merged.append(line)
        # Always step past the matched line, even if nothing was skipped.
        currA = aI + 1
        currB = bI + 1

    # Whatever follows the last common line in either input.
    merged.extend(aLines[currA:])
    merged.extend(bLines[currB:])
    return merged


def findLCS(record, aLines, aStart, bLines, bStart):
    """Find the longest common subsequence of aLines[aStart:] and bLines[bStart:].

    Returns a list of (line, x, y) tuples where line is the common line,
    x is its index in aLines and y is its index in bLines.

    record is a dict used as a result cache, keyed by (aStart, bStart);
    the result of this call is stored in it and reused on a repeated call
    with the same start indexes.

    The table is filled iteratively (bottom-up dynamic programming), so the
    input length is not limited by the interpreter's recursion depth.  When
    two candidate subsequences have equal length, the one obtained by
    skipping a line of aLines is chosen.
    """
    key = (aStart, bStart)
    if key in record:
        return record[key]

    # Number of lines still to be compared on each side (0 if start is past the end).
    rows = max(len(aLines) - aStart, 0)
    cols = max(len(bLines) - bStart, 0)

    # lengths[i][j] is the LCS length of aLines[aStart+i:] and bLines[bStart+j:].
    # The extra last row/column stays 0: an empty suffix has no common lines.
    lengths = [[0] * (cols + 1) for _ in range(rows + 1)]
    for i in range(rows - 1, -1, -1):
        aLine = aLines[aStart + i]
        row = lengths[i]
        below = lengths[i + 1]
        for j in range(cols - 1, -1, -1):
            if aLine == bLines[bStart + j]:
                row[j] = below[j + 1] + 1
            elif row[j + 1] > below[j]:
                row[j] = row[j + 1]
            else:
                row[j] = below[j]

    # Walk the table from the start to recover the subsequence itself.
    lcs = []
    i = j = 0
    while i < rows and j < cols:
        aLine = aLines[aStart + i]
        if aLine == bLines[bStart + j]:
            lcs.append((aLine, aStart + i, bStart + j))
            i += 1
            j += 1
        elif lengths[i][j + 1] > lengths[i + 1][j]:
            # Skipping a line of b keeps a strictly longer subsequence.
            j += 1
        else:
            # Tie or better: skip a line of a.
            i += 1

    record[key] = lcs
    return lcs