最长公共子序列算法, 文本比较算法, longest common subsequence (LCS) algorithm

 1 '''
 2 merge two configure files, basic file is aFile
 3 insert the added content of bFile compare to aFile
 4 for example, 'bbb' is added content
 5 -----------------------------------------------------------
 6 a file content  |  b file content  |  c merged file content
 7     111         |       111        |    111
 8     aaa         |       bbb        |    aaa
 9                 |                  |    bbb
10     222         |       222        |    222
11 ------------------------------------------------------------
12 '''
13 def mergeFiles(aPath, bPath, cPath):
14 
15     with open(aPath, 'r') as f:
16         aLines = f.readlines();
17         aLines = [ line.strip() + '\n' for line in aLines]
18 
19     with open(bPath, 'r') as f:
20         bLines = f.readlines();
21         bLines = [ line.strip() + '\n' for line in bLines]
22 
23     cLines = mergeSequences(aLines, bLines)
24 
25     with open(cPath, 'w') as f:
26         for line in cLines:
27             f.write(line)
28 
def mergeSequences(aLines, bLines):
    '''Merge two line sequences around their longest common subsequence.

    The common lines (as returned by findLCS) act as anchors. Between two
    consecutive anchors the lines that occur only in aLines are emitted
    first, then the lines that occur only in bLines, then the anchor line
    itself. Whatever remains after the last anchor is appended in the same
    order (rest of aLines, then rest of bLines).

    aLines -- base sequence of lines
    bLines -- sequence whose extra lines are merged in

    Returns a new list; neither input is modified.
    '''
    record = {}
    lcs = findLCS(record, aLines, 0, bLines, 0)

    merged = []
    currA = currB = 0  # first index of each input not yet emitted
    for line, aI, bI in lcs:
        # Lines only in a ("deleted" from b's point of view). The slice is
        # empty when the anchor directly follows the previous one.
        merged.extend(aLines[currA:aI])
        # Lines only in b ("added").
        merged.extend(bLines[currB:bI])
        # The common line itself.
        merged.append(line)

        currA = aI + 1
        currB = bI + 1

    # Tails after the last common line.
    merged.extend(aLines[currA:])
    merged.extend(bLines[currB:])

    return merged
58 
def findLCS(record, aLines, aStart, bLines, bStart):
    '''Find the longest common subsequence of aLines[aStart:] and bLines[bStart:].

    record -- dict used as a memo; the result for this call is stored under
              the key (aStart, bStart) and returned directly on a repeated
              call with the same offsets
    aLines -- first sequence of lines
    aStart -- non-negative offset into aLines where the comparison starts
    bLines -- second sequence of lines
    bStart -- non-negative offset into bLines where the comparison starts

    Returns a list of (line, x, y) tuples in increasing order, where line is
    a common line, x is its index in aLines and y is its index in bLines.
    Returns [] when either remaining range is empty.

    The table is filled iteratively (no recursion), so the input size is not
    bounded by the interpreter's recursion limit. When several subsequences
    of maximal length exist, a line of aLines is skipped in preference to a
    line of bLines on ties.
    '''
    key = (aStart, bStart)
    if key in record:
        return record[key]

    rows = len(aLines) - aStart
    cols = len(bLines) - bStart
    if rows <= 0 or cols <= 0:
        return []

    # lengths[i][j] is the LCS length of aLines[aStart + i:] and
    # bLines[bStart + j:]; the extra last row/column stay 0 (empty suffix).
    lengths = [[0] * (cols + 1) for _ in range(rows + 1)]
    for i in range(rows - 1, -1, -1):
        aLine = aLines[aStart + i]
        row = lengths[i]
        below = lengths[i + 1]
        for j in range(cols - 1, -1, -1):
            if aLine == bLines[bStart + j]:
                row[j] = below[j + 1] + 1
            else:
                row[j] = max(row[j + 1], below[j])

    # Walk the table from the start offsets, collecting matches.
    lcs = []
    i = j = 0
    while i < rows and j < cols:
        aLine = aLines[aStart + i]
        if aLine == bLines[bStart + j]:
            lcs.append((aLine, aStart + i, bStart + j))
            i += 1
            j += 1
        elif lengths[i][j + 1] > lengths[i + 1][j]:
            # Strictly better to skip the b-line.
            j += 1
        else:
            # Tie or better: skip the a-line.
            i += 1

    record[key] = lcs
    return lcs
91 
92 Code

 

posted on 2013-11-15 14:30  lpthread  阅读(245)  评论(0)  编辑  收藏  举报