huffman编解码英文文本[Python]
对英文文本的字母进行huffman编码,heapq优先队列构建huffman树
python huffman.py source.txt result.txt
1 import sys 2 import heapq 3 import collections 4 5 class Node(object): 6 def __init__(self,value = None,count = 1,left = None,right = None, code = ''): 7 self.value = value 8 self.count = count 9 self.left = left 10 self.right = right 11 self.code = code 12 13 def isleaf(self): 14 if self.left != None: 15 return False 16 elif self.right != None: 17 return False 18 else: 19 return True 20 21 def __repr__(self): 22 return "Node(%r,%r)"%(self.value,self.count) 23 #for sort or priority queue 24 def __lt__(self,other): 25 return self.count < other.count 26 #for operator + 27 def __add__(self,other): 28 self.code = 0 29 other.code = 1 30 # only leaf node value is need 31 return Node(self.value+other.value,self.count+other.count,self,other) 32 33 def getTreeRoot(text): 34 Counter = collections.Counter(text) 35 head = [Node(k,v) for (k,v) in Counter.items()] 36 heapq.heapify(head) 37 38 while len(head) >= 2: 39 heapq.heappush(head, heapq.heappop(head) + heapq.heappop(head)) 40 41 root = head[0] 42 return root 43 44 def huffman(root, prefix = []): 45 code = {} 46 if root is None: 47 return code 48 prefix = prefix + [root.code] 49 if root.isleaf(): 50 code[root.value] = prefix 51 else: 52 code.update(huffman(root.left,prefix)) 53 code.update(huffman(root.right,prefix)) 54 return code 55 56 def gethuffmantext(text): 57 root = getTreeRoot(text) 58 codebook = huffman(root) 59 for k,v in codebook.items(): 60 newv = "".join(str(char) for char in v) 61 codebook[k] = newv 62 print (codebook) 63 print("The original text size is {}".format(len(text) * 8)) 64 huffmantext = [] 65 lenhuffman = 0 66 for char in text: 67 lenhuffman += len(codebook[char]) 68 huffmantext.append(codebook[char]) 69 print ("The huffman code text size is {}".format(lenhuffman)) 70 71 return huffmantext, codebook, lenhuffman 72 73 def gettextfromhuffmancode(huffmantext,codebook): 74 75 reversecodebook = {value:key for (key,value) in codebook.items()} 76 77 text = [] 78 for huffmancode in huffmantext: 79 text.append(reversecodebook[huffmancode]) 80 81 return text 82 83 84 if __name__ == "__main__": 85 text = open(sys.argv[1],"rb").read() 86 newfile = sys.argv[2] 87 #huffmantext, codebook, lenhuffman = gethuffmantext("hello world") 88 huffmantext, codebook, lenhuffman = gethuffmantext(text) 89 text1 = gettextfromhuffmancode(huffmantext,codebook) 90 text1 = "".join(str(v) for v in text1) 91 92 lenoftext = len(text)*8.0 93 lenofhuffman = lenhuffman 94 95 print ("The compression ratio is %lf \n" % (1.0 * lenhuffman / lenoftext )) 96 97 fp = open(newfile,"wb") 98 fp.write(text1) 99 fp.close()