Huffman Coding 哈夫曼编码
作者:jostree 转载请注明出处 http://www.cnblogs.com/jostree/p/4096079.html
使用优先队列实现,需要注意以下几点:
1.在使用priority_queue时,内部需要存储哈夫曼树节点的指针,而不能是节点本身。因为构建哈夫曼树时,需要把父节点的左右指针指向孩子,而如果队列中储存的是节点,那么节点在堆调整的过程中会被拷贝、移动,孩子的地址是会改变的。同理节点应当使用new在内存中单独开辟,而不能存放在vector中,原因是vector在元素个数超过当前容量(capacity)时会扩容(常见实现是容量倍增),开辟新数组并把老数组的元素copy过去,从而也会导致地址变化。
2.优先队列对指针的排列,需要额外写一个比较函数对象来比较指针指向的节点的大小:bool operator () (wcnode * node1, wcnode * node2) { return node1->lessthan(node2); },并在定义优先队列时使用这种方法: priority_queue <wcnode*, vector<wcnode*>, compare>。第一个参数是节点类型,第二个参数是优先队列的储存结构,第三个参数是比较函数。注意priority_queue默认是大顶堆,所以lessthan中用 count > w->count 来比较,这样出现次数最少的节点才会排在队首。
3.C++在写入文件时,由于只能按字节写入,因此需要把8个bit位转化为一个字节,最后不足8位用0补齐,并记录文件总bit数,便于解码。然后写入文件。另写入二进制文件可以使用ofstream out("output.txt",std::ofstream::binary);
4.哈夫曼编码信息包括每种字符的映射,和该文件的总bit数。
其代码如下:
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <cstring>
#include <fstream>
#include <queue>
#include <map>
#include <string>
#include <vector>
using namespace std;
class compare;

// A node of the Huffman tree. Leaves carry a symbol in `word`; internal nodes
// are created with the default '\0' and only use `count`, `left` and `right`.
class wcnode
{
public:
    friend class compare;
    char word;      // symbol represented by a leaf
    int count;      // occurrences of the symbol, or the sum of both children
    wcnode* left;   // child reached by appending bit 0
    wcnode* right;  // child reached by appending bit 1

    // Returns true when this node has a LARGER count than `w`. The inverted
    // comparison is deliberate: priority_queue keeps the "greatest" element on
    // top, so ordering by '>' puts the node with the smallest count on top.
    bool lessthan (const wcnode *w) const
    {
        return count > w->count;
    }

    wcnode(char w='\0', int c=0, wcnode* l=NULL, wcnode * r=NULL)
        : word(w), count(c), left(l), right(r)
    {
    }
};

// Comparison functor for priority_queue<wcnode*, ...>: compares the nodes the
// pointers refer to instead of the pointer values themselves.
class compare
{
public:
    bool operator () (const wcnode * node1, const wcnode * node2) const
    {
        return node1->lessthan(node2);
    }
};

// Walks the tree rooted at `head` and stores, for every leaf, the path from
// the root in `res` (0 = left edge, 1 = right edge).
//   head - root of the (sub)tree; a NULL pointer is ignored
//   rec  - bits accumulated on the way from the root down to `head`
//   res  - receives one entry per leaf: symbol -> code
void preorder(wcnode *head, vector<bool> rec, map<char, vector<bool> > & res)
{
    if( head == NULL )
    {
        return;
    }
    if( head->left == NULL && head->right == NULL )
    {
        res[head->word] = rec;
        return;
    }
    // `rec` is this call's private copy, so one extra slot can serve both
    // children: 0 for the left subtree, then overwritten with 1 for the right.
    rec.push_back(false);
    if( head->left != NULL )
    {
        preorder(head->left, rec, res);
    }
    rec.back() = true;
    if( head->right != NULL )
    {
        preorder(head->right, rec, res);
    }
}

// Releases every node of the tree rooted at `head`.
static void freetree(wcnode *head)
{
    if( head == NULL )
    {
        return;
    }
    freetree(head->left);
    freetree(head->right);
    delete head;
}

// Builds a Huffman code from a symbol -> frequency table, prints the table to
// stdout as "symbol:bits, " and returns it.
//   wordcount - number of occurrences of each symbol
// Returns a map symbol -> code; it is empty when `wordcount` is empty.
map<char, vector<bool> > encode(map<char, int> &wordcount)
{
    map<char, vector<bool> > res;
    if( wordcount.empty() )
    {
        // Nothing to encode; calling top() on an empty queue would be undefined.
        return res;
    }

    priority_queue <wcnode*, vector<wcnode*>, compare> pq;
    for( map<char, int>::const_iterator t = wordcount.begin() ; t != wordcount.end() ; ++t )
    {
        pq.push(new wcnode(t->first, t->second));
    }

    // Repeatedly merge the two least frequent nodes until one root remains.
    while( pq.size() > 1 )
    {
        wcnode *t1 = pq.top();
        pq.pop();
        wcnode *t2 = pq.top();
        pq.pop();
        pq.push(new wcnode('\0', t1->count + t2->count, t1, t2));
    }

    wcnode *huffmanhead = pq.top();
    pq.pop();
    if( huffmanhead->left == NULL && huffmanhead->right == NULL )
    {
        // A one-symbol alphabet yields a root that is itself a leaf. Give it a
        // one-bit code, otherwise every occurrence would encode to zero bits.
        res[huffmanhead->word] = vector<bool>(1, false);
    }
    else
    {
        preorder(huffmanhead, vector<bool>(), res);
    }
    // The codes are now copied into `res`; the tree is no longer needed.
    freetree(huffmanhead);

    for( map<char, vector<bool> >::const_iterator it = res.begin() ; it != res.end() ; ++it )
    {
        cout<<it->first<<":";
        for( vector<bool>::size_type i = 0; i < it->second.size() ; i++ )
        {
            cout<<it->second[i];
        }
        cout<<", ";
    }
    return res;
}

// Encodes `passage` with the code table `res` and writes the packed bits to
// the file named `s`.
//   s       - path of the output file (opened in binary mode, truncated)
//   passage - text to encode
//   res     - symbol -> code table, as returned by encode()
// Bits are packed most-significant-bit first, eight per byte; the final byte
// is padded with 0 bits. The total bit count is NOT stored in the file, so a
// decoder has to obtain it separately to tell padding from data. Symbols that
// have no entry in `res` contribute no bits.
void output(string s, string passage, map<char, vector<bool> > res)
{
    ofstream out(s.c_str(), ofstream::out | ofstream::binary);
    if( !out )
    {
        cerr<<"cannot open output file: "<<s<<endl;
        return;
    }

    unsigned char current = 0;  // byte being assembled
    int filled = 0;             // number of bits already placed in `current`
    for( string::size_type i = 0 ; i < passage.size() ; i++ )
    {
        // find() rather than operator[]: a lookup must not add empty codes.
        map<char, vector<bool> >::const_iterator it = res.find(passage[i]);
        if( it == res.end() )
        {
            continue;
        }
        const vector<bool> &code = it->second;
        for( vector<bool>::size_type j = 0 ; j < code.size() ; j++ )
        {
            current = static_cast<unsigned char>((current << 1) | (code[j] ? 1 : 0));
            filled++;
            if( filled == 8 )
            {
                out.put(static_cast<char>(current));
                current = 0;
                filled = 0;
            }
        }
    }
    if( filled > 0 )
    {
        // Left-align the remaining bits so the zero padding ends up at the end.
        // The byte is written even when all of its bits are 0.
        current = static_cast<unsigned char>(current << (8 - filled));
        out.put(static_cast<char>(current));
    }
    out.close();
}

// Reads Aesop_Fables.txt, builds a Huffman code from its character
// frequencies and writes the encoded text to outAesop.txt.
// Returns 1 when the input file cannot be opened, 0 otherwise.
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    ifstream in("Aesop_Fables.txt");
    if( !in )
    {
        cerr<<"cannot open input file: Aesop_Fables.txt"<<endl;
        return 1;
    }

    map <char, int> wordcount;
    string passage;
    char tmp;
    while( in.get(tmp) )
    {
        passage += tmp;
        // operator[] value-initialises a missing counter to 0 before the ++.
        wordcount[tmp]++;
    }
    in.close();

    map <char, vector<bool> > res = encode(wordcount);
    output("outAesop.txt", passage, res);
    return 0;
}