Huffman Coding (哈夫曼编码)

哈夫曼编码(Huffman Coding),又称霍夫曼编码,是可变字长编码(VLC)的一种。Huffman 于 1952 年提出了这种编码方法:它完全依据字符出现的概率来构造异字头(即任何码字都不是其他码字的前缀)且平均长度最短的码字,因此有时称之为最佳编码,一般就叫做 Huffman 编码。

 

#include "stdafx.h"

#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <queue>
#include <string>
#include <vector>
  6 
  7 using namespace std;
  8 
  9 typedef struct 
 10 {
 11     //内容
 12     unsigned char    character;
 13     //频次
 14     unsigned int    frequency;
 15 
 16 } CharNode;
 17 
 18 struct MinHeapNode
 19 {
 20     char data;
 21     unsigned int freq;
 22     MinHeapNode *left, *right;
 23     MinHeapNode(char data, unsigned int freq)
 24     {
 25         left = right = NULL;
 26         this->data = data;
 27         this->freq = freq;
 28     }
 29 };
 30 typedef struct MinHeapNode MinHeapNode;
 31 
 32 struct compare
 33 {
 34     bool operator()(MinHeapNode *a, MinHeapNode *b)
 35     {
 36         //"a > b" 表示 数值小 的 优先级高, 排最上面
 37         //反之 "<" 表示大的优先级高
 38         return (a->freq > b->freq);
 39     }
 40 };
 41 
 42 void get_huffuman_code(MinHeapNode *topNode, string code)
 43 {
 44     if (NULL == topNode)
 45     {
 46         return;
 47     }
 48     //表示已到达最末端,含有左右节点的元素中(由while循环中生成)data必为-1
 49     if (topNode->data != -1)
 50     {
 51         cout << topNode->data << " : " << code<<endl;
 52     }
 53     get_huffuman_code(topNode->left, code + "0");
 54     get_huffuman_code(topNode->right, code + "1");
 55 }
 56 
 57 int main()
 58 {
 59          FILE * inputFile = NULL;
 60          fopen_s(&inputFile,"input.txt", "rb");
 61          if (!inputFile)
 62          {
 63              cout<< "Error: open file failed !" << endl;
 64              return -1;
 65          }
 66 
 67          //初始化ASCII码数组
 68          CharNode nodeArr[256] = { {0,0} };
 69          while (!feof(inputFile))
 70          {
 71               char buf = getc(inputFile);
 72               cout << buf;
 73               nodeArr[buf].character = buf;
 74               nodeArr[buf].frequency++;
 75          }
 76          cout << endl;
 77          //定义一个小顶堆
 78          priority_queue<MinHeapNode*, vector<MinHeapNode*>, compare > minHeap;
 79          for (size_t i = 0; i < 256; i++)
 80          {
 81              //将数据堆中,与升序排序效果类似,即堆首为小值,堆尾为大值
 82              if (nodeArr[i].frequency > 0)
 83              {
 84                  minHeap.push(new MinHeapNode(nodeArr[i].character, nodeArr[i].frequency));
 85                  cout << "Node " << i << ": [" << nodeArr[i].character << ", " << nodeArr[i].frequency << "]" << endl;
 86              }
 87          }
 88 
 89          MinHeapNode *leftNode = NULL, *rightNode = NULL, *topNode = NULL;
 90          //从堆首中抛出两个较小节点,生成一个新节点放回堆中并进行重新排序,
 91          //故,每次操作完成后 堆中会减少一个元素, 且堆顶必为权值最小的2个元素 
 92          //直至堆中只剩一个元素时,哈夫曼树生成完毕
 93          while (minHeap.size() > 1)
 94          {
 95              //较小放左侧
 96              leftNode = minHeap.top();
 97              minHeap.pop();
 98              //较大的放右侧
 99              rightNode = minHeap.top();
100              minHeap.pop();
101 
102              topNode = new MinHeapNode(-1, leftNode->freq + rightNode->freq);
103              topNode->left = leftNode;
104              topNode->right = rightNode;
105              minHeap.push(topNode);
106          }
107          get_huffuman_code(topNode, "");
108          getchar();
109          return 0;
110 }
View Code

 对于解码过程:从根节点出发,依次读取文件流中的 bit,遇到“0”走向 leftNode,遇到“1”走向 rightNode,直至到达 data != -1 的叶子节点;输出该节点的字符后回到根节点,继续解码后面的 bit。

posted @ 2018-09-16 20:00  gaobowen  阅读(338)  评论(0编辑  收藏  举报