完美huffman编码压缩解压缩实现(实现256个ASCII字符)

 
 1 //<HuffmanGKM.h>
 2  #include "stdafx.h"
 3  #include <string>
 4 
 5  using std::string;
 6 
 // Number of distinct character values the coder keeps statistics and codes for.
 const int CHNUM = 256;
 // Offset added to a character value to obtain its array subscript.
 const int PLUS = 128;
 // Frequency record for one character: how often it occurs and which
 // character it is. Both members start at zero so a default-constructed
 // record never holds indeterminate values.
 struct WeightGkm
 {
     unsigned long w = 0;    // occurrence count (the Huffman weight)
     char c = 0;             // character value
 };
14  typedef struct HTNode    //huffman树结构
15  {
16      int count;
17      WeightGkm w;
18      string code;
19      HTNode * lchild;
20      HTNode * rchild;
21  }HTNode,*HTree;
22 
23  class HuffmanGKM
24  {
25  private:
26      HTree T;    //构造Huffman树;
27      string huffCode[CHNUM];        //256个字符的Huffman编码;
28 
29      unsigned long weight[CHNUM];    //256个可打印字符的频度(或叫权重)
30      unsigned long long file_size;    //原始文件字符总数,即文件长度
31 
32      string original_file;    //原始文件路径
33      string compress_file;    //压缩文件存储路径
34      string decompress_file;    //解压缩文件存储路径
35 
36      void QuickSortHT( HTree ht[], int left, int right);        //快速排序
37      int Partition( HTree ht[], int left, int right);        //快速排序中的“分半”
38      void SelectInsert( HTree ht[], HTree t, int left ,int right);    //按序插入
39  public:
40 
41      HuffmanGKM( string originalFile , string compressFile, string decompressFile);
42 
43      int ReadFile();        //读取原文件,并记录每个字符频度
44      int BuildHuffTree();    //根据频度建立字符的Huffman树
45      int CreateHuffCode();    //根据huffman树得到huffman编码
46      int CompressFile();        //用新编码转换原文件
47      int DecompressFile();    //根据huffman树解压缩huffman编码的压缩文件
48 
49      ~HuffmanGKM();        //析构函数,释放堆空间
50  };
  1 //<HuffmanGKM.cpp>
  2  #include "stdafx.h"
  3  #include <iostream>
  4  #include <fstream>
  5  #include "HuffmanGKM.h"
  6  #include <string>
  7  #include <bitset>
  8 
  9  using std::bitset;
 10  using std::string;
 11  using std::ifstream;
 12  using std::ofstream;
 13  using std::cout;
 14  using std::endl;
 15  using std::ios;
 16 
 17  HuffmanGKM::HuffmanGKM( string originalFile , string compressFile, string decompressFile)
 18  {
 19      for(int i=0;i<CHNUM;i++)
 20          weight[i]=0;
 21 
 22      file_size=0;
 23      original_file=originalFile;
 24      compress_file=compressFile;
 25      decompress_file=decompressFile;
 26 
 27  }
 28 
 29  int HuffmanGKM::ReadFile()
 30  {
 31      ifstream read;
 32      read.open (original_file);
 33      if(read.fail())
 34      {
 35          cout<<"The original file open failed when read file!!";
 36          return 0;
 37      }
 38 
 39      char next;
 40      read.get(next);
 41      while(!read.eof())//统计频度。
 42      {
 43          weight[next+PLUS]++;
 44          read.get(next);
 45          file_size++;
 46      }
 47 
 48      read.close ();
 49      return 0;
 50  }
 51 
 52  void HuffmanGKM::QuickSortHT ( HTree htt[], int left, int right )
 53  {
 54      int pivot;
 55      if( left < right )  // 肯定为真的条件
 56      {
 57          pivot = Partition ( htt, left, right );
 58          QuickSortHT( htt, left, pivot-1 );
 59          QuickSortHT( htt, pivot+1, right );
 60      }
 61 
 62  }
 63  //快速排序的patition算法
 64  int HuffmanGKM::Partition ( HTree htt[ ], int left, int right )  //这是左大右小的排序
 65  {
 66      HTree HTPivot = htt[left];      //这叫“虚左以待”
 67 
 68      while( left < right )
 69      {
 70          while( right > left && htt[ right]->w.w >= HTPivot->w.w )
 71              right--;
 72          htt[ left ] = htt[ right ];
 73 
 74          while( left < right && htt[ left ]->w.w <= HTPivot->w.w )
 75              left++;
 76          htt[ right ] = htt[ left ];
 77      }
 78 
 79      htt[ left ] = HTPivot;
 80 
 81      return left;  //最后left=right,所以返回哪个都一样
 82 
 83  }
 84 
 85  void HuffmanGKM:: SelectInsert( HTree htt[], HTree p, int left ,int right)//left是第一个要比较的元素
 86  {
 87      for( ;left<=right;left++)
 88      {
 89          if( p->w.w > htt[left]->w.w )
 90              htt[left-1]=htt[left];//左移小元素。
 91          else
 92              break;
 93      }
 94      htt[left-1]=p;
 95  }
 96 
 97  int HuffmanGKM::BuildHuffTree()
 98  {
 99      int left=0,right=CHNUM-1;
100      HTree ht[CHNUM];    //树结点的排序数组
101 
102      for( int i=0; i<CHNUM;i++)    //初始化huffman树结点
103      {
104          ht[i]= new HTNode ;
105          ht[i]->w.w=weight[i];    //字符频度
106          ht[i]->count=1;            //树中结点个数,仅做测试用。
107          ht[i]->w.c=i-PLUS;        //字符值
108          ht[i]->lchild =0;
109          ht[i]->rchild=0;
110      }
111 
112      QuickSortHT( ht ,left , right );    //先把各结点字符按频度升序排序。
113 
114      HTree parent;
115      while(left<right)    //建树的过程很简单。
116      {
117          ht[left]->code ="1";
118          ht[left+1]->code ="0";
119          parent=new HTNode;
120          parent->lchild =ht[left];
121          parent->rchild =ht[left+1];
122 
123          parent->w.c=0;
124          parent->w.w=parent->lchild ->w.w+parent->rchild ->w.w ;
125          parent->count=parent->lchild ->count + parent->rchild->count + 1;
126          SelectInsert( ht,parent,left+2,right);
127          left++;
128      }
129      T=parent;    //T为建好的huffman树。
130      return 0;
131  }
132 
133  int HuffmanGKM::CreateHuffCode ()
134  {
135      //非递归后序遍历二叉树,访问叶子结点
136      HTree stack[CHNUM];
137      int sign[CHNUM]={0};
138      HTree p=T;
139      int top=0;
140 
141      while( p||top )
142      {
143          if(p)
144          {
145              stack[top]=p;
146              sign[top]=1;
147              top++;
148              p=p->lchild ;
149          }
150          else // p为空指针,循环出栈
151              while( top!=0 )   //后序遍历中,当访问完一个结点时,则以该结点为根的树都访问完,所以下一步应该继续出栈,
152              {
153                  top--;
154                  p = stack[top];
155 
156                  if( sign[top] == 2 ) //表示p的左右子树都已走过 
157                  {
158                      if( p->lchild ==0 && p->rchild ==0 )
159                          for(int i=1;i<=top;i++)
160                              huffCode[p->w.c+PLUS]+=stack[i]->code;
161                  }
162                  else //表示仅走过T的左子树 ,右子树必定是第一次遇到,
163                  {
164                      stack[top]=p;
165                      sign[top]=2;
166                      top++;
167                      p=p->rchild;
168                      break;
169                  }//else if
170              } //while ( !IsEmpty )  
171 
172              if(top==0)
173                  break;
174      }//while
175      return 0;
176  }
177 
178  int HuffmanGKM::CompressFile()
179  {
180      ifstream read;
181      read.open (original_file);
182      if(read.fail ())
183      {
184          cout<<"The original file open failed when compress!!!";
185          return 1;
186      }
187      ofstream write;
188      write.open(compress_file,ios::binary );
189      if(write.fail ())
190      {
191          cout<<"The compress files open failed when compress!!!" ;
192          return 1;
193      }
194      char next;
195      unsigned char buff=0;
196      int count=0;
197 
198      read.get(next);
199      while(!read.eof())
200      {
201          for(string::size_type i=0;i<huffCode[next+PLUS].size();i++)
202          {
203              if( huffCode[next+PLUS][i]=='0')
204                  buff=(buff<<1);
205              else
206                  if(huffCode[next+PLUS][i]=='1')
207                      buff=(buff<<1)|1;
208              count++;
209              if(count==8)
210              {
211                  write<<buff;
212                  count=0;
213              }
214          }
215          read.get(next);
216      }
217      if(count!=0)
218          for(;count!=8;count++)
219              buff=(buff<<1);
220      write<<buff;
221      read.close();
222      write.close();
223 
224      return 0;
225  }
226 
227  int HuffmanGKM::DecompressFile ()
228  {
229      ifstream read;
230      read.open (compress_file,ios::binary );
231      if(read.fail())
232      {
233          cout<<"The compress file pen failed when decompress!!"<<endl;
234          return 0;
235      }
236      ofstream write;
237      write.open (decompress_file);
238      if(write.fail())
239      {
240          cout<<"The decompress file open failed when decompress!!"<<endl;
241          return 0;
242      }
243      HTree p=T;
244 
245      char next;
246      read.get(next);
247      unsigned long long countSize=0;
248      while(1)
249      {
250          bitset<8>b(next);
251          read.get(next);
252          for(int i=b.size()-1;i>=0;i--)
253          {
254              if(b.test(i))
255                  p=p->lchild ;
256              else
257                  p=p->rchild ;
258 
259              if(p->lchild ==0 && p->rchild ==0)
260              {
261                  write<<p->w.c;
262                  p=T;
263                  countSize++;
264              }
265              if(countSize>=file_size)
266                  break;
267          }
268          if(countSize>=file_size)
269                  break;
270      }
271 
272      read.close ();
273      write.close();
274      return 0;
275  }
276 
277  HuffmanGKM::~HuffmanGKM()
278  {
279 
280      HTree stack[CHNUM];
281      int sign[CHNUM]={0};
282 
283      HTree p=T;
284      int top=0;
285      while( p||top )
286      {
287          if(p)
288          {
289              stack[top]=p;
290              sign[top]=1;
291              top++;
292              p=p->lchild ;
293          }
294          else // p为空指针,循环出栈
295              while( top!=0 )   //后序遍历中,当访问完一个结点时,则以该结点为根的树都访问完,所以下一步应该继续出栈,
296              {
297                  top--;
298                  p = stack[top];
299 
300                  if( sign[top] == 2 ) //表示p的左右子树都已走过 ,后序遍历,释放所有结点
301                      delete(p);
302 
303                  else //表示仅走过T的左子树 ,右子树必定是第一次遇到,
304                  {
305                      stack[top]=p;
306                      sign[top]=2;
307                      top++;
308                      p=p->rchild;
309                      break;
310                  }//else if
311              } //while ( !IsEmpty )  
312 
313              if(top==0)
314                  break;
315      }//while
316
 1  //Huffman.cpp : main函数文件。
 2  #include "stdafx.h"
 3  #include "HuffmanGKM.h"
 4  #include <iostream>
 5  #include <string>
 6 
 7  using std::string;
 8  using std::cout;
 9  using std::endl;
10  int _tmain(int argc, _TCHAR* argv[])
11  {
12 
13      string originalFile="../TestGkm/奥巴马.txt";
14      string compressFile="../TestGkm/奥巴马compress.txt";
15      string decompressFile="../TestGkm/奥巴马decompress.txt";
16 
17      HuffmanGKM huff( originalFile, compressFile,decompressFile);
18 
19      huff.ReadFile ();
20      huff.BuildHuffTree ();
21      huff.CreateHuffCode ();
22      huff.CompressFile();
23      huff.DecompressFile ();
24 
25      cout<<"COMPLETE!"<<endl;
26 
27      getchar();
28 
29      return 0;
30  }

posted on 2012-04-15 09:58  NLP新手  阅读(3523)  评论(0)  编辑  收藏  举报

导航