Huffman压缩和解压

一、 需求分析

1. 本演示程序中，模拟的是小写26个英文字母的Huffman压缩，字母由随机函数随机产生，之后统计各字母出现的次数并建立Huffman树，用建立的Huffman树将字母转为二进制流，再将二进制流每8位转为一个unsigned char写入文件！

2. 解压时建立同样的Huffman树或读取已保存的Huffman树，通过将读取的Unsigned Char转为比特流，再由比特流得到原被压缩的字母

3. 程序执行的命令有：随机文件的产生，文件统计，Huffman树的建立，压缩，解压，程序的运行过程和其它信息的文件存储！

4. 程序测试：运行程序，产生随机文件input.txt，压缩时得到的比特流bytes.txt，压缩文件compress.txt，解压时得到的比特流uncompress.txt，解压后文件output.txt和程序的运行过程文件Run.txt，其中bytes.txt、uncompress.txt和Run.txt仅用于调试和记录，为可有可无的文件

二、 概要设计

1. 为达到压缩效果，采用Huffman树来进行编码压缩

2. Huffman编码后若直接保存比特流至txt将达不到压缩的效果，因此建议将比特流转为Unsigned char 来间接存储以达到明显的压缩效果

3. 解压时因用到之前的Huffman树，因此在压缩时有必要保存树的状态

4. Huffman树在压缩时直接使用对应编码即可，在解压时通过遍历树来得到被编码的字母

三、 详细设计

1. 树结构体的定义和全局变量的定义：

/*
 * One node of the Huffman tree.  Nodes are stored in an array and refer to
 * one another by array index; index 0 is used as the "no node" value.
 */
struct HTNode {
    unsigned int weight;   /* occurrence count carried by this node */
    unsigned int parent;   /* index of the parent, 0 while unattached */
    unsigned int lchild;   /* index of the left child, 0 for a leaf */
    unsigned int rchild;   /* index of the right child, 0 for a leaf */
};
typedef struct HTNode HTNode;
typedef struct HTNode *HuffmanTree;

/* Table of code strings: a pointer to an array of char pointers. */
typedef char **HuffmanCode;

/* stat[0] holds the total character count, stat[1..26] the count of each
 * of the 26 letters. */
int stat[27];

/* The tree and the code table shared by all the routines in this file. */
HuffmanTree HT;
HuffmanCode HC;

2. 随机文件的产生函数：

/*
 * CreatFile - generate the random test input and start the run log.
 *
 * Truncates ./Run.txt and writes the "NO.1" banner to it, then writes
 * 10000 pseudo-random lowercase letters ('a'..'z') to ./input.txt.  The
 * generator is seeded from the current time, so every run produces
 * different data.  If a file cannot be opened a message is printed and
 * that step is skipped.
 */
void CreatFile(){
    FILE *log, *in;
    int i;

    srand((unsigned)time(NULL));

    log = fopen("./Run.txt", "w");
    if (log == NULL) {
        printf("Can't create the file of 'Run'!\n");
    } else {
        fprintf(log, "NO.1: Creating a file by rand()\n");
        fclose(log);
    }

    in = fopen("./input.txt", "w");
    if (in == NULL) {
        printf("Can't create the file of 'input'!\n");
        return;
    }
    for (i = 0; i < 10000; i++) {
        /* 97 is ASCII 'a'; rand()%26 selects one of the 26 letters */
        char ch = (char)(97 + rand() % 26);
        fputc(ch, in);
    }
    fclose(in);
}

3. 字母个数的统计：

/*
 * ReadStat - count how often each lowercase letter occurs in a file.
 *
 * Filename: path of the text file to scan.
 *
 * For every letter 'a'..'z' read, the matching slot stat[1..26] and the
 * running total stat[0] are incremented.  Any other character is ignored
 * so that it can neither be miscounted as a letter nor index outside the
 * 27-element stat array.  If the file cannot be opened a message is
 * printed and stat is left untouched.
 */
void ReadStat(char *Filename){
    int c;
    FILE *in = fopen(Filename, "r");

    if (in == NULL) {
        printf("Can't find the file of '%s'!\n", Filename);
        return;
    }
    while ((c = fgetc(in)) != EOF) {
        if (c < 'a' || c > 'z')
            continue;
        stat[c - 'a' + 1]++;   /* 'a' -> slot 1, ..., 'z' -> slot 26 */
        stat[0]++;
    }
    fclose(in);
}

4. 选择HT[1…i-1]中Parent为0且Weight最小的一个点的函数：

/*
 * Select - find the lightest node that has no parent yet.
 *
 * HT: node array, scanned over indices 1..d.
 * d:  highest index to examine.
 * s:  receives the index of the parentless node with the smallest weight
 *     (the lowest such index on ties), or 0 when every node in the range
 *     already has a parent.
 */
void Select(HuffmanTree HT, int d, int *s){
    int best = 0;
    int idx = 1;

    while (idx <= d) {
        if (HT[idx].parent == 0) {
            /* the first candidate is taken as-is; later ones must be
               strictly lighter to replace it */
            if (best == 0 || HT[idx].weight < HT[best].weight)
                best = idx;
        }
        idx++;
    }
    *s = best;
}

5. Huffman树的建立函数：

void HuffmanCoding(int* w, int n){

int m= 2*n-1;

int i, s1, s2, start;

unsigned int c, f;

char *cd;

if(n<=1)

return;

HT=(HTNode*)malloc((m+1)*sizeof(HTNode));/*0号单元未用*/

for(i=1; i<=n; i++, w++){

/***p={*w,0,0,0}; 初始化*/

HT[i].weight=*w;

HT[i].parent=0;

HT[i].lchild=0;

HT[i].rchild=0;

}

for(; i<=m; i++){

/**p={0,0,0,0};*/

HT[i].weight=*w;

HT[i].parent=0;

HT[i].lchild=0;

HT[i].rchild=0;

}

for(i=n+1; i<=m; i++){

Select(HT, i-1, &s1);/*选择HT[1…i-1]中Parent为0且Weight最小的一个点*/

HT[s1].parent=i;

Select(HT, i-1, &s2);

HT[s2].parent=i;

HT[i].lchild=s1;/*左右孩子对编码没有影响，也说明的Huffman树的不唯一性*/

HT[i].rchild=s2;

HT[i].weight= HT[s1].weight + HT[s2].weight;

}

HC = (HuffmanCode) malloc ((n+1) * sizeof(char*));/*分配n个字符编码的头指针向量*/

cd = (char*) malloc(n*sizeof(char));

cd[n-1]='"0';

for(i=1; i<=n; i++){

start =n-1;

for(c=i, f=HT[i].parent; f!=0; c=f, f=HT[f].parent)

if(HT[f].lchild==c)

cd[--start]='0';

else

cd[--start]='1';

HC[i]=(char*)malloc((n-start)*sizeof(char));

strcpy(HC[i], &cd[start]);

}

free(cd);

}

6. Huffman树的存储：

void WriteCode(){

FILE *Fopen;

int i;

Fopen=fopen("./Run.txt","a+");

fprintf(Fopen,"NO.2: The array of the structure of HuffmanTree:"n");

for(i=1;i<52;i++)

fprintf(Fopen,"%-2d: p:%-3d w:%-5d l:%-3d r:%-3d"n",i, HT[i].parent,HT[i].weight,HT[i].lchild,HT[i].rchild);

fprintf(Fopen,"NO.3: The Huffman codes:"n");

for(i=1;i<27;i++)

fprintf(Fopen,"%c: %3d %s"n",'a'+i-1,stat[i],HC[i]);

}

7. 输出压缩前应该得到的比特流：

void OutCompress(){

FILE *Fin,*Fout;

int c;

Fin=fopen("./input.txt","r");

Fout=fopen("./bytes.txt","w");

if(Fin==NULL)

printf("Can't find the file of 'input'!"n");

while((c=fgetc(Fin))!=EOF){/*output the bit stream*/

fprintf(Fout,"%s",HC[c-96]);

}

fclose(Fin);

fclose(Fout);

}

8. 压缩函数：

void Compress(){

FILE *Fin,*Fout;

unsigned char out=0;

char c,*buf;

int i,count=0;

buf=(char *)malloc(sizeof(char)*20);

Fin=fopen("./input.txt","r");

Fout=fopen("./compress.txt","wb");

if(Fin==NULL)

printf("Can't find the file of 'input'!"n");

while((c=fgetc(Fin))!=EOF){/*output the bit stream*/

buf=HC[c-96];

for(i=0;buf[i]!='"0';i++){

if(count==8){

fprintf(Fout,"%c",out);

count=0;

out=0;

}

out<<=1;

count++;

if(buf[i]=='1')

out=out|1;

}

if(count!=8){

out<<=(8-count);

fprintf(Fout,"%c",out);

}

fclose(Fin);

fclose(Fout);

}

9. 解压函数：

void UnCompress(){

FILE *Fin,*Fout,*Fout1;

unsigned char c=0,*buf;

int i,k,t=51;

buf=(char *)malloc(sizeof(char)*10);

Fin=fopen("./compress.txt","rb");

Fout=fopen("./uncompress.txt","wb");

Fout1=fopen("./output.txt","wb");

if(Fin==NULL)

printf("Can't find the file of 'input'!"n");

while(!feof(Fin)){/*output the bit stream*/

c=fgetc(Fin);

for(i=7;i>=0;i--){

k=(c>>i)&1;

fprintf(Fout,"%d",k);

if(k==0)

t=HT[t].lchild;

else

t=HT[t].rchild;

if(HT[t].lchild==0&&HT[t].rchild==0){

fprintf(Fout1,"%c",96+t);

t=51;

}

fclose(Fin);

fclose(Fout);

}

10. 主函数入口

/*
 * Program entry point: runs the whole demonstration once, in order, and
 * then waits for a key press.  The command-line arguments are not used.
 */
int main(int argc, char *argv[]){
    (void)argc;
    (void)argv;

    /* prepare the input and its letter statistics */
    CreatFile();
    ReadStat("./input.txt");

    /* build the tree over the 26 letter counts and log it */
    HuffmanCoding(&stat[1], 26);
    WriteCode();

    /* encode (as text and as packed bytes), then decode again */
    OutCompress();
    Compress();
    UnCompress();

    /* keep the console window open until a key is pressed */
    system("Echo Pree any key to kill the program");
    system("PAUSE>NUL 2>NUL");

    return 0;
}

四、 调试分析

1. 二进制流与unsigned char之间的转换，采用的是位操作，而不是进制的转换，导致失败

2. 位操作的使用出错，误以为表达式c<<1会直接修改c本身（实际上它只返回左移后的值，c不变），导致无法成功压缩，后改为c<<=1即可

3. 文件的读入方式有二进制读入和文本读入，当压缩文件中写入^Z（文本文件结束标志）后，文本方式读取压缩文件失败，最后使用二进制读取成功

4. 文件的压缩效率问题上我考虑过用unsigned int来替换unsigned char，但结果是失败的，压缩率反而降低

5. 由于比特流的总位数对8（一个unsigned char的位数）取模不一定为零，最后一个字节需要补位，此异常（解压文件结尾处会多出2个字符）处理仍没有得到有效解决！

6. 因为输入文件是随机产生的，每个字母出现的频率相近，因此得到的Huffman编码长度也基本相同，在这种情况也就是最差的情况下，最后压缩率达到60%，让人振奋！

五、 测试结果:

经多次按要求进行合法测试，程序执行无误，压缩率也在60%以下，下面是测试结果的一个范例:

其中输入文件大小为10,000个随机字母，大小为：10,000 字节

产生的压缩文件大小为：5,939 字节

解压后文件大小仍为：10,000 字节

压缩率接近：59.4%

六、 附录（源程序清单）

源代码：Huffman.C 产生的文件有：

input.txt 被压缩文件

output.txt 解压后文件

compress.txt 压缩后文件

bytes.txt 压缩时的比特流

uncompress.txt 解压时得到的比特流

Run.txt 程序运行过程记录

代码如下:

Code

posted on 2009-05-27 12:57 yangjie 阅读(2027) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

公告