哈夫曼编码的实现
实现方案:
1. 建立哈夫曼树
(1)统计各个字符出现次数,按次数从小到大的顺序生成队列;
(2)每次获取队列前两个节点(即队列中字符出现次数最少的两个),同时队头前移两位,将它们的字符出现次数相加,得到一个新节点,插入到队列合适位置;
(3)当队列中仅剩一个节点时,哈夫曼树构造完毕,该节点即为树的根节点。
2. 建立哈夫曼编码表
(1)从哈夫曼树根开始遍历,遍历结束的同时编码表也构造完成,整个过程用递归实现,注意结束条件为遍历到叶子节点;
(2)如果当时节点的左孩子不为空,则code[k]='0',递归遍历其左子树;
(3)如果当时节点的右孩子不为空,则code[k]='1',递归遍历其右子树;
(4)如果当时节点为叶子,则code[k]='\0',构造新的编码表节点,并将其插入编码表。
3. 将字符串和哈夫曼编码互换,测试
代码如下,时常翻看,细细揣摩,大有裨益:
#include <stdio.h> #include <string.h> #include <stdlib.h> #define MAX_SZ 256 #define TYPE htNode * // 哈夫曼树节点 typedef struct htNode{ char symbol; struct htNode *left; struct htNode *right; }htNode; // 哈夫曼树 typedef struct htTree{ htNode *root; }htTree; // 队列节点 typedef struct pQueueNode{ TYPE val; unsigned int priority; struct pQueueNode *next; }pQueueNode; // 队列 typedef struct pQueue{ unsigned int size; // 队列长度 pQueueNode *first; // 头指针 }pQueue; // 字符编码节点 typedef struct hlNode{ char symbol; char *code; struct hlNode *next; }hlNode; // 字符编码表 typedef struct hlTable{ hlNode *first; hlNode *last; }hlTable; // 初始化哈夫曼队列 void initpQueue(pQueue **q) { (*q) = (pQueue *)malloc(sizeof(pQueue)); (*q)->first = NULL; (*q)->size = 0; return; } // 插入队列 void addpQueue(pQueue **q,TYPE val,unsigned int priority) { if ((*q)->size == MAX_SZ){ printf("Queue is full.\n"); return; } pQueueNode *aux = (pQueueNode *)malloc(sizeof(pQueueNode)); aux->priority = priority; aux->val = val; if ((*q)->first == NULL || (*q)->size == 0){ // 队列为空 aux->next = NULL; (*q)->first = aux; (*q)->size = 1; } else{ // 小于第一个节点 if (priority <= (*q)->first->priority){ aux->next = (*q)->first; (*q)->first = aux; (*q)->size++; return; } // 迭代 else{ pQueueNode *iterator = (*q)->first; while(iterator->next != NULL){ if (priority <= iterator->next->priority){ aux->next = iterator->next; iterator->next = aux; (*q)->size++; return; } iterator = iterator->next; } // 比队列里所有的都大 if (iterator->next == NULL){ aux->next = NULL; iterator->next = aux; (*q)->size++; return; } } } } // 从队列中获取节点 TYPE getpQueue(pQueue **q) { TYPE returnVal; if ((*q)->size > 0){ returnVal = (*q)->first->val; (*q)->first = (*q)->first->next; (*q)->size --; } else{ printf("Queue is empty.\n"); } return returnVal; } htTree *buildTree(char *inputString) { // 256个指针记录256个ASCII码出现的次数 int *probability = (int *)malloc(sizeof(int)*256); // 初始化 for (int i=0; i<256; i++){ probability[i] = 0; } // 统计待编码的字符串各个字符出现的次数 for (int j=0; inputString[j]!='\0'; j++){ probability[inputString[j]]++; } // pQueue队列头指针 pQueue *huffmanQueue; initpQueue(&huffmanQueue); // 填充队列 for (int k=0; k<256; k++){ if (probability[k] != 0){ htNode *aux = (htNode *)malloc(sizeof(htNode)); aux->left = NULL; aux->right = NULL; aux->symbol = (char)k; addpQueue(&huffmanQueue,aux,probability[k]); } } free(probability); // 生成哈夫曼树 while(huffmanQueue->size != 1){ int pri; pri = huffmanQueue->first->priority; pri += huffmanQueue->first->next->priority; htNode *left = getpQueue(&huffmanQueue); htNode *right = getpQueue(&huffmanQueue); htNode *newNode = (htNode *)malloc(sizeof(htNode)); newNode->left = left; newNode->right = right; newNode->symbol = pri+'0'; addpQueue(&huffmanQueue,newNode,pri); } htTree *tree = (htTree *)malloc(sizeof(htTree)); tree->root = getpQueue(&huffmanQueue); return tree; } // 前序遍历 void pre_sequence(htNode *t) { if(t == NULL){ return; } printf("%c",t->symbol); pre_sequence(t->left); pre_sequence(t->right); } void traverseTree(htNode *treeNode, hlTable **table, int k, char code[256]) { // 叶子节点 if (treeNode->left == NULL && treeNode->right == NULL){ code[k] = '\0'; hlNode *aux = (hlNode *)malloc(sizeof(hlNode)); aux->symbol = treeNode->symbol; aux->next = NULL; aux->code = (char *)malloc(sizeof(char)*(strlen(code)+1)); strcpy(aux->code,code); if ((*table)->first == NULL){ (*table)->first = (*table)->last = aux; } else{ (*table)->last->next = aux; (*table)->last = aux; } } if (treeNode->left != NULL){ code[k] = '0'; traverseTree(treeNode->left,table,k+1,code); } if (treeNode->right != NULL){ code[k] = '1'; traverseTree(treeNode->right,table,k+1,code); } } hlTable *buildTable(htTree *huffmanTree) { hlTable *table = (hlTable *)malloc(sizeof(hlTable)); table->first = NULL; table->last = NULL; char code[256]; // 编码 int k = 0; // 层数 // 关键是这个函数 traverseTree(huffmanTree->root,&table,k,code); return table; } // 将字符串转换为哈夫曼编码 void encode(hlTable *table, char *input) { hlNode *iterator; printf("Encode:\n"); printf("Before encode:"); puts(input); printf("After encode:"); for (int i=0; input[i] != '\0'; i++){ iterator = table->first; while(iterator->symbol != input[i]){ iterator = iterator->next; } printf("%s",iterator->code); } printf("\n"); } // 将哈夫曼编码转换为字符串 void decode(htTree *tree, char *input) { htNode *iterator; iterator = tree->root; printf("Decode:\n"); printf("Before Decode:"); puts(input); printf("After Decode:"); for (int i=0; input[i] != '\0'; i++){ // 到达叶子节点 if (iterator->left == NULL && iterator->right == NULL){ printf("%c",iterator->symbol); iterator = tree->root; } if (input[i] == '0'){ iterator = iterator->left; } if (input[i] == '1'){ iterator = iterator->right; } } printf("\n"); } int main() { // 建立哈夫曼树 htTree *codeTree = buildTree("I love FishC.com!"); pre_sequence(codeTree->root); printf("\n"); // 建立哈夫曼编码表 hlTable *codeTable = buildTable(codeTree); // 将字符串转换成哈夫曼编码 encode(codeTable,"I love FishC.com!"); // 将哈夫曼编码转换成字符串 decode(codeTree,"001111100011"); return 0; }
测试结果: