使用优先队列构建赫夫曼树
关于赫夫曼编码和赫夫曼树的相关知识可參考之前两篇文章(由二叉树构造赫夫曼树、赫夫曼编码)。
本文介绍还有一种构建赫夫曼树的方式,採用优先队列.
1.首先我们须要统计不同字符出现的次数。一个字符出现的次数越多,说明其优先级越高,其赫夫曼编码应该越短;
2.将待编码的字符(即带权节点)存入优先级队列,优先级即字符出现的次数;
3.不断迭代队列,直到队列中剩下一个元素(即根节点)。每次从队列中取出两个优先级最小的元素(优先级队列中的元素是依照节点优先级顺序排列的),然后生成一个新的赫夫曼树节点,节点左右孩子分别指向这两个元素,节点权值为这两个元素权值之和。将新节点插入队列;
4.依据赫夫曼树生成赫夫曼编码表。递归遍历赫夫曼树,对每一个叶子节点进行编码(左0右1);
5.编码过程即对每一个字符查编码表的过程;
6.解码过程即依据编码串訪问赫夫曼树的过程.
实现:
/***************************************************** 赫夫曼编码优先队列版 by Rowandjj 2014/6/17 *****************************************************/ #include<iostream> using namespace std; typedef struct _HTNODE_//赫夫曼树节点的存储结构 { char symbol;//符号(ASC码值) struct _HTNODE_ *left;//左孩子 struct _HTNODE_ *right;//右孩子 }HtNode; typedef struct _HTREE_//赫夫曼树 { HtNode *root; }HTree; typedef struct _HLNODE_//赫夫曼码表节点 { char symbol;//ASC码值 char *code;//相应的编码 struct _HLNODE_ *next; }HlNode; typedef struct _HLTABLE_//赫夫曼码表(以单链表形式存储不同ASC码及与之相应的编码表) { HlNode *first; HlNode *last; }HlTable; //-------------------------------------------------------- #define MAX_SZ 256//队列最大长度 #define TYPE HtNode*//优先队列的节点定义 typedef struct _PQUEUENODE_//队列节点定义 { TYPE val;//节点值 unsigned int priority;//优先级 struct _PQUEUENODE_ *next; }pQueueNode; typedef struct _PQUEUE_//优先队列定义 { unsigned int size;//队列长度 pQueueNode *first;//首节点 }pQueue; //-----------优先队列操作定义------------------------------ void InitQueue(pQueue **queue);//队列初始化 void AddQueue(pQueue **queue,TYPE val,unsigned int priority);//将一个带优先级的节点插入队列 TYPE GetQueue(pQueue **queue);//队列节点依照优先级顺序出队 void TraverseQueue(pQueue *queue);//遍历队列 //-----------赫夫曼编码操作定义---------------------------- HTree* BuildTree(char *inputString);//创建赫夫曼树 HlTable* BuildTable(HTree *huffmanTree);//构建赫夫曼码表(出现频率最高的ASC码将使用最短的编码) void encode(HlTable *table,char *stringToEncode);//编码 void decode(HTree *tree,char *stringToDecode);//解码 void TraverseTable(HlTable *table);//遍历赫夫曼编码表 //--------------------------------------------------------- void InitQueue(pQueue **queue) { *queue = (pQueue *)malloc(sizeof(pQueue)); (*queue)->first = NULL; (*queue)->size = 0; } void AddQueue(pQueue **queue,TYPE val,unsigned int priority) { if((*queue)->size == MAX_SZ) { cout<<"queue is full!"; return; } pQueueNode *aux = (pQueueNode*)malloc(sizeof(pQueueNode)); aux->priority = priority; aux->val = val; aux->next = NULL; if((*queue)->first == NULL)//生成队列首节点 { (*queue)->first = aux; (*queue)->size++; }else { if(priority <= (*queue)->first->priority)//插到首节点位置 { aux->next = (*queue)->first; (*queue)->first = aux; (*queue)->size++; }else//遍历队列插到合适的位置 { pQueueNode *iterator = (*queue)->first; while(iterator->next != NULL && priority > iterator->next->priority) { iterator = iterator->next; } aux->next = iterator->next; iterator->next = aux; (*queue)->size++; } } } TYPE GetQueue(pQueue **queue) { TYPE returnVal = NULL; if ((*queue)->size > 0) { pQueueNode *nodeToDel = (*queue)->first; (*queue)->first = (*queue)->first->next; returnVal = nodeToDel->val; free(nodeToDel); (*queue)->size--; }else { cout<<"queue is empty"<<endl; } return returnVal; } //------------------------------------------------------------- HTree* BuildTree(char *inputString) { //统计字符出现次数 int *probability = (int*)malloc(sizeof(int)*256); int i; for(i = 0; i < 256;i++) { probability[i] = 0; } for(i = 0; inputString[i] != '\0';i++) { probability[(unsigned char)inputString[i]]++; } /* for(i = 0; i < 256; i++)//打印字符出现的次数(也即频率) { if(probability[i] != 0) cout<<(char)i<<":"<<probability[i]<<endl; } */ //依据每一个字符出现的频率生成赫夫曼树 pQueue* huffmanQueue; InitQueue(&huffmanQueue); for(i = 0; i < 256; i++) { //1 现将带权节点加到队列中 if(probability[i] != 0)//仅仅考虑带权节点,也就是说仅仅对带权节点编码 { HtNode *aux = (HtNode*)malloc(sizeof(HtNode)); aux->symbol = (char)i; aux->left = NULL; aux->right = NULL; AddQueue(&huffmanQueue,aux,probability[i]); } } cout<<"字符及出现频次:"<<endl; TraverseQueue(huffmanQueue); free(probability); //2 再构建赫夫曼树 while(huffmanQueue->size != 1) { int priority = huffmanQueue->first->priority; priority += huffmanQueue->first->next->priority; //从队列中获取两个优先级最低的节点 HtNode *left = GetQueue(&huffmanQueue); HtNode *right = GetQueue(&huffmanQueue); //生成其父节点,父节点的权值为两个子节点的权值之和 HtNode *newNode = (HtNode*)malloc(sizeof(HtNode)); newNode->left = left; newNode->right = right; //将生成的父节点插入到优先队列的合适位置 AddQueue(&huffmanQueue,newNode,priority); } HTree *tree = (HTree*)malloc(sizeof(HTree)); tree->root = GetQueue(&huffmanQueue); return tree; } void travelseTree(HtNode *treeNode,HlTable **table,int k,char code[256]) { if(treeNode->left == NULL && treeNode->right == NULL)//走到叶子节点 { code[k] = '\0'; HlNode *aux = (HlNode*)malloc(sizeof(HlNode)); aux->symbol = treeNode->symbol; aux->code = (char *)malloc(sizeof(char)*(strlen(code)+1)); strcpy(aux->code,code); aux->next = NULL; if((*table)->first == NULL)//连接 { (*table)->first = (*table)->last = aux; }else { (*table)->last->next = aux; (*table)->last = aux; } } if(treeNode->left != NULL) { code[k] = '0'; travelseTree(treeNode->left,table,k+1,code); } if(treeNode->right != NULL) { code[k] = '1'; travelseTree(treeNode->right,table,k+1,code); } } HlTable* BuildTable(HTree *huffmanTree)//由赫夫曼树构建赫夫曼编码表 { HlTable *table = (HlTable*)malloc(sizeof(HlTable)); table->first = NULL; table->last = NULL; char code[256]; int k = 0; travelseTree(huffmanTree->root,&table,k,code); return table; } void encode(HlTable *table,char *stringToEncode) { HlNode *travel; for(int i = 0; stringToEncode[i] != '\0'; i++) { travel = table->first; while(travel->symbol != stringToEncode[i]) { travel = travel->next; } cout<<travel->code<<" "; } } void decode(HTree *tree,char *stringToDecode) { HtNode *travel = tree->root; for(int i = 0;stringToDecode[i] != '\0'; i++) { if(travel->left == NULL && travel->right == NULL) { cout<<travel->symbol; travel = tree->root; } if(stringToDecode[i] == '0') { travel = travel->left; } if(stringToDecode[i] == '1') { travel = travel->right; } if(stringToDecode[i] != '0' && stringToDecode[i] != '1') { cout<<"\ndecode error..."<<endl; } } if(travel->left == NULL && travel->right == NULL) { cout<<travel->symbol; travel = tree->root; } } void TraverseQueue(pQueue *queue) { pQueueNode *iterator = queue->first; while(iterator != NULL) { cout<<iterator->val->symbol<<":"<<iterator->priority<<endl; iterator = iterator->next; } } void TraverseTable(HlTable *table) { HlNode *iterator = table->first; cout<<"字符"<<":"<<"编码"<<endl; while(iterator != NULL) { cout<<iterator->symbol<<":"<<iterator->code<<endl; iterator = iterator->next; } cout<<endl; } int main() { HTree *codeTree = BuildTree("abbccccddddddd"); HlTable *codeTable = BuildTable(codeTree); TraverseTable(codeTable); cout<<"编码開始:"<<endl; encode(codeTable,"abcd"); cout<<"\n解码開始:"<<endl; decode(codeTree,"10100100000101"); cout<<endl; return 0; }
測试结果:
优先队列变化过程:
生成的赫夫曼树:
注:这里的构建赫夫曼树的约定和之前的两篇不太一样,这里约定一个节点的左节点的权值小于右节点的权值.