20230328 5.2. 哈夫曼树与哈夫曼编码
概念
哈夫曼树(Huffman Tree)
解决的问题:如何根据结点不同的查找频率构造更有效的搜索树?
带权路径长度(WPL):设二叉树有n个叶子结点,每个叶子结点带有权值 wk,从根结点到每个叶子结点的长度为 lk,则每个叶子结点的带权路径长度之和就是:\(WPL=\sum_{k=1}^nW_kl_k\)
最优二叉树或哈夫曼树: WPL最小的二叉树
构造方法:每次把权值最小的两棵二叉树合并
哈夫曼树的特点:
- 没有度为1的结点;
- 哈夫曼树的任意非叶节点的左右子树交换后仍是哈夫曼树;
- n个叶子结点的哈夫曼树共有2n-1个结点;
- 对同一组权值{w1,w2, …… , wn},可能存在不同构的两棵哈夫曼树,例如 一组权值
哈夫曼编码
给定一段字符串,如何对字符进行编码,可以使得该字符串的编码存储空间最少?
[例] 假设有一段文本,包含58个字符,并由以下7个字符构:a,e,i,s,t,空格(sp),换行(nl);这7个字符出现的次数不同。如何对这7个字符进行编码,使得总编码空间最少?
怎么进行不等长编码?
如何避免二义性?
前缀码prefix code:任何字符的编码都不是另一字符编码的前缀
- 可以无二义地解码
二叉树用于编码
用二叉树进行编码:
- 左右分支:0、1
- 字符只在叶结点上
Java 关联
用 PriorityQueue 帮助构建哈夫曼树
import org.jetbrains.annotations.NotNull;
/**
* 树节点
*
* @author owenwjhuang
* @date 2023/3/28
*/
public class HuffmanTreeNode<T> implements Comparable<HuffmanTreeNode<T>> {
/**
* 权重
*/
int weight;
/**
* 业务实体
*/
T entity;
HuffmanTreeNode<T> left;
HuffmanTreeNode<T> right;
public HuffmanTreeNode(int weight, T entity) {
this.weight = weight;
this.entity = entity;
}
public HuffmanTreeNode(int weight, T entity, HuffmanTreeNode<T> left, HuffmanTreeNode<T> right) {
this.weight = weight;
this.entity = entity;
this.left = left;
this.right = right;
}
@Override
public String toString() {
return "HuffmanTreeNode{" +
"weight=" + weight +
", entity=" + entity +
'}';
}
@Override
public int compareTo(@NotNull HuffmanTreeNode<T> o) {
return weight - o.weight;
}
}
import cn.hutool.core.collection.ListUtil;
import cn.hutool.core.lang.Console;
import java.util.List;
import java.util.PriorityQueue;
/**
* 哈夫曼树
*
* @author owenwjhuang
* @date 2023/3/28
*/
public class HuffmanTree<T> {
HuffmanTreeNode<T> root;
/**
* 参考课件上的代码
* @param priorityQueue
*/
private HuffmanTree(PriorityQueue<HuffmanTreeNode<T>> priorityQueue) {
int size = priorityQueue.size();
// 做 size-1 次合并
for (int i = 1; i < size; i++) {
HuffmanTreeNode<T> left = priorityQueue.poll();
HuffmanTreeNode<T> right = priorityQueue.poll();
HuffmanTreeNode<T> parent = new HuffmanTreeNode<>(left.weight + right.weight, null, left, right);
priorityQueue.add(parent);
}
root = priorityQueue.poll();
}
public static <T> HuffmanTree<T> buildFromPriorityQueue(PriorityQueue<HuffmanTreeNode<T>> priorityQueue) {
return new HuffmanTree<>(priorityQueue);
}
/**
* 层次打印树节点
*/
public void printTree() {
List<HuffmanTreeNode<T>> list = ListUtil.toList();
list.add(root);
while (!list.isEmpty()) {
Console.log(list);
List<HuffmanTreeNode<T>> newList = ListUtil.toList();
for (HuffmanTreeNode<T> treeNode : list) {
if (treeNode.left != null) {
newList.add(treeNode.left);
}
if (treeNode.right != null) {
newList.add(treeNode.right);
}
}
list = newList;
}
}
}
import lombok.AllArgsConstructor;
import lombok.ToString;
import java.util.PriorityQueue;
/**
* Demo 使用示例
*
* @author owenwjhuang
* @date 2023/3/28
*/
public class CodeDemo {
/**
* 业务类 Dto
*/
@ToString
@AllArgsConstructor
static class BizDto {
/**
* 字符编码
*/
String code;
/**
* 频率
*/
int freq;
static HuffmanTreeNode<BizDto> buildNode(String code, int freq) {
return new HuffmanTreeNode<>(freq, new BizDto(code, freq));
}
static HuffmanTreeNode<BizDto> buildNode(BizDto dto) {
return new HuffmanTreeNode<>(dto.freq, new BizDto(dto.code, dto.freq));
}
}
public static void main(String[] args) {
PriorityQueue<HuffmanTreeNode<BizDto>> priorityQueue = new PriorityQueue<>();
priorityQueue.add(BizDto.buildNode("a", 10));
priorityQueue.add(BizDto.buildNode("e", 15));
priorityQueue.add(BizDto.buildNode("i", 12));
priorityQueue.add(BizDto.buildNode("s", 3));
priorityQueue.add(BizDto.buildNode("t", 4));
priorityQueue.add(BizDto.buildNode("sp", 13));
priorityQueue.add(BizDto.buildNode("nl", 1));
HuffmanTree<BizDto> huffmanTree = HuffmanTree.buildFromPriorityQueue(priorityQueue);
huffmanTree.printTree();
}
}