哈夫曼编码

基本术语:

  路径和路径长度:

    在一棵树中,从一个结点往下可以达到的孩子或孙子结点之间的通路,称为路径。通路中分支的数目称为路径长度。若规定根结点的层数为1,则从根           结点到第L层结点的路径长度为L-1。

  结点的权及带权路径长度:

    若将树中结点赋给一个有着某种含义的数值,则这个数值称为该结点的权。结点的带权路径长度为:从根结点到该结点之间的路径长度与该结点的权的         乘积。

  树的带权路径长度:

    树的带权路径长度规定为所有叶子结点的带权路径长度之和,记为WPL。

哈夫曼树:  

  给定N个权值作为N个叶子节点,构造一棵二叉树,若该树的带权路径长度达到最小,称这样的二叉树为最优二叉树,也称为哈夫曼树(Huffman Tree)。         哈夫曼树是带权路径长度最短的树,权值较大的结点离根较近。

复制代码
package data_structure_and_algorithm.huffman;

import lombok.Data;

/**
 * 哈夫曼节点
 */
@Data
class HuffmanNode implements Comparable<HuffmanNode> {

    /**
     * 字符
     */
    private Byte character;

    /**
     * 权重
     */
    private int weight;

    /**
     * 左子节点
     */
    private HuffmanNode left;

    /**
     * 右子节点
     */
    private HuffmanNode right;

    public HuffmanNode(int weight, Byte character) {
        this.weight = weight;
        this.character = character;
    }

    @Override
    public String toString() {
        return "HuffmanNode{" +
                "weight=" + weight + "character=" + character +
                '}';
    }

    @Override
    public int compareTo(HuffmanNode o) {
        return this.weight - o.weight;
    }
}
复制代码
复制代码
package data_structure_and_algorithm.huffman;

import cn.hutool.core.collection.CollUtil;
import lombok.Data;
import java.util.Collections;
import java.util.List;

/**
 * 哈夫曼树
 */
@Data
public class HuffmanTree {
    /**
     * 根节点
     */
    private HuffmanNode root;

    /**
     * 构建哈夫曼树
     * @param huffmanNodes
     */
    public void createHuffmanTree(List<HuffmanNode> huffmanNodes) {
        if (CollUtil.isEmpty(huffmanNodes)) {
            return;
        }
        //当集合中只剩下一个节点,则哈夫曼树构建完成,且剩下的那个节点就是哈夫曼树的根节点
        while (huffmanNodes.size() > 1) {
            //将集合变得有序(从小到大)
            Collections.sort(huffmanNodes);
            //取其中最小的两个节点作为子节点,构成一棵二叉树,该二叉树根节点的权重为两个子节点权重之和
            HuffmanNode right = huffmanNodes.get(0);
            HuffmanNode left = huffmanNodes.get(1);
            HuffmanNode fatherNode = new HuffmanNode(left.getWeight() + right.getWeight(), null);
            fatherNode.setLeft(left);
            fatherNode.setRight(right);
            //从集合中删除这两个节点并且添加fatherNode节点
            huffmanNodes.remove(left);
            huffmanNodes.remove(right);
            huffmanNodes.add(fatherNode);
        }
        this.root = huffmanNodes.get(0);
    }

    /**
     * 前序遍历
     */
    public void preorderTraversal() {
        preorderTraversal(root);
    }

    public void preorderTraversal(HuffmanNode node) {
        System.out.println(node);
        if (node.getLeft() != null) {
            preorderTraversal(node.getLeft());
        }
        if (node.getRight() != null) {
            preorderTraversal(node.getRight());
        }
    }
}
复制代码

哈夫曼编码:

  Huffman于1952年提出一种编码方法,是可变字长编码(VLC)的一种,该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般就叫做Huffman编码(有时也称为霍夫曼编码)。

 哈夫曼编码:

复制代码
package data_structure_and_algorithm.huffman;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.StrUtil;
import lombok.Data;
import lombok.NoArgsConstructor;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * 哈夫曼编码
 */
@Data
@NoArgsConstructor
public class HuffmanCode {

    /**
     * 哈夫曼码是否被8整除
     */
    private boolean integralDivision;

    /**
     * 原字节和哈夫曼码的映射
     */
    private Map<Byte, String> huffmanCodeMap;

    //哈夫曼编码:设定左分支为0,右分支为1
    private static final String LEFT_BRANCH = "0";
    private static final String RIGHT_BRANCH = "1";

    /**
     * 创建原字节和权重的映射
     * @param srcBytes
     * @return
     */
    private Map<Byte, Integer> createWeightMap(byte[] srcBytes) {
        Map<Byte, Integer> weightMap = new HashMap<>();
        for (byte srcByte : srcBytes) {
           if (weightMap.get(srcByte) == null) {
               weightMap.put(srcByte, 1);
           } else {
               weightMap.put(srcByte, weightMap.get(srcByte) + 1);
           }
        }
        return weightMap;
    }

    /**
     * 构建哈夫曼树
     * @param weightMap
     * @return
     */
    private HuffmanTree createHuffmanTree(Map<Byte, Integer> weightMap) {
        HuffmanTree huffmanTree = new HuffmanTree();
        List<HuffmanNode> huffmanNodes = new ArrayList<>(weightMap.size());
        for (Map.Entry<Byte, Integer> entry : weightMap.entrySet()) {
            HuffmanNode huffmanNode = new HuffmanNode(entry.getValue(), entry.getKey());
            huffmanNodes.add(huffmanNode);
        }
        huffmanTree.createHuffmanTree(huffmanNodes);
        return huffmanTree;
    }

    /**
     * 原字节和哈夫曼码的映射
     * @param root
     * @param srcBytes
     */
    private void createHuffmanCodeMap(HuffmanNode root, byte[] srcBytes) {
        huffmanCodeMap = new HashMap<>(srcBytes.length);
        //原文件只有一个字符
        if (root.getLeft() == null && root.getRight() == null) {
            huffmanCodeMap.put(root.getCharacter(), "0");
            return;
        }
        StringBuilder huffmanCode = new StringBuilder();
        createHuffmanCodeMap(root, huffmanCode, "");
    }


    private void createHuffmanCodeMap(HuffmanNode node, StringBuilder huffmanCode, String branch) {
        StringBuilder huffmanCodeMethod = new StringBuilder(huffmanCode);
        huffmanCodeMethod.append(branch);
        if (node.getLeft() != null) {
            createHuffmanCodeMap(node.getLeft(), huffmanCodeMethod, LEFT_BRANCH);
        }
        if (node.getRight() != null) {
            createHuffmanCodeMap(node.getRight(), huffmanCodeMethod, RIGHT_BRANCH);
        }
        //叶子节点与哈夫曼码的映射
        if (node.getCharacter() == null) {
            return;
        }
        huffmanCodeMap.put(node.getCharacter(), huffmanCodeMethod.toString());
    }

    /**
     * 生成哈夫曼码(补码)
     * @param srcBytes
     * @return
     * @throws Exception
     */
    private String generateHuffmanCodes(byte[] srcBytes) throws Exception {
        StringBuilder huffmanCodes = new StringBuilder();
        for (byte srcByte : srcBytes) {
            if (StrUtil.isBlank(huffmanCodeMap.get(srcByte))) {
                throw new Exception("压缩失败,请重试!");
            }
            huffmanCodes.append(huffmanCodeMap.get(srcByte));
        }
        return huffmanCodes.toString();
    }

    /**
     * 压缩后的字节数组
     * @param huffmanCodes
     * @return
     */
    private byte[] zipBytes(String huffmanCodes) {
        if (StrUtil.isBlank(huffmanCodes)) {
            return new byte[0];
        }
        if (huffmanCodes.length() % 8 == 0) {
            integralDivision = true;
        } else {
            integralDivision = false;
        }
        //将哈夫曼码转为字节数组之后的长度
        int length = (huffmanCodes.length() + 7) / 8;

        byte[] zipBytes = new byte[length];
        //每八个bit组成一个byte(1 byte = 8 bit)
        int j = 0;
        for (int i = 0; i < length; i++) {
            if (j + 8 > huffmanCodes.length()) {
                //余下的哈夫曼补码不足8位
                zipBytes[i] = (byte)Integer.parseUnsignedInt(huffmanCodes.substring(j), 2);
            } else {
                zipBytes[i] = (byte)Integer.parseUnsignedInt(huffmanCodes.substring(j, j + 8), 2);
                j += 8;
            }
        }

        return zipBytes;
    }

    /**
     * 生成压缩后的字节数组
     * @param srcBytes
     * @return
     * @throws Exception
     */
    public byte[] generateZipBytes(byte[] srcBytes) throws Exception {
        if (srcBytes == null || srcBytes.length == 0) {
            throw new Exception("压缩失败,请检查原文件!");
        }

        System.out.println("========原文件与频次的映射=========");
        Map<Byte, Integer> weightMap = createWeightMap(srcBytes);
        for (Map.Entry<Byte, Integer> entry : weightMap.entrySet()) {
            System.out.print(entry.getKey() + "->" + entry.getValue() + "  ");
        }
        System.out.println();

        //System.out.println("========根据原文件构建的哈夫曼树=========");
        HuffmanTree huffmanTree = createHuffmanTree(weightMap);
        //huffmanTree.preorderTraversal();

        System.out.println("========原字节和哈夫曼码的映射=========");
        createHuffmanCodeMap(huffmanTree.getRoot(), srcBytes);
        for (Map.Entry<Byte, String> entry : huffmanCodeMap.entrySet()) {
            System.out.print(entry.getKey() + "->" + entry.getValue() + "  ");
        }
        System.out.println();

        System.out.println("========最终的哈夫曼码(补码)=========");
        String huffmanCodes = generateHuffmanCodes(srcBytes);
        System.out.println(huffmanCodes + "(长度:" + huffmanCodes.length() + ")");

        System.out.println("========压缩后的字节数组=========");
        byte[] zipBytes = zipBytes(huffmanCodes);
        return zipBytes;
    }

    /**
     * 解码
     * @param zipBytes
     * @return
     */
    public byte[] decode(byte[] zipBytes) throws Exception {
        if (zipBytes == null || zipBytes.length == 0) {
            return new byte[0];
        }
        String huffmanCodes = changeBytesToCodes(zipBytes);
        return changeCodesToBytes(huffmanCodes);
    }

    /**
     * 将压缩的字节数组转为哈夫曼码(补码)
     * @param zipBytes
     * @return
     */
    private String changeBytesToCodes(byte[] zipBytes) {
        StringBuilder huffmanCodes = new StringBuilder();
        for (int i = 0; i < zipBytes.length; i++) {
            int temp = zipBytes[i];
            String binaryString = null;
            if (temp >= 0) {
                //如果是正数则需要根据情况补0
                if (i == zipBytes.length - 1 && !integralDivision) {
                    //不用补0
                    binaryString = Integer.toBinaryString(temp);
                } else {
                    temp |= 256;
                    binaryString = Integer.toBinaryString(temp);
                    binaryString = binaryString.substring(binaryString.length() - 8);
                }
            } else {
                //如果是负数则需要截取后8位
                binaryString = Integer.toBinaryString(temp);
                binaryString = binaryString.substring(binaryString.length() - 8);
            }
            huffmanCodes.append(binaryString);
        }
        return huffmanCodes.toString();
    }

    /**
     * 将哈夫曼码转为原字节数组
     * @param haffmanCodes
     * @return
     */
    private byte[] changeCodesToBytes(String haffmanCodes) throws Exception {
        if (CollUtil.isEmpty(huffmanCodeMap)) {
            throw new Exception("未压缩,无法解压!");
        }
        List<Byte> srcBytes = new ArrayList<>();
        //哈夫曼码和原字节的映射
        Map<String, Byte> srcBytesMap = new HashMap<>(huffmanCodeMap.size());
        for (Map.Entry<Byte, String> entry : huffmanCodeMap.entrySet()) {
            srcBytesMap.put(entry.getValue(), entry.getKey());
        }
        //待匹配的尾下标
        int count = 1;
        for (int i = 0; i < haffmanCodes.length();) {
            Byte srcByte =  srcBytesMap.get(haffmanCodes.substring(i, count));
            if (srcByte != null) {
                srcBytes.add(srcByte);
                i = count;
            }
            count++;
        }
        byte[] bytes = new byte[srcBytes.size()];
        for (int i = 0; i < srcBytes.size(); i++) {
            bytes[i] = srcBytes.get(i);
        }
        return bytes;
    }
}
复制代码

利用哈夫曼编码对文件压缩与解压:

复制代码
package data_structure_and_algorithm.huffman;

import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.Map;

/**
 * 哈夫曼文件压缩和解压
 */
public class HuffmanFile {

    /**
     * 压缩文件
     * @param srcFile
     * @param dstFile
     */
    public static void zipFile(String srcFile, String dstFile) {
        OutputStream os = null;
        ObjectOutputStream oos = null;
        FileInputStream is = null;
        try {
            is = new FileInputStream(srcFile);
            byte[] srcBytes = new byte[is.available()];
            is.read(srcBytes);
            //压缩
            HuffmanCode huffmanCode = new HuffmanCode();
            byte[] zipBytes = huffmanCode.generateZipBytes(srcBytes);
            os = new FileOutputStream(dstFile);
            oos = new ObjectOutputStream(os);
            oos.writeObject(zipBytes);
            oos.writeObject(huffmanCode.getHuffmanCodeMap());
        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            if (oos != null) {
                try {
                    oos.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (os != null) {
                try {
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * 解压
     * @param zipFile
     * @param dstFile
     */
    public static void unZipFile(String zipFile, String dstFile) {
        InputStream is = null;
        ObjectInputStream ois = null;
        OutputStream os = null;
        try {
            is = new FileInputStream(zipFile);
            ois = new ObjectInputStream(is);
            byte[] zipBytes = (byte[]) ois.readObject();
            Map<Byte, String> huffmanCodeMap = (Map<Byte, String>) ois.readObject();
            //解压
            HuffmanCode huffmanCode = new HuffmanCode();
            huffmanCode.setHuffmanCodeMap(huffmanCodeMap);
            byte[] decodes = huffmanCode.decode(zipBytes);
            os = new FileOutputStream(dstFile);
            os.write(decodes);
        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            if (os != null) {
                try {
                    os.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (ois != null) {
                try {
                    ois.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            if (is != null) {
                try {
                    is.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
复制代码

 测试代码:

复制代码
package data_structure_and_algorithm.huffman;

public class HuffmanTest {
    public static void main(String[] args) throws Exception {
        //huffmanTest01();
        //huffmanTest02("E:\\temp\\zipFile.bmp", "E:\\temp\\srcFile.png");
        huffmanTest03("E:\\temp\\dstFile.png", "E:\\temp\\zipFile.bmp");
    }

    public static void huffmanTest01() throws Exception {
        //原文件
        String srcCode = "Do you like like a java ?";
        byte[] srcCodeBytes = srcCode.getBytes();
        System.out.println("========原文件补码值=========");
        for (byte srcCodeByte : srcCodeBytes) {
            System.out.print(srcCodeByte + "  ");
        }
        System.out.println();

        HuffmanCode huffmanCode = new HuffmanCode();
        byte[] zipBytes = null;
        try {
            zipBytes = huffmanCode.generateZipBytes(srcCodeBytes);
            for (byte zipByte : zipBytes) {
                System.out.print(zipByte + "  ");
            }
            System.out.println();
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
        System.out.println("========原文件补码值=========");
        byte[] srcBytes = huffmanCode.decode(zipBytes);
        for (byte srcByte : srcBytes) {
            System.out.print(srcByte + "  ");
        }
        System.out.println();
    }

    public static void huffmanTest02(String dstFile, String srcFile) {
        HuffmanFile.zipFile(srcFile, dstFile);
    }

    public static void huffmanTest03(String dstFile, String zipFile) {
        HuffmanFile.unZipFile(zipFile, dstFile);
    }
}
复制代码

 huffmanTest01的哈夫曼树图解:

 

 

 

 

  

posted @   Java厨师长  阅读(118)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 单线程的Redis速度为什么快?
· SQL Server 2025 AI相关能力初探
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 展开说说关于C#中ORM框架的用法!
点击右上角即可分享
微信分享提示