java实现哈夫曼编码压缩

java实现哈夫曼编码压缩

节点类

/**
 * 节点类
 */
class Node implements Comparable<Node> {
    Byte data;
    int weight;

    Node left;
    Node right;

    public Node(int weight) {
        this.weight = weight;
    }

    public Node(Byte data, int weight) {
        this.data = data;
        this.weight = weight;
    }

    /**
     * 前序遍历
     */
    public void preOrder() {
        System.out.println(this);
        if (this.left != null) {
            this.left.preOrder();
        }
        if (this.right != null) {
            this.right.preOrder();
        }
    }

    @Override
    public int compareTo(Node o) {
        return this.weight - o.weight;
    }

    @Override
    public String toString() {
        return "Node{" +
                "data=" + data +
                ", weight=" + weight +
                '}';
    }
}

首先根据字符串数据创建Node集合

/**
 * 将字符数组按出现个数转换成一个个Node,并存到List返回
 *
 * @param bytes 被转换的字符数组
 * @return 转换后的list
 */
private static List<Node> getNodes(byte[] bytes) {
    ArrayList<Node> nodes = new ArrayList<>();
    // 遍历统计出现的次数
    HashMap<Byte, Integer> map = new HashMap<>();
    for (byte b : bytes) {
        Integer integer = map.get(b);
        if (integer == null) {
            map.put(b, 1);
        } else {
            map.put(b, integer + 1);
        }
    }
    // 将map里的元素转成一个个Node
    Set<Map.Entry<Byte, Integer>> entries = map.entrySet();
    for (Map.Entry<Byte, Integer> entry : entries) {
        Node node = new Node(entry.getKey(), entry.getValue());
        // 并加入list
        nodes.add(node);
    }
    return nodes;
}

根据Node集合创建哈夫曼树

/**
 * 根据Node集合创建哈夫曼树
 */
private static Node createHuffmanTree(List<Node> nodes) {
    while (nodes.size() > 1) {
        Collections.sort(nodes);

        Node left = nodes.get(0);
        Node right = nodes.get(1);

        // 通过另一个构造器
        Node parent = new Node(left.weight + right.weight);
        parent.left = left;
        parent.right = right;

        nodes.remove(left);
        nodes.remove(right);
        nodes.add(parent);
    }
    return nodes.get(0);
}

根据哈夫曼树构造哈夫曼编码表

static HashMap<Byte, String> huffmanCodes = new HashMap<>();
static StringBuilder stringbuilder = new StringBuilder();

/**
 * 为了方便调用,重载
 */
private static HashMap<Byte, String> createEncoding(Node root) {
        if (root == null) {
            return null;
        }
        // 为了减少root的一次
        createEncoding(root.left, "0", stringbuilder);
        createEncoding(root.right, "1", stringbuilder);
        return huffmanCodes;
    }

/**
 * 根据哈夫曼树创建对应的哈夫曼编码表
 *
 * @param node          传入的节点
 * @param code          路径:左字节为0,右子节点为1
 * @param stringBuilder 用于拼接路径
 */
private static void createEncoding(Node node, String code, StringBuilder stringBuilder) {
    StringBuilder builder = new StringBuilder(stringBuilder);
    builder.append(code);

    if (node != null) {
        // 判断是不是应该编码的
        if (node.data == null) {
            // 左右递归
            createEncoding(node.left, "0", builder);
            createEncoding(node.right, "1", builder);
        } else {
            // 如果是
            huffmanCodes.put(node.data, builder.toString());
        }
    }
}

将需要压缩的字节数组根据编码表压缩

/**
 * 将字节数组根据编码表压缩
 *
 * @param bytes 被转换的字节数组
 * @return 压缩后的数组
 */
private static byte[] zip(byte[] bytes, HashMap<Byte, String> huffmanCodes) {
    StringBuilder builder = new StringBuilder();
    // 遍历bytes
    for (byte b : bytes) {
        // 根据编码表拿到对应的编码
        String code = huffmanCodes.get(b);
        stringbuilder.append(code);
    }
    // 编码后再次压缩,转成byte数组
    // 先得到byte[]的长度
    int length = stringbuilder.length();
    int len;
    // 或者写成len = (length + 7) / 8;
    if (length % 8 == 0) {
        len = length / 8;
    } else {
        len = length / 8 + 1;
    }
    byte[] result = new byte[len];
    // 每8位对应
    int index = 0;
    for (int i = 0; i < length; i += 8) {
        String str;
        if (i + 8 < length) {
            // 够八位
            str = stringbuilder.substring(i, i + 8);
        } else {
            // 最后不够八位
            str = stringbuilder.substring(i);
        }
        // 转成一个byte存入
        result[index++] = (byte) Integer.parseInt(str);
    }
    return result;
}

把所有方法调用封装起来,方便调用

/**
 * 封装,方便调用
 * @param str 传入的字符串
 * @return 经过哈夫曼编码后的字节数组
 */
private static byte[] huffmanZip(String str) {
    byte[] bytes = str.getBytes();
    // 转成Node集合
    List<Node> nodes = getNodes(bytes);
    // 创建哈夫曼树
    Node huffmanTree = createHuffmanTree(nodes);
    // 创建编码表
    HashMap<Byte, String> encoding = createEncoding(huffmanTree);
    // 压缩并返回
    return zip(bytes, huffmanCodes);
}

测试方法

public static void main(String[] args) {
    // 需要被转换的字符串
    String text = "i like like like java do you like a java";
    byte[] bytes = huffmanZip(text);
    System.out.println(Arrays.toString(bytes));
}
posted @ 2022-04-20 17:25  CoderCatIce  阅读(334)  评论(0编辑  收藏  举报