用 C# 实现哈夫曼压缩算法

    /// <summary>
    /// Huffman-encodes the content of an uploaded file and returns the result as a
    /// downloadable text file.
    /// </summary>
    /// <remarks>
    /// The encoded bytes are rendered as a hexadecimal string without separators,
    /// written to the console, and returned as UTF-8 text.
    /// </remarks>
    /// <param name="file">The uploaded file to encode.</param>
    /// <param name="cancellationToken">Token used to cancel reading the upload and building the tree.</param>
    /// <returns>
    /// A <see cref="FileStreamResult"/> with content type <c>text/plain</c> and download name
    /// <c>compressedFile</c>, or <c>null</c> when <paramref name="file"/> is null or empty.
    /// </returns>
    /// <exception cref="EndOfStreamException">
    /// The upload stream ended before <c>file.Length</c> bytes could be read.
    /// </exception>
    public async Task<IActionResult> HashCompressAsync(IFormFile file, CancellationToken cancellationToken)
    {
        if (file == null || file.Length == 0)
        {
            // NOTE(review): null is kept for compatibility with existing callers;
            // confirm the caller really handles a null action result.
            return null;
        }

        // Read the entire upload into memory.
        byte[] fileData = new byte[file.Length];

        using (var stream = file.OpenReadStream())
        {
            // A single ReadAsync call may return fewer bytes than requested,
            // so keep reading until the buffer is completely filled.
            int offset = 0;
            while (offset < fileData.Length)
            {
                int read = await stream.ReadAsync(fileData.AsMemory(offset), cancellationToken);
                if (read == 0)
                {
                    throw new EndOfStreamException("The uploaded file ended before all of its bytes were read.");
                }

                offset += read;
            }
        }

        // Count how often each byte value occurs.
        var frequencies = new Dictionary<byte, int>();

        foreach (byte symbol in fileData)
        {
            // TryGetValue leaves count at 0 for a byte seen for the first time.
            frequencies.TryGetValue(symbol, out int count);
            frequencies[symbol] = count + 1;
        }

        // Build the Huffman tree and derive the bit-string code of every byte value.
        var root = BuildHuffmanTree(frequencies, cancellationToken);

        var codeTable = new Dictionary<byte, string>();
        GenerateCodeTable(root, "", codeTable);

        // Encode the data with the generated code table.
        byte[] compressedData = CompressFileAsync(fileData, codeTable);

        // Render the encoded bytes as hex and strip the "-" separators BitConverter inserts.
        string compressedDataString = BitConverter.ToString(compressedData).Replace("-", "");

        // NOTE(review): this dumps the whole encoded payload to the console; kept to
        // preserve existing behaviour, but it looks like leftover debug output.
        Console.WriteLine(compressedDataString);

        // A MemoryStream created over the array starts at position 0, so no
        // write/flush/seek sequence is needed before handing it to the result.
        byte[] compressedBytes = Encoding.UTF8.GetBytes(compressedDataString);
        var streamData = new MemoryStream(compressedBytes);

        return new FileStreamResult(streamData, "text/plain")
        {
            FileDownloadName = "compressedFile"
        };
    }

    /// <summary>
    /// Builds a Huffman tree from a table of byte frequencies.
    /// </summary>
    /// <param name="dictionary">Maps each byte value to its number of occurrences; must contain at least one entry.</param>
    /// <param name="cancellationToken">Checked before every merge step.</param>
    /// <returns>
    /// The root of the tree. When <paramref name="dictionary"/> holds a single entry,
    /// the root is the leaf for that byte.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="dictionary"/> is empty.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    private HuffmanNode BuildHuffmanTree(Dictionary<byte, int> dictionary, CancellationToken cancellationToken)
    {
        ArgumentNullException.ThrowIfNull(dictionary);

        if (dictionary.Count == 0)
        {
            throw new ArgumentException("At least one byte frequency is required to build a Huffman tree.", nameof(dictionary));
        }

        // PriorityQueue dequeues the element with the smallest priority under the
        // default comparer, which is exactly what Huffman's algorithm needs: the two
        // least frequent nodes are merged first. (A reversed comparer would merge the
        // most frequent nodes first and produce a non-optimal code.)
        var queue = new PriorityQueue<HuffmanNode, int>(dictionary.Count);

        // Every byte value starts out as a leaf weighted by its frequency.
        foreach (var (symbol, frequency) in dictionary)
        {
            queue.Enqueue(new HuffmanNode { Value = symbol, Frequency = frequency }, frequency);
        }

        // Merge the two lightest nodes until a single root remains. With only one
        // distinct byte there is nothing to merge and the loop body never runs.
        while (queue.Count > 1)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var left = queue.Dequeue();
            var right = queue.Dequeue();

            // The parent carries no byte value; its weight is the sum of its children.
            var parent = new HuffmanNode
            {
                Frequency = left.Frequency + right.Frequency,
                Left = left,
                Right = right
            };

            queue.Enqueue(parent, parent.Frequency);
        }

        return queue.Dequeue();
    }

    /// <summary>
    /// Walks a Huffman tree and records the bit-string code of every leaf.
    /// </summary>
    /// <remarks>
    /// Going to a left child appends "0" to the code, going to a right child appends "1".
    /// A leaf reached with an empty code (a tree that consists of a single leaf) is
    /// assigned "0" so that every byte is encoded with at least one bit.
    /// </remarks>
    /// <param name="node">The (sub)tree root to walk.</param>
    /// <param name="code">The code accumulated on the path from the tree root to <paramref name="node"/>.</param>
    /// <param name="codeTable">Receives one entry per leaf: byte value to code.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="node"/> or <paramref name="codeTable"/> is null.
    /// </exception>
    public void GenerateCodeTable(HuffmanNode node, string code, Dictionary<byte, string> codeTable)
    {
        ArgumentNullException.ThrowIfNull(node);
        ArgumentNullException.ThrowIfNull(codeTable);

        if (node.Value.HasValue)
        {
            // Leaf: store its code. Recursive calls always pass a non-empty code, so an
            // empty code can only mean the leaf is the root of a one-symbol tree.
            codeTable[node.Value.Value] = string.IsNullOrEmpty(code) ? "0" : code;
            return;
        }

        // Internal node: descend into whichever children exist.
        if (node.Left is not null)
        {
            GenerateCodeTable(node.Left, code + "0", codeTable);
        }

        if (node.Right is not null)
        {
            GenerateCodeTable(node.Right, code + "1", codeTable);
        }
    }

    /// <summary>
    /// Encodes <paramref name="data"/> by replacing every byte with its code from
    /// <paramref name="codeTable"/>.
    /// </summary>
    /// <remarks>
    /// Runs synchronously despite the <c>Async</c> suffix. Each '0' or '1' character of a
    /// code becomes one output byte with the value 0 or 1; any other character in a code
    /// is ignored.
    /// </remarks>
    /// <param name="data">The bytes to encode.</param>
    /// <param name="codeTable">Maps each byte value to its code, written as a string of '0' and '1'.</param>
    /// <returns>The concatenated code bits, one byte per bit.</returns>
    public static byte[] CompressFileAsync(byte[] data, Dictionary<byte, string> codeTable)
    {
        var bits = new List<byte>();

        for (int i = 0; i < data.Length; i++)
        {
            // Look up the code assigned to the current input byte.
            string symbolCode = codeTable[data[i]];

            for (int j = 0; j < symbolCode.Length; j++)
            {
                switch (symbolCode[j])
                {
                    case '0':
                        bits.Add(0);
                        break;
                    case '1':
                        bits.Add(1);
                        break;
                }
            }
        }

        return bits.ToArray();
    }

}

/// <summary>
/// A node of a Huffman tree: either a leaf that carries a byte value, or an
/// internal node that links a left and a right child.
/// </summary>
public class HuffmanNode
{
    /// <summary>
    /// The byte this node stands for; null when no byte value has been assigned,
    /// as is the case for nodes created by merging two children.
    /// </summary>
    public byte? Value { get; set; }

    /// <summary>
    /// The weight of the node: the occurrence count of <see cref="Value"/> for a leaf,
    /// or the sum of both children's frequencies for a merged node.
    /// </summary>
    public int Frequency { get; set; }

    /// <summary>The left child (its branch contributes a "0" to the code).</summary>
    public HuffmanNode Left { get; set; }

    /// <summary>The right child (its branch contributes a "1" to the code).</summary>
    public HuffmanNode Right { get; set; }
}

  

posted @ 2024-05-31 17:45  XiangdxDu  阅读(23)  评论(0编辑  收藏  举报