用 C# 实现哈夫曼压缩算法
/// <summary>
/// Huffman-encodes an uploaded file and returns the resulting bit sequence as a
/// downloadable text file.
/// </summary>
/// <remarks>
/// The response body is the hexadecimal text of the encoded data. Because
/// <see cref="CompressFileAsync"/> emits one byte (0 or 1) per code bit, every bit
/// appears in the download as the two characters "00" or "01".
/// The code table is not included in the response.
/// </remarks>
/// <param name="file">The uploaded file to encode.</param>
/// <param name="cancellationToken">Token used to cancel reading the upload and building the tree.</param>
/// <returns>
/// A <see cref="FileStreamResult"/> of type <c>text/plain</c> named <c>compressedFile</c>,
/// or a 400 Bad Request result when the upload is missing or empty.
/// </returns>
/// <exception cref="EndOfStreamException">The upload ended before <c>file.Length</c> bytes were read.</exception>
public async Task<IActionResult> HashCompressAsync(IFormFile file, CancellationToken cancellationToken)
{
    // Reject missing/empty uploads with a proper result instead of returning null.
    if (file is null || file.Length == 0)
    {
        return new BadRequestObjectResult("The uploaded file is missing or empty.");
    }

    // Read the whole upload. A single ReadAsync call may return fewer bytes than
    // requested, so keep reading until the buffer is full.
    byte[] fileData = new byte[file.Length];
    using (var stream = file.OpenReadStream())
    {
        int offset = 0;
        while (offset < fileData.Length)
        {
            int read = await stream.ReadAsync(fileData.AsMemory(offset), cancellationToken);
            if (read == 0)
            {
                throw new EndOfStreamException("The uploaded file ended before all of its bytes could be read.");
            }

            offset += read;
        }
    }

    // Count how often each byte value occurs.
    var frequencies = new Dictionary<byte, int>();
    foreach (byte value in fileData)
    {
        // TryGetValue leaves count at 0 for a value seen for the first time.
        frequencies.TryGetValue(value, out int count);
        frequencies[value] = count + 1;
    }

    // Build the Huffman tree and derive the per-byte bit strings from it.
    HuffmanNode root = BuildHuffmanTree(frequencies, cancellationToken);
    var codeTable = new Dictionary<byte, string>();
    GenerateCodeTable(root, "", codeTable);

    // Encode the data and render it as upper-case hex without separators.
    byte[] compressedData = CompressFileAsync(fileData, codeTable);
    string compressedDataString = Convert.ToHexString(compressedData);

    // NOTE(review): this writes the entire encoded payload to the console on every
    // call; it looks like leftover debug output - confirm whether it is still wanted.
    Console.WriteLine(compressedDataString);

    // Wrap the UTF-8 bytes of the hex text in a stream positioned at the start.
    byte[] compressedBytes = Encoding.UTF8.GetBytes(compressedDataString);
    var streamData = new MemoryStream(compressedBytes);

    return new FileStreamResult(streamData, "text/plain")
    {
        FileDownloadName = "compressedFile"
    };
}

/// <summary>
/// Builds a Huffman tree from a table of byte frequencies.
/// </summary>
/// <param name="dictionary">Maps each byte value to the number of times it occurs.</param>
/// <param name="cancellationToken">Token checked once per merge step.</param>
/// <returns>
/// The root of the tree. When <paramref name="dictionary"/> holds a single entry,
/// the root is that entry's leaf node.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="dictionary"/> is null.</exception>
/// <exception cref="ArgumentException"><paramref name="dictionary"/> is empty.</exception>
private HuffmanNode BuildHuffmanTree(Dictionary<byte, int> dictionary, CancellationToken cancellationToken)
{
    ArgumentNullException.ThrowIfNull(dictionary);
    if (dictionary.Count == 0)
    {
        throw new ArgumentException("At least one symbol is required to build a Huffman tree.", nameof(dictionary));
    }

    // PriorityQueue dequeues the element with the LOWEST priority value first, which is
    // exactly what Huffman construction needs, so the default comparer is used.
    var queue = new PriorityQueue<HuffmanNode, int>(dictionary.Count);
    foreach (var (symbol, frequency) in dictionary)
    {
        queue.Enqueue(new HuffmanNode { Value = symbol, Frequency = frequency }, frequency);
    }

    // Repeatedly merge the two least frequent nodes until one root remains.
    // Checking the count first keeps a single-symbol input from dequeuing twice.
    while (queue.Count > 1)
    {
        cancellationToken.ThrowIfCancellationRequested();

        HuffmanNode left = queue.Dequeue();
        HuffmanNode right = queue.Dequeue();

        var parent = new HuffmanNode
        {
            Frequency = left.Frequency + right.Frequency,
            Left = left,
            Right = right
        };
        queue.Enqueue(parent, parent.Frequency);
    }

    return queue.Dequeue();
}

/// <summary>
/// Walks the tree and records the bit string ('0' = left, '1' = right) of every leaf.
/// </summary>
/// <param name="node">The (sub)tree to walk.</param>
/// <param name="code">The bit string accumulated on the path from the root to <paramref name="node"/>.</param>
/// <param name="codeTable">Receives one entry per leaf: byte value to bit string.</param>
/// <exception cref="ArgumentNullException"><paramref name="node"/> or <paramref name="codeTable"/> is null.</exception>
public void GenerateCodeTable(HuffmanNode node, string code, Dictionary<byte, string> codeTable)
{
    ArgumentNullException.ThrowIfNull(node);
    ArgumentNullException.ThrowIfNull(codeTable);

    if (node.Value.HasValue)
    {
        // A leaf reached with an empty path means the tree is a single leaf; give it
        // the one-bit code "0" so the symbol still produces output when encoded.
        codeTable[node.Value.Value] = string.IsNullOrEmpty(code) ? "0" : code;
        return;
    }

    // Internal node: extend the path with "0" for the left child and "1" for the right.
    GenerateCodeTable(node.Left, code + "0", codeTable);
    GenerateCodeTable(node.Right, code + "1", codeTable);
}

/// <summary>
/// Encodes <paramref name="data"/> by replacing every byte with its bit string from
/// <paramref name="codeTable"/>.
/// </summary>
/// <remarks>
/// NOTE(review): each code bit is emitted as a whole byte (0 or 1), so the result is
/// not bit-packed and is usually larger than the input. Packing eight bits per byte
/// would change the output format, so it is left as is - confirm with the consumer of
/// this data before changing it.
/// </remarks>
/// <param name="data">The bytes to encode.</param>
/// <param name="codeTable">Maps each byte value to a string of '0' and '1' characters.</param>
/// <returns>One element per code bit, each either 0 or 1.</returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> or <paramref name="codeTable"/> is null.</exception>
/// <exception cref="KeyNotFoundException">A byte in <paramref name="data"/> has no entry in <paramref name="codeTable"/>.</exception>
public static byte[] CompressFileAsync(byte[] data, Dictionary<byte, string> codeTable)
{
    ArgumentNullException.ThrowIfNull(data);
    ArgumentNullException.ThrowIfNull(codeTable);

    var bits = new List<byte>();
    foreach (byte value in data)
    {
        if (!codeTable.TryGetValue(value, out string code))
        {
            throw new KeyNotFoundException($"No Huffman code is defined for byte value {value}.");
        }

        // Characters other than '0' and '1' are skipped.
        foreach (char bit in code)
        {
            if (bit == '0')
            {
                bits.Add(0);
            }
            else if (bit == '1')
            {
                bits.Add(1);
            }
        }
    }

    return bits.ToArray();
}
} // closes the enclosing class, whose declaration starts before this section

/// <summary>
/// A node of a Huffman tree.
/// </summary>
public class HuffmanNode
{
    /// <summary>The byte value stored in a leaf; null for internal nodes.</summary>
    public byte? Value { get; set; }

    /// <summary>Occurrence count of the leaf's byte, or the sum of both children for an internal node.</summary>
    public int Frequency { get; set; }

    /// <summary>Left child (the "0" branch).</summary>
    public HuffmanNode Left { get; set; }

    /// <summary>Right child (the "1" branch).</summary>
    public HuffmanNode Right { get; set; }
}