从ClickHouse中流式查询大数据
提示:本篇不适合手机阅读,主要为了提供代码实现。
为了减速少大的Excel文件在内存中驻留,使用流的方式,边查询边组装,边下载文件相对来说是一个好的方式,下面是基于ClickHouse数据源的方式,下载100万条记录的处理方式,本地测试,内存只有100多M的使用,下载完后就会释放。
注:下面的代码仅是演示代码,本文中的组装excel参考https://github.com/mini-software/MiniExcel
using Microsoft.Extensions.Configuration;
using System.Globalization;
using System.IO;
using System.IO.Compression;
using System.Text.RegularExpressions;
using System.Text;
using System.Xml;
using Microsoft.AspNetCore.Razor.TagHelpers;
using static System.Net.WebRequestMethods;
var builder = WebApplication.CreateBuilder(args);
builder.Services.AddHttpClient();
var app = builder.Build();
app.MapGet("/getfile", async (IHttpClientFactory clientFactory, CancellationToken token) =>
{
//用http的方式查询clickhouse
var client = clientFactory.CreateClient();
var sql = "select field1,field2,field3,field4,field5,field6,field7,field8,field9,field10 from tablename order by field1 limit 1000000";
var stream = await client.GetStreamAsync($"http://127.0.0.1:8123/?user=dev_owner&password=mypassword&query={sql} FORMAT JSONCompactEachRowWithNamesAndTypes", token);
var utf8encoding = new UTF8Encoding(true);
var bufferSize = 5 * 1024 * 1024;
var memoryStream = new MemoryStream();
var archive = new ExcelZipArchive(memoryStream, ZipArchiveMode.Create, true, utf8encoding);
var zipDictionary = new Dictionary<string, ZipPackageInfo>();
var id = $"R{Guid.NewGuid():N}";
var sheetName = "Sheet1";
var sheetPath = $"xl/worksheets/sheet1.xml";
var sheetIdx = 1;
#region 组装openxml的zip
{
var _defaultRels = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?>
<Relationships xmlns=""http://schemas.openxmlformats.org/package/2006/relationships"">
<Relationship Type=""http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"" Target=""xl/workbook.xml"" Id=""Rfc2254092b6248a9"" />
</Relationships>");
CreateZipEntry("_rels/.rels", "application/vnd.openxmlformats-package.relationships+xml", _defaultRels);
}
{
var _defaultSharedString = ReplaceString("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?><sst xmlns=\"http://schemas.openxmlformats.org/spreadsheetml/2006/main\" count=\"0\" uniqueCount=\"0\"></sst>");
CreateZipEntry("xl/sharedStrings.xml", "application/vnd.openxmlformats-package.relationships+xml", _defaultSharedString);
}
{
var entry = archive.CreateEntry(sheetPath, CompressionLevel.Fastest);
using var zipStream = entry.Open();
using var writer = new StreamWriter(zipStream, utf8encoding, bufferSize);
writer.Write($@"<?xml version=""1.0"" encoding=""utf-8""?><x:worksheet xmlns:x=""http://schemas.openxmlformats.org/spreadsheetml/2006/main"">");
var yIndex = 1;
var xIndex = 1;
{
var sReader = new StreamReader(stream);
if (!sReader.EndOfStream)
{
var nameLine = await sReader.ReadLineAsync();
var typeLine = await sReader.ReadLineAsync();
var fieldNameArr = System.Text.Json.JsonSerializer.Deserialize<object[]>(nameLine);
int fieldCount = fieldNameArr.Length;
//处理列宽度
writer.Write($@"<x:cols>");
for (int i = 0; i < fieldCount; i++)
{
writer.Write($@"<x:col min=""{i + 1}"" max=""{i + 1}"" {$@"width=""30"""} customWidth=""1"" />");
}
writer.Write($@"</x:cols>");
//处理表格
writer.Write("<x:sheetData>");
//处理表头
writer.Write($"<x:row r=\"{yIndex}\">");
xIndex = 1;
for (int i = 0; i < fieldCount; i++)
{
var columnName = fieldNameArr[i].ToString();
WriteC(writer, "1", columnName);
xIndex++;
}
writer.Write($"</x:row>");
yIndex++;
}
//处理表格数据
while (!sReader.EndOfStream)
{
writer.Write($"<x:row r=\"{yIndex}\">");
var dataLine = await sReader.ReadLineAsync();
var dataArr = System.Text.Json.JsonSerializer.Deserialize<object[]>(dataLine);
xIndex = 1;
for (var i = 0; i < dataArr.Length; i++)
{
WriteCell(writer, yIndex, xIndex, dataArr[i]);
xIndex++;
}
writer.Write($"</x:row>");
yIndex++;
}
}
writer.Write("</x:sheetData>");
writer.Write("</x:worksheet>");
writer.Flush();
}
{
string _defaultStylesXml = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?>
<x:styleSheet xmlns:x=""http://schemas.openxmlformats.org/spreadsheetml/2006/main"">
<x:numFmts count=""1"">
<x:numFmt numFmtId=""0"" formatCode="""" />
</x:numFmts>
<x:fonts count=""2"">
<x:font>
<x:vertAlign val=""baseline"" />
<x:sz val=""11"" />
<x:color rgb=""FF000000"" />
<x:name val=""Calibri"" />
<x:family val=""2"" />
</x:font>
<x:font>
<x:vertAlign val=""baseline"" />
<x:sz val=""11"" />
<x:color rgb=""00000000"" />
<x:name val=""Calibri"" />
<x:family val=""2"" />
</x:font>
</x:fonts>
<x:fills count=""3"">
<x:fill>
<x:patternFill patternType=""none"" />
</x:fill>
<x:fill>
<x:patternFill patternType=""gray125"" />
</x:fill>
<x:fill>
<x:patternFill patternType=""solid"">
<x:fgColor rgb=""FFFFFFFF"" />
</x:patternFill>
</x:fill>
</x:fills>
<x:borders count=""2"">
<x:border diagonalUp=""0"" diagonalDown=""0"">
<x:left style=""none"">
<x:color rgb=""FF000000"" />
</x:left>
<x:right style=""none"">
<x:color rgb=""FF000000"" />
</x:right>
<x:top style=""none"">
<x:color rgb=""FF000000"" />
</x:top>
<x:bottom style=""none"">
<x:color rgb=""FF000000"" />
</x:bottom>
<x:diagonal style=""none"">
<x:color rgb=""FF000000"" />
</x:diagonal>
</x:border>
<x:border diagonalUp=""0"" diagonalDown=""0"">
<x:left style=""thin"">
<x:color rgb=""FF000000"" />
</x:left>
<x:right style=""thin"">
<x:color rgb=""FF000000"" />
</x:right>
<x:top style=""thin"">
<x:color rgb=""FF000000"" />
</x:top>
<x:bottom style=""thin"">
<x:color rgb=""FF000000"" />
</x:bottom>
<x:diagonal style=""none"">
<x:color rgb=""FF000000"" />
</x:diagonal>
</x:border>
</x:borders>
<x:cellStyleXfs count=""4"">
<x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""0"" applyAlignment=""1"" applyProtection=""1"">
<x:protection locked=""1"" hidden=""0"" />
</x:xf>
<x:xf numFmtId=""14"" fontId=""1"" fillId=""2"" borderId=""1"" applyNumberFormat=""1"" applyFill=""0"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1"">
<x:protection locked=""1"" hidden=""0"" />
</x:xf>
<x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""1"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1"">
<x:protection locked=""1"" hidden=""0"" />
</x:xf>
<x:xf numFmtId=""3"" fontId=""0"" fillId=""0"" borderId=""1"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1"">
<x:protection locked=""1"" hidden=""0""/>
</x:xf>
</x:cellStyleXfs>
<x:cellXfs count=""5"">
<x:xf></x:xf>
<x:xf numFmtId=""0"" fontId=""1"" fillId=""2"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""0"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1"">
<x:alignment horizontal=""left"" vertical=""bottom"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0"" />
<x:protection locked=""1"" hidden=""0"" />
</x:xf>
<x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1"">
<x:alignment horizontal=""general"" vertical=""bottom"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0"" />
<x:protection locked=""1"" hidden=""0"" />
</x:xf>
<x:xf numFmtId=""14"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1"">
<x:alignment horizontal=""general"" vertical=""bottom"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0"" />
<x:protection locked=""1"" hidden=""0"" />
</x:xf>
<x:xf numFmtId=""0"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyBorder=""1"" applyAlignment=""1"">
<x:alignment horizontal=""fill""/>
</x:xf>
<x:xf numFmtId=""3"" fontId=""0"" fillId=""0"" borderId=""1"" xfId=""0"" applyNumberFormat=""1"" applyFill=""1"" applyBorder=""1"" applyAlignment=""1"" applyProtection=""1"">
<x:alignment horizontal=""center"" vertical=""center"" textRotation=""0"" wrapText=""0"" indent=""0"" relativeIndent=""0"" justifyLastLine=""0"" shrinkToFit=""0"" readingOrder=""0""/>
<x:protection locked=""1"" hidden=""0""/>
</x:xf>
</x:cellXfs>
<x:cellStyles count=""1"">
<x:cellStyle name=""Normal"" xfId=""0"" builtinId=""0"" />
</x:cellStyles>
</x:styleSheet>");
var styleXml = _defaultStylesXml;
CreateZipEntry(@"xl/styles.xml", "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml", styleXml);
}
{
string _defaultDrawingXmlRels = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8"" standalone=""yes""?>
<Relationships xmlns=""http://schemas.openxmlformats.org/package/2006/relationships"">
{{format}}
</Relationships>");
var drawing = new StringBuilder();
CreateZipEntry($"xl/drawings/_rels/drawing1.xml.rels", "",
_defaultDrawingXmlRels.Replace("{{format}}", drawing.ToString()));
}
{
var drawing = new StringBuilder();
string _defaultDrawing = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8"" standalone=""yes""?>
<xdr:wsDr xmlns:a=""http://schemas.openxmlformats.org/drawingml/2006/main""
xmlns:r=""http://schemas.openxmlformats.org/officeDocument/2006/relationships""
xmlns:xdr=""http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing"">
{{format}}
</xdr:wsDr>");
CreateZipEntry($"xl/drawings/drawing1.xml", "application/vnd.openxmlformats-officedocument.drawing+xml",
_defaultDrawing.Replace("{{format}}", drawing.ToString()));
}
{
var workbookXml = new StringBuilder();
var workbookRelsXml = new StringBuilder();
var sheetId = 1;
workbookXml.AppendLine($@"<x:sheet name=""{sheetName}"" sheetId=""{sheetId}"" r:id=""{id}"" />");
workbookRelsXml.AppendLine($@"<Relationship Type=""http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"" Target=""/{sheetPath}"" Id=""{id}"" />");
var sheetRelsXml = ReplaceString($@"<Relationship Type=""http://schemas.openxmlformats.org/officeDocument/2006/relationships/drawing"" Target=""../drawings/drawing{sheetId}.xml"" Id=""drawing{sheetId}"" />");
string _defaultSheetRelXml = ReplaceString(@"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?>
<Relationships xmlns=""http://schemas.openxmlformats.org/package/2006/relationships"">
{{format}}
</Relationships>");
CreateZipEntry($"xl/worksheets/_rels/sheet{sheetIdx}.xml.rels", "",
_defaultSheetRelXml.Replace("{{format}}", sheetRelsXml));
string _defaultWorkbookXml = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?>
<x:workbook xmlns:r=""http://schemas.openxmlformats.org/officeDocument/2006/relationships""
xmlns:x=""http://schemas.openxmlformats.org/spreadsheetml/2006/main"">
<x:sheets>
{{sheets}}
</x:sheets>
</x:workbook>");
CreateZipEntry(@"xl/workbook.xml", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml",
_defaultWorkbookXml.Replace("{{sheets}}", workbookXml.ToString()));
string _defaultWorkbookXmlRels = ReplaceString(@"<?xml version=""1.0"" encoding=""utf-8""?>
<Relationships xmlns=""http://schemas.openxmlformats.org/package/2006/relationships"">
{{sheets}}
<Relationship Type=""http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"" Target=""/xl/styles.xml"" Id=""R3db9602ace774fdb"" />
</Relationships>");
CreateZipEntry(@"xl/_rels/workbook.xml.rels", "",
_defaultWorkbookXmlRels.Replace("{{sheets}}", workbookRelsXml.ToString()));
var sb = new StringBuilder(@"<?xml version=""1.0"" encoding=""UTF-8"" standalone=""yes""?><Types xmlns=""http://schemas.openxmlformats.org/package/2006/content-types""><Default ContentType=""application/vnd.openxmlformats-officedocument.spreadsheetml.printerSettings"" Extension=""bin""/><Default ContentType=""application/xml"" Extension=""xml""/><Default ContentType=""image/jpeg"" Extension=""jpg""/><Default ContentType=""image/png"" Extension=""png""/><Default ContentType=""image/gif"" Extension=""gif""/><Default ContentType=""application/vnd.openxmlformats-package.relationships+xml"" Extension=""rels""/>");
foreach (var p in zipDictionary)
{
sb.Append($"<Override ContentType=\"{p.Value.ContentType}\" PartName=\"/{p.Key}\" />");
}
sb.Append("</Types>");
var entry = archive.CreateEntry("[Content_Types].xml", CompressionLevel.Fastest);
using var zipStream = entry.Open();
using var writer = new StreamWriter(zipStream, utf8encoding, bufferSize);
writer.Write(sb.ToString());
}
#endregion
archive.Dispose();
void CreateZipEntry(string path, string contentType, string content)
{
var entry = archive.CreateEntry(path, CompressionLevel.Fastest);
using var zipStream = entry.Open();
using var writer = new StreamWriter(zipStream, utf8encoding, bufferSize);
writer.Write(content);
if (!string.IsNullOrEmpty(contentType))
zipDictionary.Add(path, new ZipPackageInfo(entry, contentType));
}
memoryStream.Seek(0, SeekOrigin.Begin);
await memoryStream.FlushAsync(token);
return TypedResults.File(fileStream: memoryStream, contentType: "application/octet-stream", fileDownloadName: $"{DateTime.Now.ToString("yyyyMMddhhssmm")}.xlsx");
});
app.Run();
string ReplaceString(string xml) => xml.Replace("\r", "").Replace("\n", "").Replace("\t", "");
string ConvertXyToCell(int x, int y)
{
int dividend = x;
string columnName = String.Empty;
int modulo;
while (dividend > 0)
{
modulo = (dividend - 1) % 26;
columnName = Convert.ToChar(65 + modulo).ToString() + columnName;
dividend = (int)((dividend - modulo) / 26);
}
return $"{columnName}{y}";
}
void WriteC(StreamWriter writer, string r, string columnName)
{
writer.Write($"<x:c r=\"{r}\" t=\"str\" s=\"1\">");
writer.Write($"<x:v>{EncodeXML(columnName)}"); //issue I45TF5
writer.Write($"</x:v>");
writer.Write($"</x:c>");
}
void WriteCell(StreamWriter writer, int rowIndex, int cellIndex, object value)
{
var v = string.Empty;
var t = "str";
var s = "2";
if (value == null)
{
v = "";
}
else if (value is string str)
{
v = EncodeXML(str);
}
else
{
var type = value.GetType();
type = Nullable.GetUnderlyingType(type) ?? type;
if (IsNumericType(type))
{
t = "n";
//if (isMoney)
//{
// s = "5";
//}
if (type.IsAssignableFrom(typeof(decimal)))
v = ((decimal)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(Int32)))
v = ((Int32)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(Double)))
v = ((Double)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(Int64)))
v = ((Int64)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(UInt32)))
v = ((UInt32)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(UInt16)))
v = ((UInt16)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(UInt64)))
v = ((UInt64)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(Int16)))
v = ((Int16)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(Single)))
v = ((Single)value).ToString(CultureInfo.InvariantCulture);
else if (type.IsAssignableFrom(typeof(Single)))
v = ((Single)value).ToString(CultureInfo.InvariantCulture);
else
v = (decimal.Parse(value.ToString())).ToString(CultureInfo.InvariantCulture);
}
else if (type == typeof(bool))
{
t = "b";
v = (bool)value ? "1" : "0";
}
else if (type == typeof(DateTime))
{
t = null;
s = "3";
v = ((DateTime)value).ToOADate().ToString(CultureInfo.InvariantCulture);
}
else
{
v = EncodeXML(value.ToString());
}
}
var columname = ConvertXyToCell(cellIndex, rowIndex);
if (v != null && (v.StartsWith(" ", StringComparison.Ordinal) || v.EndsWith(" ", StringComparison.Ordinal)))
writer.Write($"<x:c r=\"{columname}\" {(t == null ? "" : $"t =\"{t}\"")} s=\"{s}\" xml:space=\"preserve\"><x:v>{v}</x:v></x:c>");
else
writer.Write($"<x:c r=\"{columname}\" {(t == null ? "" : $"t =\"{t}\"")} s=\"{s}\"><x:v>{v}</x:v></x:c>");
}
bool IsNumericType(Type type, bool isNullableUnderlyingType = false)
{
if (isNullableUnderlyingType)
type = Nullable.GetUnderlyingType(type) ?? type;
switch (Type.GetTypeCode(type))
{
//case TypeCode.Byte:
//case TypeCode.SByte:
case TypeCode.UInt16:
case TypeCode.UInt32:
case TypeCode.UInt64:
case TypeCode.Int16:
case TypeCode.Int32:
case TypeCode.Int64:
case TypeCode.Decimal:
case TypeCode.Double:
case TypeCode.Single:
return true;
default:
return false;
}
}
string EncodeXML(string value) => value == null
? string.Empty
: XmlEncoder.EncodeString(value)
.Replace("&", "&")
.Replace("<", "<")
.Replace(">", ">")
.Replace("\"", """)
.Replace("'", "'")
.ToString();
public class ExcelZipArchive : ZipArchive
{
public ExcelZipArchive(Stream stream, ZipArchiveMode mode, bool leaveOpen, Encoding entryNameEncoding)
: base(stream, mode, leaveOpen, entryNameEncoding)
{
}
public new void Dispose()
{
Dispose(disposing: true);
GC.SuppressFinalize(this);
}
}
class XmlEncoder
{
private static readonly Regex xHHHHRegex = new Regex("_(x[\\dA-Fa-f]{4})_", RegexOptions.Compiled);
private static readonly Regex Uppercase_X_HHHHRegex = new Regex("_(X[\\dA-Fa-f]{4})_", RegexOptions.Compiled);
public static StringBuilder EncodeString(string encodeStr)
{
if (encodeStr == null) return null;
encodeStr = xHHHHRegex.Replace(encodeStr, "_x005F_$1_");
var sb = new StringBuilder(encodeStr.Length);
foreach (var ch in encodeStr)
{
if (XmlConvert.IsXmlChar(ch))
sb.Append(ch);
else
sb.Append(XmlConvert.EncodeName(ch.ToString()));
}
return sb;
}
public static string DecodeString(string decodeStr)
{
if (string.IsNullOrEmpty(decodeStr))
return string.Empty;
decodeStr = Uppercase_X_HHHHRegex.Replace(decodeStr, "_x005F_$1_");
return XmlConvert.DecodeName(decodeStr);
}
private static readonly Regex EscapeRegex = new Regex("_x([0-9A-F]{4,4})_");
public static string ConvertEscapeChars(string input)
{
return EscapeRegex.Replace(input, m => ((char)uint.Parse(m.Groups[1].Value, NumberStyles.HexNumber)).ToString());
}
}
internal class ZipPackageInfo
{
public ZipArchiveEntry ZipArchiveEntry { get; set; }
public string ContentType { get; set; }
public ZipPackageInfo(ZipArchiveEntry zipArchiveEntry, string contentType)
{
this.ZipArchiveEntry = zipArchiveEntry;
ContentType = contentType;
}
}
想要更快更方便的了解相关知识,可以关注微信公众号
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 一文读懂知识蒸馏
· 终于写完轮子一部分:tcp代理 了,记录一下
2023-02-15 『 再看.NET7』是否数值类型
2023-02-15 『 再看.NET7』泛性特性使用场景
2017-02-15 数据类型(字面值)
2017-02-15 数据类型表格
2017-02-15 数据类型