使用 Open XML SDK 实现 html 富文本转换为 docx 格式示例
使用 Open XML SDK 将 HTML 富文本转换为 docx 格式文档相对复杂。下面的示例手动检测 <strong> 和 <em> 标签,并为对应的文本应用粗体和斜体格式。
using System;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

/// <summary>
/// Example: converts a tiny HTML fragment to a docx file with the Open XML SDK,
/// mapping paragraph tags to paragraphs and strong/em tags to bold/italic runs.
/// </summary>
class Program
{
    // Inline formatting tags recognized inside a paragraph; any other markup is kept as literal text.
    static readonly string[] FormattingTags = { "<strong>", "</strong>", "<em>", "</em>" };

    /// <summary>
    /// Creates "output.docx" in the current directory from a hard-coded HTML sample.
    /// </summary>
    static void Main()
    {
        string htmlContent = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Create a new docx document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Split the HTML on paragraph tags; every non-empty piece becomes one docx paragraph.
            string[] paragraphs = htmlContent.Split(new[] { "<p>", "</p>" }, StringSplitOptions.RemoveEmptyEntries);
            foreach (string paragraphContent in paragraphs)
            {
                body.Append(BuildParagraph(paragraphContent));
            }
        }

        Console.WriteLine("HTML to docx conversion complete.");
    }

    /// <summary>
    /// Builds a paragraph from the inner HTML of one paragraph, emitting one run per
    /// stretch of text that shares the same bold/italic state.
    /// </summary>
    /// <param name="paragraphContent">Paragraph inner HTML (without the surrounding paragraph tags).</param>
    /// <returns>A paragraph containing one run per formatted text segment.</returns>
    static Paragraph BuildParagraph(string paragraphContent)
    {
        Paragraph paragraph = new Paragraph();
        bool bold = false;
        bool italic = false;
        int position = 0;

        while (true)
        {
            // Locate the nearest formatting tag at or after the current position.
            int tagIndex = -1;
            string tag = null;
            foreach (string candidate in FormattingTags)
            {
                int index = paragraphContent.IndexOf(candidate, position, StringComparison.Ordinal);
                if (index >= 0 && (tagIndex < 0 || index < tagIndex))
                {
                    tagIndex = index;
                    tag = candidate;
                }
            }

            // The text before the tag (or up to the end) is rendered with the state currently in effect.
            int textEnd = tag == null ? paragraphContent.Length : tagIndex;
            if (textEnd > position)
            {
                paragraph.Append(CreateRun(paragraphContent.Substring(position, textEnd - position), bold, italic));
            }

            if (tag == null)
            {
                break;
            }

            // The tag itself only toggles formatting state; it produces no text.
            switch (tag)
            {
                case "<strong>":
                    bold = true;
                    break;
                case "</strong>":
                    bold = false;
                    break;
                case "<em>":
                    italic = true;
                    break;
                case "</em>":
                    italic = false;
                    break;
            }

            position = tagIndex + tag.Length;
        }

        return paragraph;
    }

    /// <summary>
    /// Creates a run holding <paramref name="text"/> with the requested formatting.
    /// </summary>
    /// <param name="text">Text of the run.</param>
    /// <param name="bold">Whether the run is bold.</param>
    /// <param name="italic">Whether the run is italic.</param>
    /// <returns>The new run.</returns>
    static Run CreateRun(string text, bool bold, bool italic)
    {
        Run run = new Run();

        // Run properties are added at most once per run and before the text.
        if (bold || italic)
        {
            RunProperties runProperties = new RunProperties();
            if (bold)
            {
                runProperties.Bold = new Bold();
            }
            if (italic)
            {
                runProperties.Italic = new Italic();
            }
            run.Append(runProperties);
        }

        // Preserve leading/trailing spaces so that segments such as "This is " keep their separators.
        run.Append(new Text(text) { Space = SpaceProcessingModeValues.Preserve });
        return run;
    }
}
需要根据 HTML 标记的不同来创建相应的 docx 元素,例如将 <p> 标签映射到 docx 段落,将 <strong> 标签映射到粗体等。下面的示例给出了一个可扩展的处理框架。
using System;
using System.IO;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;

/// <summary>
/// Example skeleton: maps HTML markup to docx elements through a dedicated
/// processing method that can be extended with more tag handlers.
/// </summary>
class Program
{
    /// <summary>
    /// Creates "output.docx" in the current directory from a hard-coded HTML sample.
    /// </summary>
    static void Main()
    {
        string htmlContent = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Create a new docx document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Parse the HTML content and create the matching docx elements.
            ProcessHtmlContent(htmlContent, body);

            doc.Save();
        }

        Console.WriteLine("HTML to docx conversion complete.");
    }

    /// <summary>
    /// Maps HTML content to docx elements appended to <paramref name="parentElement"/>.
    /// Only a single paragraph element wrapping the whole input is handled; its inner
    /// markup is copied as literal text. Any other input is ignored.
    /// </summary>
    /// <param name="htmlContent">HTML fragment to convert.</param>
    /// <param name="parentElement">Element that receives the generated docx content.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    static void ProcessHtmlContent(string htmlContent, OpenXmlElement parentElement)
    {
        if (htmlContent == null)
        {
            throw new ArgumentNullException(nameof(htmlContent));
        }
        if (parentElement == null)
        {
            throw new ArgumentNullException(nameof(parentElement));
        }

        const string openTag = "<p>";
        const string closeTag = "</p>";

        // Ordinal comparison: the tags are markup, not linguistic text, and the slice
        // below relies on the match covering exactly the tag characters.
        if (htmlContent.Length >= openTag.Length + closeTag.Length
            && htmlContent.StartsWith(openTag, StringComparison.Ordinal)
            && htmlContent.EndsWith(closeTag, StringComparison.Ordinal))
        {
            string paragraphText = htmlContent.Substring(
                openTag.Length,
                htmlContent.Length - openTag.Length - closeTag.Length);

            // Preserve leading/trailing spaces of the paragraph text.
            Text text = new Text(paragraphText) { Space = SpaceProcessingModeValues.Preserve };
            parentElement.Append(new Paragraph(new Run(text)));
        }

        // Add more HTML tag handlers here as needed (e.g. strong -> bold, em -> italic).
    }
}
下例使用第三方库 Html2OpenXml(命名空间 HtmlToOpenXml)将 HTML 转换为 docx。
using System;
using System.IO;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;

/// <summary>
/// Example: converts an HTML fragment to a docx file by delegating the HTML
/// parsing to the HtmlConverter class from the HtmlToOpenXml namespace.
/// </summary>
class Program
{
    /// <summary>
    /// Creates "output.docx" in the current directory from a hard-coded HTML sample.
    /// </summary>
    static void Main()
    {
        const string html = "<p>This is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Start a new, empty word-processing package with a main part and a body.
        using (var package = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            var documentPart = package.AddMainDocumentPart();
            documentPart.Document = new Document();
            var documentBody = documentPart.Document.AppendChild(new Body());

            // Let the converter turn the HTML into Open XML elements.
            var htmlConverter = new HtmlConverter(documentPart)
            {
                ImageProcessing = ImageProcessing.AutomaticDownload
            };

            // Attach every element produced by the converter to the document body, in order.
            foreach (var element in htmlConverter.Parse(html))
            {
                documentBody.Append(element);
            }

            package.Save();
        }

        Console.WriteLine("HTML to docx conversion complete.");
    }
}
下例处理带内联 CSS 样式(style 属性)的 HTML。
using System;
using System.IO;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;

/// <summary>
/// Example: converts an HTML fragment carrying an inline style attribute to a docx
/// file using the HtmlConverter class from the HtmlToOpenXml namespace.
/// </summary>
class Program
{
    /// <summary>
    /// Creates "output.docx" in the current directory from a hard-coded HTML sample.
    /// </summary>
    static void Main()
    {
        string styledHtml = "<p style=\"color: blue; font-size: 14px;\">This is <strong>bold</strong> and <em>italic</em> text.</p>";

        // Create the new docx package.
        using (WordprocessingDocument wordDocument = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart part = wordDocument.AddMainDocumentPart();
            part.Document = new Document();

            Body target = new Body();
            part.Document.AppendChild(target);

            // Convert the HTML into Open XML elements with the converter.
            HtmlConverter html2docx = new HtmlConverter(part);
            html2docx.ImageProcessing = ImageProcessing.AutomaticDownload;
            var converted = html2docx.Parse(styledHtml);

            // Move the converted elements into the body one by one.
            foreach (var item in converted)
            {
                target.Append(item);
            }

            wordDocument.Save();
        }

        Console.WriteLine("HTML to docx conversion complete.");
    }
}
下例应用外部 CSS 样式:先把 CSS 类规则转换为对应元素的内联样式,再进行转换。
using System;
using System.IO;
using System.Text;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Wordprocessing;
using HtmlToOpenXml;

/// <summary>
/// Example: applies simple external CSS class rules to an HTML fragment by inlining
/// them as style attributes, then converts the result to a docx file.
/// </summary>
class Program
{
    /// <summary>
    /// Creates "output.docx" in the current directory from a hard-coded HTML sample
    /// and a hard-coded CSS rule.
    /// </summary>
    static void Main()
    {
        string htmlContent = "<p class=\"my-paragraph\">This is <strong>bold</strong> and <em>italic</em> text.</p>";
        string externalCss = ".my-paragraph { color: blue; font-size: 14px; }";

        // Create a new docx document.
        using (WordprocessingDocument doc = WordprocessingDocument.Create("output.docx", WordprocessingDocumentType.Document))
        {
            MainDocumentPart mainPart = doc.AddMainDocumentPart();
            mainPart.Document = new Document();
            Body body = mainPart.Document.AppendChild(new Body());

            // Turn the external CSS rules into inline styles.
            htmlContent = ApplyExternalCssToHtml(htmlContent, externalCss);

            // Convert the HTML content into docx elements with the converter.
            var converter = new HtmlConverter(mainPart);
            converter.ImageProcessing = ImageProcessing.AutomaticDownload;
            var paragraphs = converter.Parse(htmlContent);

            foreach (var paragraph in paragraphs)
            {
                body.Append(paragraph);
            }

            doc.Save();
        }

        Console.WriteLine("HTML to docx conversion complete.");
    }

    /// <summary>
    /// Replaces each <c>class="name"</c> attribute in the HTML with a <c>style="..."</c>
    /// attribute holding the declarations of the CSS rule whose selector is that class.
    /// This is a deliberately simple textual substitution: it only handles rules of the
    /// form <c>.name { declarations }</c> and attributes that contain exactly one class
    /// written with double quotes.
    /// </summary>
    /// <param name="htmlContent">HTML fragment to rewrite.</param>
    /// <param name="externalCss">CSS text containing the rules to inline.</param>
    /// <returns>The HTML with matching class attributes replaced by inline styles.</returns>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    static string ApplyExternalCssToHtml(string htmlContent, string externalCss)
    {
        if (htmlContent == null)
        {
            throw new ArgumentNullException(nameof(htmlContent));
        }
        if (externalCss == null)
        {
            throw new ArgumentNullException(nameof(externalCss));
        }

        // Each rule ends with '}', so splitting on it yields "selector { declarations" fragments.
        foreach (string rule in externalCss.Split('}'))
        {
            if (string.IsNullOrWhiteSpace(rule))
            {
                continue;
            }

            // Split into selector and declarations; skip fragments that have no '{'.
            string[] parts = rule.Split(new[] { '{' }, 2);
            if (parts.Length < 2)
            {
                continue;
            }

            // The selector is written ".name" in CSS but the HTML attribute holds just "name",
            // so the leading dot must be removed before searching for the attribute.
            string className = parts[0].Trim();
            if (className.StartsWith(".", StringComparison.Ordinal))
            {
                className = className.Substring(1);
            }
            if (className.Length == 0)
            {
                continue;
            }

            string style = parts[1].Trim();

            // Swap the class attribute for the equivalent inline style.
            htmlContent = htmlContent.Replace($"class=\"{className}\"", $"style=\"{style}\"");
        }

        return htmlContent;
    }
}