SgmlReader html to xml
SgmlReader
SgmlReader 是一个开源的解析器，可以用 SgmlReader 类来解析 HTML 文件，并将其转换为格式规范（well-formed）的 XML（XHTML）。
主站地址为:http://archive.msdn.microsoft.com/SgmlReader
下面代码使用SgmlReader格式化HTML:
/// <summary>
/// Parses an HTML fragment or document with <c>SgmlReader</c> and returns
/// it re-serialized as indented, well-formed XML markup.
/// </summary>
/// <param name="html">The HTML text to convert. May be null, empty, or whitespace.</param>
/// <returns>
/// The indented markup of the children of the parsed document's root element
/// (the root element itself is not written), or <see cref="string.Empty"/> when
/// <paramref name="html"/> is null, empty, whitespace-only, or yields no root element.
/// </returns>
public static string Convert(string html)
{
    // IsNullOrWhiteSpace covers null as well; the previous html.Trim() call
    // threw NullReferenceException when html was null.
    if (string.IsNullOrWhiteSpace(html))
    {
        return string.Empty;
    }

    using (SgmlReader reader = new SgmlReader())
    {
        reader.DocType = "HTML";
        reader.InputStream = new StringReader(html);
        reader.WhitespaceHandling = WhitespaceHandling.None;

        using (StringWriter stringWriter = new StringWriter())
        using (XmlTextWriter writer = new XmlTextWriter(stringWriter))
        {
            writer.Formatting = Formatting.Indented;

            XmlDocument doc = new XmlDocument();
            doc.Load(reader);

            if (doc.DocumentElement == null)
            {
                return string.Empty;
            }

            // Only the root element's children are serialized, not the root itself.
            doc.DocumentElement.WriteContentTo(writer);

            // Push any buffered output into the StringWriter before reading it;
            // the using blocks take care of closing both writers.
            writer.Flush();
            return stringWriter.ToString();
        }
    }
}