记录一下 DynamicXml 和 HtmlDocument 的使用方式。
之前解析 XML 都是使用 XmlDocument.Load,而现在可以利用 DynamicXml 生成 dynamic 对象,像访问对象成员一样读取节点和属性(成员在运行时动态绑定,并非编译期强类型),很好用。
/// <summary>
/// Loads the XML file at the given path into a <see cref="DynamicXml"/> wrapper
/// and reports success or failure on the console.
/// </summary>
/// <param name="fileName">Path of the XML file; passed unchanged to <c>DynamicXml.Load</c>.</param>
/// <returns>
/// The loaded wrapper, or <c>null</c> when <paramref name="fileName"/> is null/blank
/// or when loading throws.
/// </returns>
public static DynamicXml ReturnDynamicXml(this string fileName)
{
    if (string.IsNullOrWhiteSpace(fileName))
    {
        return null;
    }

    try
    {
        DynamicXml xmlObject = DynamicXml.Load(fileName);
        Console.WriteLine(fileName + "-XML解析数据成功");
        return xmlObject;
    }
    catch (Exception ex)
    {
        // Deliberately best-effort: the caller gets null instead of an exception,
        // but the cause is printed so the failure can be diagnosed.
        Console.WriteLine(fileName + "-XML解析数据失败,任务中断");
        Console.WriteLine(ex.Message);
        return null;
    }
}
/// <summary>
/// Wraps an <see cref="XElement"/> in a <see cref="DynamicObject"/> so that its
/// attributes and child elements can be read as dynamic members.
/// Namespaces are stripped when the document is loaded, so members are looked up
/// by local name only.
/// </summary>
public class DynamicXml : DynamicObject
{
    #region Fields

    /// <summary>The element wrapped by this instance.</summary>
    public XElement _root;

    #endregion

    #region .Ctor

    private DynamicXml(XElement root)
    {
        _root = root;
    }

    #endregion

    #region Methods

    /// <summary>
    /// Parses XML text and wraps its root element.
    /// </summary>
    /// <param name="xmlString">The XML markup to parse.</param>
    /// <returns>A wrapper around the namespace-stripped root element.</returns>
    public static DynamicXml Parse(string xmlString)
    {
        // XDocument.Parse reads markup; XDocument.Load(string) would interpret the
        // argument as a file path / URI and fail on XML text.
        return FromDocument(XDocument.Parse(xmlString));
    }

    /// <summary>
    /// Loads an XML file and wraps its root element.
    /// </summary>
    /// <param name="filename">Path (or URI) handed to <c>XDocument.Load</c>.</param>
    /// <returns>A wrapper around the namespace-stripped root element.</returns>
    public static DynamicXml Load(string filename)
    {
        return FromDocument(XDocument.Load(filename));
    }

    /// <summary>
    /// Removes namespace declarations and renames every element to its local name,
    /// in place, then wraps the document root.
    /// </summary>
    /// <param name="xml">The loaded document; it is modified.</param>
    /// <returns>A wrapper around <c>xml.Root</c>.</returns>
    private static DynamicXml FromDocument(XDocument xml)
    {
        // Include the root itself: leaving its xmlns declaration behind while moving
        // the element into "no namespace" makes the tree impossible to serialize.
        xml.Root.DescendantsAndSelf()
            .Attributes()
            .Where(attribute => attribute.IsNamespaceDeclaration)
            .Remove();

        foreach (XElement element in xml.Descendants())
        {
            element.Name = element.Name.LocalName;
        }

        return new DynamicXml(xml.Root);
    }

    /// <summary>
    /// Resolves a dynamic member read against the wrapped element.
    /// Lookup order: an attribute with that name (string value); several child
    /// elements with that name (list of wrappers); a single child element
    /// (a wrapper if it has child elements, otherwise its string value).
    /// </summary>
    /// <param name="binder">Supplies the requested member name.</param>
    /// <param name="result">The resolved value, or <c>null</c> when nothing matches.</param>
    /// <returns>
    /// Always <c>true</c>; an unknown member therefore yields <c>null</c> instead of
    /// a binder failure.
    /// </returns>
    public override bool TryGetMember(GetMemberBinder binder, out object result)
    {
        result = null;

        XAttribute attribute = _root.Attribute(binder.Name);
        if (attribute != null)
        {
            result = attribute.Value;
            return true;
        }

        // Materialize once so the children are not enumerated repeatedly.
        var children = _root.Elements(binder.Name).ToList();
        if (children.Count > 1)
        {
            result = children.Select(child => new DynamicXml(child)).ToList();
            return true;
        }

        if (children.Count == 1)
        {
            XElement only = children[0];
            if (only.HasElements)
            {
                result = new DynamicXml(only);
            }
            else
            {
                result = only.Value;
            }
        }

        // No match leaves result as null; still report success (see <returns>).
        return true;
    }

    #endregion
}
另外一个是用 HtmlDocument 通过 XPath 选取 HTML 节点,示例如下:
/// <summary>
/// Sample that downloads a web page, selects the table rows whose bgColor
/// attribute is "#FFFF99" via XPath, and prints chosen cells of each row.
/// </summary>
public class QueryHtmlNode
{
    /// <summary>
    /// Downloads the page, prints the selected columns of every matching row,
    /// and finally prints the number of matching rows.
    /// </summary>
    public void Go()
    {
        HtmlDocument doc = new HtmlDocument();

        // Dispose the client and the buffer once the document has been loaded.
        using (WebClient client = new WebClient())
        using (MemoryStream ms = new MemoryStream(client.DownloadData("http://www.44woool.com/")))
        {
            doc.Load(ms, Encoding.GetEncoding("gb2312"));
        }

        // SelectNodes returns null (not an empty collection) when nothing matches.
        var allRows = doc.DocumentNode.SelectNodes("//tr");
        if (allRows == null)
        {
            Console.WriteLine(0);
            return;
        }

        var trs = allRows
            .Where(it => null != it.Attributes["bgColor"] && it.Attributes["bgColor"].Value.Equals("#FFFF99"))
            .ToList();

        foreach (var tr in trs)
        {
            Console.WriteLine(new string('#', 50));

            #region --- Specific fields only ---
            //if (null != tr.SelectSingleNode("td/font"))
            //{
            //    // time
            //    Console.WriteLine(tr.SelectSingleNode("td/font").InnerText);
            //}
            //if (null != tr.SelectSingleNode("td[2]/a[@href]"))
            //{
            //    // link
            //    Console.WriteLine(tr.SelectSingleNode("td[2]/a[@href]").Attributes["href"].Value);
            //}
            #endregion

            #region --- Everything ---
            //foreach (var td in tr.SelectNodes("td"))
            //{
            //    Console.WriteLine(td.InnerText);
            //    foreach (var a in tr.SelectNodes("td/a[@href]"))
            //    {
            //        Console.WriteLine(a.InnerText + "---------" + a.Attributes["href"].Value);
            //    }
            //}
            #endregion

            #region --- Data by column ---
            var tds = tr.SelectNodes("td");
            if (tds == null)
            {
                // Row without any <td> cells: nothing to print for it.
                continue;
            }

            for (int i = 0; i < tds.Count; i++)
            {
                switch (i)
                {
                    case 0:
                        Console.WriteLine("服务器名称:" + tds[i].InnerText);
                        var link = tds[i].SelectSingleNode("a[@href]");
                        if (null != link)
                        {
                            Console.WriteLine("URL:" + link.Attributes["href"].Value);
                        }
                        break;
                    case 2:
                        Console.WriteLine("开放时间:" + tds[i].InnerText);
                        break;
                    case 4:
                        Console.WriteLine("版本介绍:" + tds[i].InnerText);
                        break;
                    case 5:
                        Console.WriteLine("QQ:" + tds[i].InnerText);
                        break;
                    default:
                        break;
                }
            }
            #endregion
        }

        Console.WriteLine(trs.Count);
    }
}