记录一下 DynamicXml 和 HtmlDocument 的使用方式

之前解析 XML 都是用 XmlDocument.Load,而现在可以利用 DynamicXml 生成 dynamic 对象,像访问属性一样读取节点和属性(注意:这是运行时动态绑定,并非编译期强类型),很好用。

        /// <summary>
        /// Loads the XML document at the given path into a <see cref="DynamicXml"/> wrapper.
        /// </summary>
        /// <param name="fileName">Path of the XML file; passed unchanged to <c>DynamicXml.Load</c>.</param>
        /// <returns>
        /// The loaded <see cref="DynamicXml"/>, or <c>null</c> when <paramref name="fileName"/>
        /// is null/blank or when loading fails.
        /// </returns>
        public static DynamicXml ReturnDynamicXml(this string fileName) {
            if (string.IsNullOrWhiteSpace(fileName))
                return null;
            try {
                DynamicXml xmlObject = DynamicXml.Load(fileName);
                Console.WriteLine(fileName + "-XML解析数据成功");
                return xmlObject;
            } catch (Exception ex) {
                // Best-effort contract: report the failure and return null instead of throwing.
                Console.WriteLine(fileName + "-XML解析数据失败,任务中断");
                // Surface the cause as well; previously the exception was silently discarded.
                Console.WriteLine(ex.GetType().Name + ": " + ex.Message);
                return null;
            }
        }
/// <summary>
/// Exposes an <see cref="XElement"/> through <c>dynamic</c> member access: attributes and
/// child elements of the wrapped element are resolved by name at run time.
/// Namespaces are stripped from element names when a document is parsed or loaded, so
/// members are addressed by local name only.
/// </summary>
public class DynamicXml : DynamicObject {

    #region Fields
    /// <summary>The element whose attributes and children this instance exposes.</summary>
    public XElement _root;
    #endregion

    #region .Ctor
    private DynamicXml(XElement root) {
        _root = root;
    }
    #endregion

    #region Methods
    /// <summary>
    /// Parses XML markup held in a string.
    /// </summary>
    /// <param name="xmlString">The XML text (not a file path).</param>
    /// <returns>A wrapper around the root element of the parsed document.</returns>
    public static DynamicXml Parse(string xmlString) {
        // XDocument.Parse reads markup; XDocument.Load would interpret the string as a path/URI.
        var xml = XDocument.Parse(xmlString);
        StripNamespaces(xml);
        return new DynamicXml(xml.Root);
    }

    /// <summary>
    /// Loads XML from a file path or URI.
    /// </summary>
    /// <param name="filename">Location of the XML document, as accepted by <c>XDocument.Load</c>.</param>
    /// <returns>A wrapper around the root element of the loaded document.</returns>
    public static DynamicXml Load(string filename) {
        var xml = XDocument.Load(filename);
        StripNamespaces(xml);
        return new DynamicXml(xml.Root);
    }

    /// <summary>
    /// Removes namespace declarations and rewrites every element name to its local name, in place.
    /// </summary>
    /// <param name="xml">The document to normalize.</param>
    private static void StripNamespaces(XDocument xml) {
        // Include the root itself: its xmlns declarations must go too, otherwise the root keeps
        // a namespace declaration that contradicts its new namespace-less name.
        xml.Root.DescendantsAndSelf().Attributes().Where(x => x.IsNamespaceDeclaration).Remove();

        foreach (var elem in xml.Root.DescendantsAndSelf())
            elem.Name = elem.Name.LocalName;
    }

    /// <summary>
    /// Resolves a dynamic member access against the wrapped element.
    /// </summary>
    /// <param name="binder">Supplies the requested member name.</param>
    /// <param name="result">
    /// The attribute value (string) if an attribute has that name; otherwise a list of
    /// <see cref="DynamicXml"/> when several child elements match, a single
    /// <see cref="DynamicXml"/> when the only match has child elements, its text value when it
    /// is a leaf, or <c>null</c> when nothing matches.
    /// </param>
    /// <returns>Always <c>true</c>.</returns>
    public override bool TryGetMember(GetMemberBinder binder, out object result) {
        result = null;

        // Attributes take precedence over child elements of the same name.
        var att = _root.Attribute(binder.Name);
        if (att != null) {
            result = att.Value;
            return true;
        }

        // Materialize once instead of enumerating the children for Count() and again for Element().
        var nodes = _root.Elements(binder.Name).ToList();
        if (nodes.Count > 1) {
            result = nodes.Select(n => new DynamicXml(n)).ToList();
        } else if (nodes.Count == 1) {
            var node = nodes[0];
            if (node.HasElements) {
                result = new DynamicXml(node);
            } else {
                result = node.Value;
            }
        }

        // Unknown names yield null (original behavior kept); returning false here would make
        // the dynamic binder throw instead.
        return true;
    }
    #endregion
}

另外一个是使用 HtmlDocument(HtmlAgilityPack)通过 XPath 选取 HTML 节点,示例如下:

    /// <summary>
    /// Sample showing how to pick nodes out of an HTML page with <c>HtmlDocument</c> XPath queries.
    /// </summary>
    public class QueryHtmlNode
    {
        /// <summary>
        /// Downloads the page, selects every <c>tr</c> whose <c>bgColor</c> attribute equals
        /// "#FFFF99", prints columns 0, 2, 4 and 5 of each such row, and finally prints the
        /// number of matching rows.
        /// </summary>
        public void Go()
        {
            HtmlDocument doc = new HtmlDocument();

            // Dispose the client and the buffer once the document has been loaded.
            // NOTE(review): Encoding.GetEncoding("gb2312") may need a code-page provider
            // registered on newer runtimes - confirm for the target framework.
            using (WebClient client = new WebClient())
            using (MemoryStream ms = new MemoryStream(client.DownloadData("http://www.44woool.com/")))
            {
                doc.Load(ms, Encoding.GetEncoding("gb2312"));
            }

            // SelectNodes returns null (not an empty collection) when nothing matches.
            var allRows = doc.DocumentNode.SelectNodes("//tr");
            if (allRows == null)
            {
                Console.WriteLine(0);
                return;
            }

            var trs = allRows.Where(it =>
            {
                var bgColor = it.Attributes["bgColor"];
                return null != bgColor && bgColor.Value.Equals("#FFFF99");
            }).ToList();

            foreach (var tr in trs)
            {
                Console.WriteLine(new string('#', 50));
                #region --- Specific fields only ---
                //if (null != tr.SelectSingleNode("td/font"))
                //{
                //    // time
                //    Console.WriteLine(tr.SelectSingleNode("td/font").InnerText);
                //}
                //if (null != tr.SelectSingleNode("td[2]/a[@href]"))
                //{
                //    // link
                //    Console.WriteLine(tr.SelectSingleNode("td[2]/a[@href]").Attributes["href"].Value);
                //} 
                #endregion
                #region --- Everything ---
                //foreach (var td in tr.SelectNodes("td"))
                //{
                //    Console.WriteLine(td.InnerText);

                //    foreach (var a in tr.SelectNodes("td/a[@href]"))
                //    {
                //        Console.WriteLine(a.InnerText + "---------" + a.Attributes["href"].Value);
                //    }
                //}
                #endregion
                #region --- Data by column ---
                var tds = tr.SelectNodes("td");
                if (tds == null)
                {
                    // Row without any td cells: nothing to print for it.
                    continue;
                }

                for (int i = 0; i < tds.Count; i++)
                {
                    switch (i)
                    {
                        case 0:
                            Console.WriteLine("服务器名称:" + tds[i].InnerText);
                            // Look the link up once and reuse it.
                            var link = tds[i].SelectSingleNode("a[@href]");
                            if (null != link)
                            {
                                Console.WriteLine("URL:" + link.Attributes["href"].Value);
                            }
                            break;
                        case 2:
                            Console.WriteLine("开放时间:" + tds[i].InnerText);
                            break;
                        case 4:
                            Console.WriteLine("版本介绍:" + tds[i].InnerText);
                            break;
                        case 5:
                            Console.WriteLine("QQ:" + tds[i].InnerText);
                            break;
                        default:
                            break;
                    }
                }

                #endregion
            }
            Console.WriteLine(trs.Count);
        }
    }

 

posted on 2014-05-08 09:26  ~紫鱼~  阅读(676)  评论(0编辑  收藏  举报