XPathClass类,使用xpath返回属性值或文本

1,需要添加文件HtmlAgilityPack.dll以及对它的引用

2,类代码

/// <summary>
/// Helpers that evaluate an XPath expression against an HTML string with
/// HtmlAgilityPack and return the inner text or attribute values of the
/// matched node(s).
/// </summary>
public class XPathClass
    {

        /// <summary>
        /// Parses the given HTML source into a new HtmlAgilityPack document.
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <returns>The parsed document.</returns>
        private static HtmlAgilityPack.HtmlDocument LoadDocument(string htmlSource)
        {
            // Fully qualified on purpose: an unqualified HtmlDocument becomes ambiguous
            // as soon as another namespace exposing a type of that name is imported.
            HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
            doc.LoadHtml(htmlSource);
            return doc;
        }

        /// <summary>
        /// Reads one attribute value from a node without throwing when the attribute is absent.
        /// </summary>
        /// <param name="node">Node to read from; must not be null.</param>
        /// <param name="attrName">Attribute name.</param>
        /// <returns>The attribute value, or null when the node has no such attribute.</returns>
        private static string ReadAttribute(HtmlNode node, string attrName)
        {
            // The attribute indexer yields null for a missing attribute, so it must be
            // checked before reading Value (the unchecked access used to throw
            // NullReferenceException).
            HtmlAttribute attribute = node.Attributes[attrName];
            return attribute != null ? attribute.Value : null;
        }

        /// <summary>
        /// Returns the inner text of the first node matching the XPath,
        /// e.g. "text" for &lt;span&gt;text&lt;/span&gt;.
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the node.</param>
        /// <returns>The inner text of the matched node, or null when no node matches.</returns>
        public static string GetInnerText(string htmlSource,string xpath)
        {
            HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
            if (node == null)
            {
                return null;
            }

            return node.InnerText;
        }

        /// <summary>
        /// Returns the value of an attribute on the first node matching the XPath,
        /// e.g. "#" for the href attribute of &lt;a href="#"&gt;&lt;/a&gt;.
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the node.</param>
        /// <param name="attrName">Name of the attribute to read.</param>
        /// <returns>
        /// The attribute value, or null when no node matches or the matched node
        /// does not have the attribute.
        /// </returns>
        public static string GetAttribute(string htmlSource,string xpath,string attrName)
        {
            HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
            if (node == null)
            {
                return null;
            }

            return ReadAttribute(node, attrName);
        }

        /// <summary>
        /// Returns the inner text of every node matching the XPath, in the order
        /// the nodes are returned by the selection.
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the nodes.</param>
        /// <returns>
        /// One inner-text entry per matched node, or null when the selection
        /// yields no node collection.
        /// </returns>
        public static List<string> GetInnerTexts(string htmlSource, string xpath)
        {
            HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                // Kept as null (not an empty list) so existing callers that test for null still work.
                return null;
            }

            List<string> texts = new List<string>(nodes.Count);
            foreach (HtmlNode node in nodes)
            {
                texts.Add(node.InnerText);
            }
            return texts;
        }

        /// <summary>
        /// Returns the value of an attribute for every node matching the XPath,
        /// e.g. "#" for the href attribute of &lt;a href="#"&gt;&lt;/a&gt;.
        /// </summary>
        /// <param name="htmlSource">HTML source of the page.</param>
        /// <param name="xpath">XPath expression selecting the nodes.</param>
        /// <param name="attrName">Name of the attribute to read.</param>
        /// <returns>
        /// One entry per matched node, in selection order; an entry is null when
        /// that node does not have the attribute. Returns null when the selection
        /// yields no node collection.
        /// </returns>
        public static List<string> GetAttributes(string htmlSource, string xpath, string attrName)
        {
            HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
            if (nodes == null)
            {
                // Kept as null (not an empty list) so existing callers that test for null still work.
                return null;
            }

            List<string> values = new List<string>(nodes.Count);
            foreach (HtmlNode node in nodes)
            {
                // A null entry keeps the result index-aligned with the matched nodes
                // when some of them lack the attribute.
                values.Add(ReadAttribute(node, attrName));
            }
            return values;
        }

    }

 

3,

posted @ 2013-01-31 11:39  金河  阅读(3711)  评论(0)  编辑  收藏  举报