XPathClass类,使用xpath返回属性值或文本
1,需要添加文件HtmlAgilityPack.dll以及对它的引用
2,类代码
/// <summary>
/// Static helpers that parse an HTML string with HtmlAgilityPack and return the
/// inner text or an attribute value of the node(s) selected by an XPath expression.
/// </summary>
public class XPathClass
{
    /// <summary>
    /// Returns the inner text of the first node matched by <paramref name="xpath"/>,
    /// e.g. "text" for &lt;span&gt;text&lt;/span&gt;.
    /// </summary>
    /// <param name="htmlSource">HTML source of the page.</param>
    /// <param name="xpath">XPath expression selecting the node.</param>
    /// <returns>The inner text of the matched node, or null when no node matches.</returns>
    public static string GetInnerText(string htmlSource, string xpath)
    {
        HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
        return node != null ? node.InnerText : null;
    }

    /// <summary>
    /// Returns the value of an attribute on the first node matched by <paramref name="xpath"/>,
    /// e.g. "#" for the href attribute of &lt;a href="#"&gt;&lt;/a&gt;.
    /// </summary>
    /// <param name="htmlSource">HTML source of the page.</param>
    /// <param name="xpath">XPath expression selecting the node.</param>
    /// <param name="attrName">Name of the attribute to read.</param>
    /// <returns>
    /// The attribute value, or null when no node matches or the matched node
    /// does not carry the attribute.
    /// </returns>
    public static string GetAttribute(string htmlSource, string xpath, string attrName)
    {
        HtmlNode node = LoadDocument(htmlSource).DocumentNode.SelectSingleNode(xpath);
        return node != null ? ReadAttribute(node, attrName) : null;
    }

    /// <summary>
    /// Returns the inner text of every node matched by <paramref name="xpath"/>.
    /// </summary>
    /// <param name="htmlSource">HTML source of the page.</param>
    /// <param name="xpath">XPath expression selecting the nodes.</param>
    /// <returns>
    /// One inner-text entry per matched node, in match order, or null when the
    /// selection yields no node collection.
    /// </returns>
    public static List<string> GetInnerTexts(string htmlSource, string xpath)
    {
        HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
        if (nodes == null)
        {
            // Kept as null (not an empty list) so existing callers that test for null still work.
            return null;
        }

        List<string> texts = new List<string>(nodes.Count);
        foreach (HtmlNode node in nodes)
        {
            texts.Add(node.InnerText);
        }
        return texts;
    }

    /// <summary>
    /// Returns the value of an attribute for every node matched by <paramref name="xpath"/>,
    /// e.g. "#" for the href attribute of &lt;a href="#"&gt;&lt;/a&gt;.
    /// </summary>
    /// <param name="htmlSource">HTML source of the page.</param>
    /// <param name="xpath">XPath expression selecting the nodes.</param>
    /// <param name="attrName">Name of the attribute to read.</param>
    /// <returns>
    /// One entry per matched node, in match order; an entry is null when that node
    /// does not carry the attribute. Returns null when the selection yields no
    /// node collection.
    /// </returns>
    public static List<string> GetAttributes(string htmlSource, string xpath, string attrName)
    {
        HtmlNodeCollection nodes = LoadDocument(htmlSource).DocumentNode.SelectNodes(xpath);
        if (nodes == null)
        {
            // Kept as null (not an empty list) so existing callers that test for null still work.
            return null;
        }

        List<string> values = new List<string>(nodes.Count);
        foreach (HtmlNode node in nodes)
        {
            // A null placeholder keeps list positions aligned with the matched nodes.
            values.Add(ReadAttribute(node, attrName));
        }
        return values;
    }

    /// <summary>Parses the given HTML source into a new HtmlAgilityPack document.</summary>
    private static HtmlAgilityPack.HtmlDocument LoadDocument(string htmlSource)
    {
        // Fully qualified so the type cannot be confused with another HtmlDocument
        // type (e.g. System.Windows.Forms.HtmlDocument) that may also be in scope.
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(htmlSource);
        return doc;
    }

    /// <summary>Returns the named attribute's value, or null when the node lacks that attribute.</summary>
    private static string ReadAttribute(HtmlNode node, string attrName)
    {
        // Guard against a missing attribute instead of dereferencing a null lookup result.
        HtmlAgilityPack.HtmlAttribute attribute = node.Attributes[attrName];
        return attribute != null ? attribute.Value : null;
    }
}
3,