C# 使用 XPath 简单爬取网站的内容
/// <summary>
/// Demonstrates scraping a web page with HtmlAgilityPack and XPath: loads a single
/// chapter page from read.qidian.com and extracts the text of the element whose
/// class attribute is exactly "read-content j_readContent".
/// </summary>
/// <remarks>
/// The two disabled regions below are alternative examples kept for reference.
/// The extracted text is stored only in the local <c>htmlD</c>; it is neither
/// returned nor printed, so inspect it in a debugger or extend the method as needed.
/// </remarks>
/// <exception cref="System.InvalidOperationException">
/// Thrown when the chapter content element cannot be found in the downloaded page.
/// </exception>
public static void Get()
{
    // string xpathtrI = "//*[@id='classify-list']/dl/dd/a/cite/span/i";

    #region Home page categories (disabled example)
    //// URL of the home page
    //string urlHome = "http://www.qidian.com/";
    //HtmlWeb web = new HtmlWeb();
    //HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(urlHome);
    //// Root node of the parsed HTML document
    //HtmlNode rootNode1 = htmlDoc.DocumentNode;
    //string xpathtrA = "//*[@id='classify-list']/dl/dd";
    //HtmlNodeCollection classList = rootNode1.SelectNodes(xpathtrA);
    //List<string> listINode = new List<string>();
    //foreach (HtmlNode item in classList)
    //{
    //    // Read the category name.
    //    // NOTE(review): an XPath starting with "//" is evaluated from the document root,
    //    // so this presumably yields the same first match for every item;
    //    // ".//a/cite/span/i" would be relative to item - confirm before re-enabling.
    //    string inode = item.SelectSingleNode("//a/cite/span/i").InnerText;
    //    listINode.Add(inode);
    //}
    #endregion

    #region Categories and details (disabled example)
    //string urlDetail = "http://xuanhuan.qidian.com/";
    //List<string> urlList = new List<string>();
    //urlList.Add("http://xuanhuan.qidian.com/");
    //urlList.Add("http://qihuan.qidian.com/");
    //urlList.Add("http://wuxia.qidian.com/");
    //urlList.Add("http://xianxia.qidian.com/");
    //urlList.Add("http://dushi.qidian.com/");
    //urlList.Add("http://zhichang.qidian.com/");
    //urlList.Add("http://junshi.qidian.com/");
    //urlList.Add("http://lishi.qidian.com/");
    //urlList.Add("http://youxi.qidian.com/");
    //urlList.Add("http://tiyu.qidian.com/");
    //urlList.Add("http://kehuan.qidian.com/");
    //urlList.Add("http://lingyi.qidian.com/");
    //foreach (string url in urlList)
    //{
    //    HtmlAgilityPack.HtmlDocument htmlDetail = web.Load(url);
    //    HtmlNode rootNode2 = htmlDetail.DocumentNode;
    //    string a = "//*[@class='book-info']";
    //    HtmlNodeCollection classList2 = rootNode2.SelectNodes(a);
    //    List<string> listINode2 = new List<string>();
    //    foreach (HtmlNode item in classList2)
    //    {
    //        // Collect the inner HTML of each book-info element.
    //        string inode = item.InnerHtml;
    //        listINode2.Add(inode);
    //    }
    //}
    #endregion

    #region Chapter content
    HtmlWeb web = new HtmlWeb();
    string chapterUrl = "http://read.qidian.com/chapter/zOGI9RYmNdFhO--gcH8iFg2/h3iHSEH1cSpMs5iq0oQwLQ2";
    HtmlAgilityPack.HtmlDocument chapterDocument = web.Load(chapterUrl);

    // Root node of the parsed HTML document.
    HtmlNode rootNode = chapterDocument.DocumentNode;
    string contentXPath = "//*[@class='read-content j_readContent']";

    // SelectSingleNode returns null when the XPath matches nothing (for example when the
    // site changes its markup), so fail with a clear message instead of a
    // NullReferenceException on the InnerText access below.
    HtmlNode contentNode = rootNode.SelectSingleNode(contentXPath);
    if (contentNode == null)
    {
        throw new System.InvalidOperationException(
            "No element matching XPath \"" + contentXPath + "\" was found at " + chapterUrl + ".");
    }

    // Plain text of the chapter body (markup stripped by InnerText).
    string htmlD = contentNode.InnerText;
    #endregion
}
只是拿一个例子而已。
我是小白,新建了一个群:461431726,希望在这里和大家一起交流,共同学习。前端的话建议加群:646564351,谢谢。