使用HtmlAgilityPack批量抓取网页数据
相关软件点击下载
登录的处理:有些网页数据需要登录后才能提取,这里要使用 ieHTTPHeaders 来提取登录时提交的信息。
抓取网页
































如果出现乱码,请将编码调整为 GB2312 或 UTF-8。

-------------------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.VisualStudio.TestTools.WebTesting;
using HtmlAgilityPack;
/// <summary>
/// Coded web test that requests http://www.microsoft.com/ and validates that the
/// first list item inside the element with id "Nav" reads "Windows".
/// </summary>
public class WebTest1Coded : WebTest
{
    /// <summary>
    /// Yields the single request of this test, with the response validation
    /// handler attached.
    /// </summary>
    /// <returns>An enumerator over the requests to execute.</returns>
    public override IEnumerator<WebTestRequest> GetRequestEnumerator()
    {
        WebTestRequest request1 = new WebTestRequest("http://www.microsoft.com/");
        request1.ValidateResponse += new EventHandler<ValidationEventArgs>(request1_ValidateResponse);
        yield return request1;
    }

    /// <summary>
    /// Parses the response body with HtmlAgilityPack and marks the request valid
    /// only when the first &lt;li&gt; under the "Nav" element has the text "Windows".
    /// A missing "Nav" element or a missing list item is reported as a validation
    /// failure with an explanatory message rather than an unhandled exception.
    /// </summary>
    /// <param name="sender">The object raising the event (unused).</param>
    /// <param name="e">Validation arguments carrying the response and receiving the result.</param>
    private void request1_ValidateResponse(object sender, ValidationEventArgs e)
    {
        // Load the response body string as an HtmlAgilityPack.HtmlDocument.
        // LoadHtml rejects null, so fall back to an empty document in case the
        // response carries no text body.
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(e.Response.BodyString ?? string.Empty);

        // Locate the "Nav" element; GetElementbyId returns null when it is absent.
        HtmlNode navNode = doc.GetElementbyId("Nav");
        if (navNode == null)
        {
            e.IsValid = false;
            e.Message = "Element with id \"Nav\" was not found in the response.";
            return;
        }

        // Pick the first <li> element; SelectSingleNode returns null when nothing matches.
        HtmlNode firstNavItemNode = navNode.SelectSingleNode(".//li");
        if (firstNavItemNode == null)
        {
            e.IsValid = false;
            e.Message = "The \"Nav\" element does not contain any <li> element.";
            return;
        }

        // Validate the first list item in the Nav element says "Windows".
        e.IsValid = string.Equals(firstNavItemNode.InnerText, "Windows", StringComparison.Ordinal);
        if (!e.IsValid)
        {
            e.Message = "Expected the first Nav item to be \"Windows\" but found \""
                + firstNavItemNode.InnerText + "\".";
        }
    }
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步