HttpClient 爬取网络数据

废话不多说,上教程。🤩


创建 HttpHelper类

/// <summary>
/// Static helper for simple HTTP GET and JSON POST requests.
/// All requests go through the single shared <see cref="Client"/> instance:
/// creating a new HttpClient per request can exhaust sockets under load.
/// </summary>
public class HttpHelper
{
    /// <summary>
    /// Shared HttpClient used by every request issued from this helper.
    /// </summary>
    public static HttpClient Client { get; } = new HttpClient();

    /// <summary>
    /// Sends a GET request to <paramref name="url"/> + <paramref name="name"/>
    /// and returns the response body as text.
    /// </summary>
    /// <param name="url">Request address.</param>
    /// <param name="name">Optional suffix appended verbatim to <paramref name="url"/>.</param>
    /// <returns>
    /// The response body when the status code indicates success;
    /// otherwise an empty string.
    /// </returns>
    public static async Task<string> GetHTMLByURLAsync(string url, string name = "")
    {
        string apiUrl = url + name;

        // The request and response are per-call objects and are disposed here;
        // the shared client is deliberately NOT disposed.
        using (HttpRequestMessage request = new HttpRequestMessage(HttpMethod.Get, apiUrl))
        using (HttpResponseMessage response = await Client.SendAsync(request))
        {
            if (!response.IsSuccessStatusCode)
            {
                return "";
            }

            return await response.Content.ReadAsStringAsync();
        }
    }

    /// <summary>
    /// Sends a POST request with an application/json body of the form
    /// <c>{"param": "..."}</c> and returns the response body as text.
    /// </summary>
    /// <param name="url">Request address.</param>
    /// <param name="param">Value serialized under the "param" key of the JSON body.</param>
    /// <returns>The response body.</returns>
    /// <exception cref="HttpRequestException">
    /// Thrown when the response status code does not indicate success.
    /// </exception>
    public static async Task<string> HttpPost(string url, string param)
    {
        string json = JsonConvert.SerializeObject(new { param });

        using (HttpContent content = new StringContent(json))
        {
            content.Headers.ContentType = new System.Net.Http.Headers.MediaTypeHeaderValue("application/json");

            using (HttpResponseMessage response = await Client.PostAsync(url, content))
            {
                // Throws on a non-success status, so no separate status check
                // is needed before reading the body.
                response.EnsureSuccessStatusCode();
                return await response.Content.ReadAsStringAsync();
            }
        }
    }
}

API使用

// Parser reused across requests. Constructed without arguments: the page
// address is passed to the HTTP helper, not to the parser.
// NOTE(review): presumably AngleSharp's HtmlParser — confirm against the project's usings.
private static readonly HtmlParser htmlParser = new HtmlParser();

/// <summary>
/// Downloads the page at <c>url</c>, parses it, and collects the href of every
/// anchor found inside elements matching <c>div.className</c>.
/// </summary>
/// <returns>A list of href values, in document order.</returns>
[HttpGet()]
public async Task<object> get()
{
    // NOTE(review): `url` is not declared in this snippet; it must be supplied
    // by the enclosing controller (field, constant or configuration).
    // Awaited rather than blocked on with .Result so the request thread is
    // released while the download is in flight.
    var html = await HttpHelper.GetHTMLByURLAsync(url);

    // Parse the HTML text into a document.
    var dom = htmlParser.ParseDocument(html);

    // QuerySelectorAll accepts CSS selector syntax.
    List<string> urllist = dom.QuerySelectorAll("div.className")
        .SelectMany(container => container.QuerySelectorAll("a"))
        .Select(anchor => anchor.GetAttribute("href"))
        // Anchors without an href attribute yield null; skip them.
        .Where(href => href != null)
        .ToList();

    return urllist;
}

到这里就🎉🎉🎉大功告成了!🎉🎉🎉 你学废了吗😀
posted @   浅·笑  阅读(16)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异
· 三行代码完成国际化适配,妙~啊~
点击右上角即可分享
微信分享提示