csdn爬网
/// <summary>
/// Console utility that walks the article-list pages of the CSDN blog
/// <c>dz45693</c> and opens every article found there in Internet Explorer
/// for a fixed amount of time.
/// </summary>
public partial class Program
{
    /// <summary>Full path of the browser executable used to open each article.</summary>
    private const string InternetExplorerPath = @"C:\Program Files\Internet Explorer\iexplore.exe";

    /// <summary>How long (in milliseconds) an article stays open in the browser.</summary>
    private const int PageDisplayMilliseconds = 1000 * 10;

    /// <summary>How long (in milliseconds) a process gets to exit after a polite close request.</summary>
    private const int GracefulExitMilliseconds = 1000;

    /// <summary>Matches the "共N页" ("N pages in total") pager text; group 1 is N.</summary>
    private static readonly Regex PageCountPattern = new Regex(@"共(\d{1,})页");

    /// <summary>Matches the relative URL of one article of the crawled blog.</summary>
    private static readonly Regex ArticleLinkPattern = new Regex(@"/dz45693/article/details/(\d{1,})");

    /// <summary>
    /// Entry point: reads the page count from the blog home page, then for
    /// every list page collects the article links and opens each one.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        CloseIE();
        string url = "http://blog.csdn.net/dz45693";
        string html = GetRequest(url);
        int count = GetPageCount(html);
        for (int i = 1; i <= count; i++)
        {
            string tempurl = url + "/article/list/" + i.ToString();
            html = GetRequest(tempurl);
            List<string> links = GetPageLink(html);
            foreach (string link in links)
            {
                SendRequest(link);
            }
            // Sweep any browser processes left over from this page.
            CloseIE();
        }
    }

    /// <summary>
    /// Closes every running <c>iexplore</c> process, then pauses for one second.
    /// Failures for an individual process are traced and do not stop the sweep.
    /// </summary>
    private static void CloseIE()
    {
        Process[] ps = Process.GetProcessesByName("iexplore");
        foreach (Process item in ps)
        {
            try
            {
                StopProcess(item);
            }
            catch (Exception ex)
            {
                Trace.WriteLine(ex.Message);
            }
            finally
            {
                // Release the handle only after we are done with the process;
                // members such as Id/HasExited are unusable once it is released.
                item.Dispose();
            }
        }
        Thread.Sleep(1000);
    }

    /// <summary>
    /// Stops a process: asks its main window to close first and kills the
    /// process if it has not exited within the grace period.
    /// </summary>
    /// <param name="process">A process object that is associated with a started process.</param>
    private static void StopProcess(Process process)
    {
        if (process.HasExited)
        {
            return;
        }

        if (process.CloseMainWindow() && process.WaitForExit(GracefulExitMilliseconds))
        {
            return;
        }

        try
        {
            process.Kill();
        }
        catch (InvalidOperationException)
        {
            // The process exited between the check above and Kill(); nothing left to do.
        }
    }

    /// <summary>
    /// Downloads the body of <paramref name="url"/> through the system proxy
    /// using the default credentials.
    /// </summary>
    /// <param name="url">Absolute HTTP URL to fetch.</param>
    /// <returns>
    /// The response body as text, or <see cref="string.Empty"/> when the
    /// request fails (the error message is traced).
    /// </returns>
    static string GetRequest(string url)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            IWebProxy proxy = WebRequest.GetSystemWebProxy();
            proxy.Credentials = CredentialCache.DefaultCredentials;
            request.Proxy = proxy;

            // Dispose the response even when reading the body throws.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            using (StreamReader sr = new StreamReader(response.GetResponseStream()))
            {
                return sr.ReadToEnd();
            }
        }
        catch (Exception ex)
        {
            Trace.WriteLine(ex.Message);
            return string.Empty;
        }
    }

    /// <summary>
    /// Opens <paramref name="url"/> in Internet Explorer, keeps it open for
    /// <see cref="PageDisplayMilliseconds"/> and then stops the browser process.
    /// </summary>
    /// <param name="url">URL passed to the browser as its command-line argument.</param>
    /// <returns>
    /// <c>true</c> when the browser was started and stopped without an
    /// exception; otherwise <c>false</c> (the error message is traced).
    /// </returns>
    static bool SendRequest(string url)
    {
        try
        {
            using (Process p = new Process())
            {
                p.StartInfo.Arguments = url;
                p.StartInfo.FileName = InternetExplorerPath;
                p.Start();
                Thread.Sleep(PageDisplayMilliseconds);
                StopProcess(p);
            }
            return true;
        }
        catch (Exception ex)
        {
            Trace.WriteLine(ex.Message);
            return false;
        }
    }

    /// <summary>
    /// Extracts the total number of list pages from the "共N页" pager text.
    /// </summary>
    /// <param name="html">HTML of a blog page; may be null or empty.</param>
    /// <returns>
    /// The page count, or 0 when the text is absent or the number cannot be
    /// represented as an <see cref="int"/>.
    /// </returns>
    static int GetPageCount(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return 0;
        }

        Match m = PageCountPattern.Match(html);
        if (!m.Success)
        {
            return 0;
        }

        // \d{1,} can match more digits than an int holds (or non-ASCII digits),
        // so parse defensively instead of letting int.Parse throw.
        int count;
        bool parsed = int.TryParse(
            m.Groups[1].Value,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out count);
        return parsed ? count : 0;
    }

    /// <summary>
    /// Collects the distinct article URLs found between the "article_list"
    /// marker and the following "papelist" marker of a list page.
    /// </summary>
    /// <param name="html">HTML of a list page; may be null or empty.</param>
    /// <returns>
    /// Absolute article URLs in order of first appearance. The list is empty
    /// when <paramref name="html"/> is null/empty or has no "article_list"
    /// marker. When no "papelist" marker follows, the rest of the document
    /// is scanned.
    /// </returns>
    static List<string> GetPageLink(string html)
    {
        List<string> list = new List<string>();
        if (string.IsNullOrEmpty(html))
        {
            return list;
        }

        int startindex = html.IndexOf("article_list", StringComparison.Ordinal);
        if (startindex < 0)
        {
            return list;
        }

        // Look for the end marker only after the start marker so the section
        // length can never become negative.
        int endindex = html.IndexOf("papelist", startindex, StringComparison.Ordinal);
        if (endindex < 0)
        {
            endindex = html.Length;
        }

        string section = html.Substring(startindex, endindex - startindex);
        HashSet<string> seen = new HashSet<string>(StringComparer.Ordinal);
        foreach (Match m in ArticleLinkPattern.Matches(section))
        {
            string url = "http://blog.csdn.net" + m.Value;
            if (seen.Add(url))
            {
                list.Add(url);
            }
        }
        return list;
    }

    /// <summary>
    /// Raw import of kernel32 <c>TerminateProcess</c>, kept with its original
    /// signature for existing callers. The native function expects a process
    /// HANDLE (not a process ID) and returns a Win32 BOOL, so this declaration
    /// is no longer used by this class; <see cref="Process.Kill()"/> is used instead.
    /// </summary>
    /// <param name="handle">Value passed as the native process handle argument.</param>
    /// <param name="exitCode">Exit code to report for the terminated process.</param>
    /// <returns>The raw native return value.</returns>
    [SuppressUnmanagedCodeSecurity]
    [DllImport("kernel32")]
    public static extern long TerminateProcess(int handle, int exitCode);
}
windows技术爱好者
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· .NET Core 中如何实现缓存的预热?
· 从 HTTP 原因短语缺失研究 HTTP/2 和 HTTP/3 的设计差异
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构