csdn爬网

  public partial class Program
{
/// <summary>
/// Program entry point. Downloads the blog's front page, reads the number of
/// article-list pages from it, then walks every list page and opens each
/// article link found on that page. Internet Explorer processes are closed
/// before the run and again after every list page.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    CloseIE();

    string url = "http://blog.csdn.net/dz45693";
    int count = GetPageCount(GetRequest(url));

    int page = 1;
    while (page <= count)
    {
        // List pages are addressed as <blog url>/article/list/<page number>.
        string listing = GetRequest(url + "/article/list/" + page.ToString());

        foreach (string link in GetPageLink(listing))
        {
            SendRequest(link);
        }

        // Clean up any browser windows left over from this page.
        CloseIE();
        page++;
    }
}

/// <summary>
/// Closes every running process named "iexplore" and then pauses for one
/// second so the processes have time to go away.
/// </summary>
/// <remarks>
/// Each process is first asked to close its main window and is then killed
/// if it is still running. Failures for an individual process are written
/// to <see cref="Trace"/> and do not stop the loop.
/// </remarks>
private static void CloseIE()
{
    foreach (Process item in Process.GetProcessesByName("iexplore"))
    {
        try
        {
            // Polite request first; this only posts a close message.
            item.CloseMainWindow();

            // Force termination through the Process object itself. The
            // object must still be attached to the process at this point,
            // so its resources are released only afterwards (in finally).
            if (!item.HasExited)
            {
                item.Kill();
            }
        }
        catch (Exception ex)
        {
            // Best effort: the process may already be gone or be inaccessible.
            Trace.WriteLine(ex.Message);
        }
        finally
        {
            // Release the handle held by this Process object.
            item.Close();
        }
    }

    Thread.Sleep(1000);
}

/// <summary>
/// Performs an HTTP GET on <paramref name="url"/> through the system proxy
/// (using the default credentials) and returns the response body as text.
/// </summary>
/// <param name="url">Absolute URL to download.</param>
/// <returns>
/// The response body, or <see cref="string.Empty"/> when anything goes wrong
/// (invalid URL, network failure, ...). The error message is written to
/// <see cref="Trace"/>.
/// </returns>
static string GetRequest(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        // WebProxy.GetDefaultProxy() is obsolete; GetSystemWebProxy() is the
        // supported way to obtain the system-configured proxy. Guard against
        // a null proxy before touching its credentials.
        IWebProxy proxy = WebRequest.GetSystemWebProxy();
        if (proxy != null)
        {
            proxy.Credentials = CredentialCache.DefaultCredentials;
            request.Proxy = proxy;
        }

        // The using blocks guarantee the response and its stream are
        // released even if reading the body throws.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(body))
        {
            return sr.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        Trace.WriteLine(ex.Message);
        return string.Empty;
    }
}

/// <summary>
/// Opens <paramref name="url"/> in Internet Explorer, leaves the browser
/// running for ten seconds, then closes it.
/// </summary>
/// <param name="url">Address passed to iexplore.exe as its argument.</param>
/// <returns>
/// <c>true</c> when the browser was started and shut down without an
/// exception; <c>false</c> otherwise (the error message is written to
/// <see cref="Trace"/>).
/// </returns>
static bool SendRequest(string url)
{
    try
    {
        using (Process p = new Process())
        {
            p.StartInfo.FileName = @"C:\Program Files\Internet Explorer\iexplore.exe";
            p.StartInfo.Arguments = url;
            p.Start();

            // Give the page time to load before tearing the browser down.
            Thread.Sleep(1000 * 10);

            // The launched process may already have exited on its own;
            // only try to close it when it is still alive.
            if (!p.HasExited)
            {
                p.CloseMainWindow();

                // Force termination through the Process object, which
                // owns a valid handle to the process it started.
                if (!p.HasExited)
                {
                    p.Kill();
                }
            }

            return true;
        }
    }
    catch (Exception ex)
    {
        Trace.WriteLine(ex.Message);
        return false;
    }
}

/// <summary>
/// Extracts the total number of list pages from the pager text
/// "共N页" contained in <paramref name="html"/>.
/// </summary>
/// <param name="html">Page markup to search; may be null or empty.</param>
/// <returns>
/// The page count, or 0 when the text is missing, the pager is not found,
/// or the matched digits cannot be represented as an <see cref="int"/>.
/// </returns>
static int GetPageCount(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return 0;
    }

    Match m = new Regex(@"共(\d{1,})页").Match(html);
    if (!m.Success)
    {
        return 0;
    }

    // \d accepts arbitrarily long digit runs and non-ASCII digits, both of
    // which make int.Parse throw; TryParse turns those cases into 0.
    int count;
    if (!int.TryParse(m.Groups[1].Value, out count))
    {
        return 0;
    }

    return count;
}

/// <summary>
/// Collects the distinct article URLs found between the "article_list"
/// marker and the following "papelist" marker in <paramref name="html"/>.
/// </summary>
/// <param name="html">Markup of an article-list page; may be null or empty.</param>
/// <returns>
/// Absolute article URLs in order of first appearance. The list is empty
/// when the input is null/empty or either marker cannot be found.
/// </returns>
static List<string> GetPageLink(string html)
{
    List<string> list = new List<string>();
    if (string.IsNullOrEmpty(html))
    {
        return list;
    }

    int startindex = html.IndexOf("article_list", StringComparison.Ordinal);
    if (startindex < 0)
    {
        return list;
    }

    // Look for the end marker only after the start marker so the slice
    // length can never be negative.
    int endindex = html.IndexOf("papelist", startindex, StringComparison.Ordinal);
    if (endindex < 0)
    {
        return list;
    }

    string section = html.Substring(startindex, endindex - startindex);
    Regex reg = new Regex(@"/dz45693/article/details/(\d{1,})");
    foreach (Match m in reg.Matches(section))
    {
        string url = "http://blog.csdn.net" + m.Value;

        // The same article is usually linked more than once per entry.
        if (!list.Contains(url))
        {
            list.Add(url);
        }
    }

    return list;
}

/// <summary>
/// P/Invoke declaration bound to the <c>TerminateProcess</c> export of
/// the "kernel32" library.
/// </summary>
/// <param name="handle">
/// Value passed as the first native argument.
/// NOTE(review): the native API presumably expects a process handle
/// rather than a process ID - confirm what callers pass here.
/// </param>
/// <param name="exitCode">Exit code passed as the second native argument.</param>
/// <returns>
/// The native return value marshalled as <c>long</c>.
/// NOTE(review): verify that <c>long</c> matches the native return type.
/// </returns>
[SuppressUnmanagedCodeSecurity]
[DllImport(
"kernel32")]
public static extern long TerminateProcess(int handle, int exitCode);


}

posted on 2011-08-31 16:50  dz45693  阅读(152)  评论(0)  编辑  收藏  举报

导航