自己用C#写一个采集器、蜘蛛(zz)

效果图:

C# 采集器 蜘蛛

代码如下:

 

using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.Web;
using System.IO;
using System.Collections;
using System.Text.RegularExpressions;


namespace chinaz
{
    
class Program
    {
        
/// <summary>
        /// Console entry point. Reads an optional cookie from cookie.txt, then reads
        /// four lines from standard input: first page number, last page number, a
        /// URL format string (formatted with the page number as argument 0) and an
        /// encoding name. Every page in the range is fetched through
        /// SRWebClient.GetPage, and each e-mail address not seen before is printed
        /// and appended to mail.txt.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // Pages of this many characters or fewer are skipped.
            const int MinPageLength = 1000;

            // The cookie is optional: without cookie.txt the requests are sent with no Cookie header.
            string cookie = null;
            if (File.Exists("cookie.txt"))
            {
                // Strip surrounding whitespace such as the trailing newline most editors add.
                string text = File.ReadAllText("cookie.txt").Trim();
                if (text.Length > 0)
                {
                    cookie = text;
                }
            }

            int a;
            int b;
            if (!int.TryParse(Console.ReadLine(), out a) || !int.TryParse(Console.ReadLine(), out b))
            {
                Console.WriteLine("Invalid page number: expected an integer.");
                return;
            }

            string url = Console.ReadLine();
            if (string.IsNullOrEmpty(url))
            {
                Console.WriteLine("Missing URL format string.");
                return;
            }

            Encoding encoding;
            try
            {
                encoding = Encoding.GetEncoding(Console.ReadLine());
            }
            catch (ArgumentException)
            {
                // Also covers ArgumentNullException when standard input has ended.
                Console.WriteLine("Unknown encoding name.");
                return;
            }

            // Built once and reused for every page.
            Regex emailPattern = new Regex(@"\w+([-+.']\w+)*@\w+([-.]\w+)*\.\w+([-.]\w+)*", RegexOptions.Compiled | RegexOptions.IgnoreCase);

            // Addresses already reported during this run (case-sensitive).
            Dictionary<string, bool> seen = new Dictionary<string, bool>();

            // Open the output once in append mode; AutoFlush keeps every address on
            // disk even if the run is interrupted.
            using (StreamWriter sw = new StreamWriter("mail.txt", true))
            {
                sw.AutoFlush = true;

                for (int i = a; i <= b; i++)
                {
                    string html = SRWebClient.GetPage(string.Format(url, i), encoding, cookie);
                    if (html == null || html.Length <= MinPageLength)
                    {
                        continue;
                    }

                    for (Match m = emailPattern.Match(html); m.Success; m = m.NextMatch())
                    {
                        if (seen.ContainsKey(m.Value))
                        {
                            continue;
                        }

                        seen.Add(m.Value, true);
                        Console.WriteLine(m.Value);
                        sw.WriteLine(m.Value);
                    }
                }
            }

            Console.Write("完成");
            Console.ReadLine();
        }
    }


    
public class SRWebClient
    {
        
public CookieCollection cookie;
        
/// <summary>
        /// Creates a client that starts without any stored cookies.
        /// </summary>
        public SRWebClient()
        {
            this.cookie = null;
        }

        
#region 从包含多个 Cookie 的字符串读取到 CookieCollection 集合中
        
/// <summary>
        /// Splits <paramref name="cookieHead"/> on ';' and adds every piece that
        /// GetCookieFromString can turn into a cookie to <paramref name="cookieCol"/>.
        /// </summary>
        /// <param name="cookieCol">Collection that receives the cookies; a new one is created when it is null.</param>
        /// <param name="cookieHead">Raw cookie text; when null nothing is added.</param>
        /// <param name="defaultDomain">Domain passed on to GetCookieFromString for each piece.</param>
        private static void AddCookieWithCookieHead(ref CookieCollection cookieCol, string cookieHead, string defaultDomain)
        {
            if (cookieCol == null)
            {
                cookieCol = new CookieCollection();
            }

            if (cookieHead == null)
            {
                return;
            }

            // NOTE(review): splitting on ';' first means attribute pieces are handed to
            // GetCookieFromString one at a time; this is not a full Set-Cookie parser.
            foreach (string piece in cookieHead.Split(';'))
            {
                Cookie ck = GetCookieFromString(piece.Trim(), defaultDomain);
                if (ck != null)
                {
                    cookieCol.Add(ck);
                }
            }
        }
        
#endregion

        
#region 读取某一个 Cookie 字符串到 Cookie 变量中
        
/// <summary>
        /// Builds a single cookie from a comma-separated list of "key=value" pairs.
        /// "path" and "domain" set the matching cookie properties; "expires",
        /// "max-age" and "samesite" are recognised and ignored. Attribute names are
        /// matched without regard to case. The first remaining pair becomes the
        /// cookie's name and value.
        /// </summary>
        /// <param name="cookieString">Text to parse; items without '=' (or starting with '=') are skipped.</param>
        /// <param name="defaultDomain">Domain assigned when the text does not carry one.</param>
        /// <returns>
        /// The cookie, or null when the text holds no name/value pair or the name is
        /// rejected by <see cref="Cookie.Name"/>.
        /// </returns>
        private static Cookie GetCookieFromString(string cookieString, string defaultDomain)
        {
            if (cookieString == null)
            {
                return null;
            }

            Cookie ck = new Cookie();
            bool hasName = false;

            try
            {
                foreach (string part in cookieString.Split(','))
                {
                    string s = part.Trim();
                    int index = s.IndexOf('=');
                    if (index <= 0)
                    {
                        continue;
                    }

                    string key = s.Substring(0, index).Trim();
                    string value = s.Substring(index + 1);

                    if (string.Equals(key, "path", StringComparison.OrdinalIgnoreCase))
                    {
                        ck.Path = value;
                    }
                    else if (string.Equals(key, "domain", StringComparison.OrdinalIgnoreCase))
                    {
                        ck.Domain = value;
                    }
                    else if (string.Equals(key, "expires", StringComparison.OrdinalIgnoreCase)
                        || string.Equals(key, "max-age", StringComparison.OrdinalIgnoreCase)
                        || string.Equals(key, "samesite", StringComparison.OrdinalIgnoreCase))
                    {
                        // Attributes that must not be mistaken for a cookie name; their values are not used.
                    }
                    else if (!hasName)
                    {
                        // First name/value pair wins, so the result does not depend on hash ordering
                        // and a repeated key no longer throws.
                        ck.Name = key;
                        ck.Value = value;
                        hasName = true;
                    }
                }
            }
            catch (CookieException)
            {
                // Cookie.Name rejects some names; skip that cookie rather than abort the caller.
                return null;
            }

            if (!hasName)
            {
                return null;
            }

            if (ck.Domain == "")
            {
                ck.Domain = defaultDomain;
            }

            return ck;
        }
        
#endregion



        
/// <summary>
        /// Downloads the body of <paramref name="URL"/> as text decoded with
        /// <see cref="System.Text.Encoding.Default"/>. Redirects are not followed and
        /// the request timeout is 20000 ms.
        /// </summary>
        /// <param name="URL">Address to request.</param>
        /// <param name="CreateCookie">
        /// When true, the <c>cookie</c> field is replaced with the cookies of the response
        /// after the body has been read.
        /// </param>
        /// <returns>The response text, or <see cref="string.Empty"/> when anything fails.</returns>
        public string DownloadHtml(string URL, bool CreateCookie)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);

                // HttpWebResponse.Cookies is only filled in when the request carries a
                // CookieContainer, so attach one whenever cookies are to be captured,
                // not only when there are cookies to send.
                if (cookie != null || CreateCookie)
                {
                    request.CookieContainer = new CookieContainer();
                    if (cookie != null)
                    {
                        request.CookieContainer.Add(cookie);
                    }
                }

                request.AllowAutoRedirect = false;
                request.Timeout = 20000;

                using (HttpWebResponse res = (HttpWebResponse)request.GetResponse())
                using (StreamReader reader = new StreamReader(res.GetResponseStream(), System.Text.Encoding.Default))
                {
                    string html = reader.ReadToEnd();
                    if (CreateCookie)
                    {
                        cookie = res.Cookies;
                    }

                    return html;
                }
            }
            catch (Exception ex)
            {
                // Best effort: callers get an empty string, but the failure is traced.
                System.Diagnostics.Trace.WriteLine("SRWebClient.DownloadHtml failed: " + ex.Message);
            }

            return string.Empty;
        }

        
/// <summary>
        /// Downloads <paramref name="FileURL"/> (following redirects) and writes the
        /// response body to <paramref name="FileSavePath"/>.
        /// </summary>
        /// <param name="FileURL">Address of the file to download.</param>
        /// <param name="FileSavePath">Path of the file to create or overwrite.</param>
        /// <param name="CreateCookie">When true, the <c>cookie</c> field is replaced with the cookies of the response.</param>
        /// <returns>
        /// The ContentLength reported by the response. A failure while copying the
        /// body is traced and does not change the returned value; a failure to
        /// create the request or obtain the response is thrown to the caller.
        /// </returns>
        public long DownloadFile(string FileURL, string FileSavePath, bool CreateCookie)
        {
            const int BufferSize = 4096;

            HttpWebRequest req = (HttpWebRequest)WebRequest.Create(FileURL);

            // HttpWebResponse.Cookies is only filled in when the request carries a
            // CookieContainer, so attach one whenever cookies are to be captured.
            if (cookie != null || CreateCookie)
            {
                req.CookieContainer = new CookieContainer();
                if (cookie != null)
                {
                    req.CookieContainer.Add(cookie);
                }
            }

            req.AllowAutoRedirect = true;

            using (HttpWebResponse res = (HttpWebResponse)req.GetResponse())
            {
                if (CreateCookie)
                {
                    cookie = res.Cookies;
                }

                long fileLength = 0;
                try
                {
                    fileLength = res.ContentLength;

                    using (Stream stream = res.GetResponseStream())
                    using (FileStream fs = File.Create(FileSavePath))
                    {
                        byte[] buffer = new byte[BufferSize];
                        int read;
                        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fs.Write(buffer, 0, read);
                        }
                    }
                }
                catch (Exception er)
                {
                    // Best effort, as before: the copy failure is not rethrown, only traced.
                    System.Diagnostics.Trace.WriteLine("SRWebClient.DownloadFile failed: " + er.Message);
                }

                return fileLength;
            }
        }

        
/// <summary>
        /// Sends a request to <paramref name="RequestPageURL"/> and returns the response
        /// body decoded with <see cref="Encoding.Default"/>. When
        /// <paramref name="Data"/> produces a non-empty string the request is a
        /// form-encoded POST whose body is that string encoded as UTF-8; otherwise the
        /// request method is left at its default. Redirects are not followed and the
        /// timeout is 20000 ms.
        /// </summary>
        /// <param name="RequestPageURL">Address to request; also sent as the Referer.</param>
        /// <param name="Data">Form fields to submit; null is treated as no data.</param>
        /// <param name="CreateCookie">
        /// When true, the Set-Cookie response header is parsed by
        /// AddCookieWithCookieHead and the results are added to the <c>cookie</c> field.
        /// </param>
        /// <returns>The response text, or <see cref="string.Empty"/> when anything fails.</returns>
        public string Request(string RequestPageURL, RequestData Data, bool CreateCookie)
        {
            HttpWebResponse response = null;
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(RequestPageURL);

                string postdata = Data == null ? string.Empty : Data.GetData();
                request.Referer = RequestPageURL;
                request.AllowAutoRedirect = false;
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; Maxthon; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
                request.Timeout = 20000;

                if (cookie != null)
                {
                    request.CookieContainer = new CookieContainer();
                    request.CookieContainer.Add(cookie);
                }

                // Data to submit means the request becomes a form POST.
                if (postdata.Length > 0)
                {
                    request.ContentType = "application/x-www-form-urlencoded";
                    request.Method = "POST";

                    byte[] body = Encoding.UTF8.GetBytes(postdata);
                    request.ContentLength = body.Length;

                    using (Stream requestStream = request.GetRequestStream())
                    {
                        requestStream.Write(body, 0, body.Length);
                    }
                }

                response = (HttpWebResponse)request.GetResponse();

                if (CreateCookie)
                {
                    // Cookies are taken from the raw header rather than from response.Cookies.
                    AddCookieWithCookieHead(ref cookie, response.Headers["Set-Cookie"], request.RequestUri.Host);
                }

                using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.Default))
                {
                    return reader.ReadToEnd();
                }
            }
            catch (Exception ex)
            {
                // Best effort: callers get an empty string, but the failure is traced.
                System.Diagnostics.Trace.WriteLine("SRWebClient.Request failed: " + ex.Message);
            }
            finally
            {
                if (response != null)
                {
                    response.Close();
                }
            }

            return string.Empty;
        }


        
/// <summary>
        /// Posts <paramref name="Data"/> to the fixed download URL and saves the
        /// response body under "f:/mobileMusic/" in a sub-folder whose name is a random
        /// number from 0 to 99. The file name is taken from the "filename=" part of the
        /// Content-Disposition response header.
        /// </summary>
        /// <param name="Data">Form fields to submit; null is treated as no data.</param>
        /// <param name="file">
        /// Receives "sub-folder/file name" relative to the download folder once the
        /// name is known; null when the name could not be determined.
        /// </param>
        /// <returns>True when the file was written completely; false on any failure.</returns>
        public bool PostDownload(RequestData Data, out string file)
        {
            const int BufferSize = 4096;

            file = null;
            HttpWebResponse response = null;
            try
            {
                string StrUrl = "http://www.imobile.com.cn/wapdiyringdownload.php";
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(StrUrl);

                string postdata = Data == null ? string.Empty : Data.GetData();
                request.Referer = StrUrl;
                request.AllowAutoRedirect = false;
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; Maxthon; .NET CLR 1.1.4322; .NET CLR 2.0.50727)";
                request.Timeout = 20000;

                if (cookie != null)
                {
                    request.CookieContainer = new CookieContainer();
                    request.CookieContainer.Add(cookie);
                }

                // Data to submit means the request becomes a form POST.
                if (postdata.Length > 0)
                {
                    request.ContentType = "application/x-www-form-urlencoded";
                    request.Method = "POST";

                    byte[] body = Encoding.UTF8.GetBytes(postdata);
                    request.ContentLength = body.Length;

                    using (Stream requestStream = request.GetRequestStream())
                    {
                        requestStream.Write(body, 0, body.Length);
                    }
                }

                response = (HttpWebResponse)request.GetResponse();

                string des = response.Headers["Content-Disposition"];
                if (des == null)
                {
                    return false;
                }

                des = des.Trim();
                int marker = des.IndexOf("filename=");
                if (marker < 0)
                {
                    return false;
                }

                string name = des.Substring(marker + 9);

                // Drop any parameter that follows the file name, then surrounding quotes.
                int semicolon = name.IndexOf(';');
                if (semicolon >= 0)
                {
                    name = name.Substring(0, semicolon);
                }

                name = name.Trim().Trim('"');

                // The name comes from the server: keep only the last path segment so it
                // cannot point outside the download folder.
                name = Path.GetFileName(name);
                if (name.Length == 0)
                {
                    return false;
                }

                file = new Random().Next(100).ToString() + "/" + name;

                string target = "f:/mobileMusic/" + file;

                // The random sub-folder may not exist yet.
                Directory.CreateDirectory(Path.GetDirectoryName(target));

                using (Stream stream = response.GetResponseStream())
                using (FileStream fs = File.Create(target))
                {
                    byte[] buffer = new byte[BufferSize];
                    int read;
                    while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                    {
                        fs.Write(buffer, 0, read);
                    }
                }

                return true;
            }
            catch (Exception ex)
            {
                // Failures are traced and reported through the return value.
                System.Diagnostics.Trace.WriteLine("SRWebClient.PostDownload failed: " + ex.Message);
                return false;
            }
            finally
            {
                if (response != null)
                {
                    response.Close();
                }
            }
        }
        
#region GetPage
        
/// <summary>
        /// Fetches a page without a cookie, repeating the request until a non-empty
        /// result comes back or the attempts are used up.
        /// </summary>
        /// <param name="url">Address handed to the cookie-taking GetPage overload.</param>
        /// <param name="encoding">Encoding handed to the cookie-taking GetPage overload.</param>
        /// <param name="TryCount">Maximum number of attempts; zero or less means no request is made.</param>
        /// <returns>The first non-empty result, or <see cref="string.Empty"/> when every attempt came back empty.</returns>
        public static string GetPage(string url, Encoding encoding, int TryCount)
        {
            string noCookie = null;
            int attemptsLeft = TryCount;

            while (attemptsLeft > 0)
            {
                attemptsLeft--;

                string page = GetPage(url, encoding, noCookie);
                if (!string.IsNullOrEmpty(page))
                {
                    return page;
                }
            }

            return string.Empty;
        }

        
/// <summary>
        /// Fetches <paramref name="url"/> and returns the response body decoded with
        /// <paramref name="encoding"/>. Redirects are not followed and the timeout is
        /// 20000 ms. The body is only read when the status is 200 OK and the reported
        /// ContentLength is below 1 MiB.
        /// </summary>
        /// <param name="url">Address to request.</param>
        /// <param name="encoding">Encoding used to decode the response body.</param>
        /// <param name="cookie">Value sent as the Cookie request header; null sends no Cookie header.</param>
        /// <returns>The response text, or <see cref="string.Empty"/> when the response is not accepted or anything fails.</returns>
        public static string GetPage(string url, Encoding encoding, string cookie)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2;)";
                request.Timeout = 20000;
                request.AllowAutoRedirect = false;

                if (cookie != null)
                {
                    request.Headers["Cookie"] = cookie;
                }

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= 1024 * 1024)
                    {
                        return string.Empty;
                    }

                    using (StreamReader reader = new StreamReader(response.GetResponseStream(), encoding))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (Exception ex)
            {
                // Best effort: callers get an empty string, but the failure is traced.
                System.Diagnostics.Trace.WriteLine("SRWebClient.GetPage failed: " + ex.Message);
            }

            return string.Empty;
        }
        
#endregion
    }

    
/// <summary>
    /// Collects named form fields and renders them as a "name=value" list joined
    /// with ampersands. Names and values are written as given, without escaping.
    /// </summary>
    public class RequestData
    {
        // Field name -> value. Enumeration order is whatever Hashtable provides.
        private Hashtable fields = new Hashtable();

        /// <summary>
        /// Creates an empty set of fields.
        /// </summary>
        public RequestData()
        {
        }

        /// <summary>
        /// Renders all fields as one string.
        /// </summary>
        /// <returns>The joined "name=value" pairs, or an empty string when no field was added.</returns>
        public string GetData()
        {
            StringBuilder builder = new StringBuilder();

            foreach (string name in fields.Keys)
            {
                if (builder.Length > 0)
                {
                    builder.Append("&");
                }

                builder.Append(name);
                builder.Append("=");
                builder.Append(fields[name]);
            }

            return builder.ToString();
        }

        /// <summary>
        /// Adds a field, replacing the value of a field with the same name.
        /// </summary>
        /// <param name="Field">Field name.</param>
        /// <param name="Value">Field value.</param>
        public void AddField(string Field, string Value)
        {
            fields[Field] = Value;
        }
    }
}
posted @ 2008-10-11 16:48  stu_acer  阅读(981)  评论(0)  编辑  收藏  举报