多线程抓取mm8mm8.com数据
/// <summary>
/// Program entry point: starts one worker thread per site category, each
/// running <see cref="ExcuteThread"/> over that category's listing pages.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    // "{*}" in each URL template is replaced by the 1-based listing page number;
    // "page" is the number of listing pages to walk for that category.
    List<Site> sites = new List<Site>
    {
        new Site { page = 32, url = "http://www.mm8mm8.com/model/p{*}.html", type = "model" },
        new Site { page = 38, url = "http://www.mm8mm8.com/sexy/p{*}.html", type = "sexy" },
        new Site { page = 33, url = "http://www.mm8mm8.com/belle/p{*}.html", type = "belle" },
        new Site { page = 26, url = "http://www.mm8mm8.com/stars/p{*}.html", type = "stars" },
        new Site { page = 7, url = "http://www.mm8mm8.com/rihan/list_11_{*}.html", type = "rihan" },
        new Site { page = 6, url = "http://www.mm8mm8.com/siwa/list_12_{*}.html", type = "siwa" }
    };

    // Main returns right after starting the threads; it does not join them.
    foreach (var site in sites)
    {
        Thread thread = new Thread(ExcuteThread);
        thread.Start(site);
    }
}

/// <summary>
/// Thread entry point: walks every listing page of one site category and
/// downloads the images of each album linked from those pages into
/// <c>Images/&lt;type&gt;</c>.
/// </summary>
/// <param name="obsite">
/// The <c>Site</c> to crawl, passed as <see cref="object"/> so the method can be
/// used as a parameterized thread start.
/// </param>
public static void ExcuteThread(object obsite)
{
    // Group 1 captures the album's href attribute value.
    const string albumLinkPattern = "<li><a href=\\s*(?:\"(?<1>[^\"]*)\") title";

    Site site = (Site)obsite;
    for (int i = 1; i <= site.page; i++)
    {
        string listUrl = site.url.Replace("{*}", i.ToString());

        string listHtml;
        try
        {
            listHtml = DownloadString(listUrl);
        }
        catch (Exception ex)
        {
            // An exception escaping a thread's entry method is unhandled and
            // brings the process down, so a failed listing page is logged and skipped.
            Console.WriteLine(ex.Message);
            continue;
        }

        foreach (Match albumLink in Regex.Matches(listHtml, albumLinkPattern))
        {
            try
            {
                DownloadAlbum("http://www.mm8mm8.com" + albumLink.Groups[1].Value, "Images/" + site.type);
            }
            catch (Exception ex)
            {
                // Best effort: a failing album is logged and the crawl moves on.
                Console.WriteLine(ex.Message);
            }
        }
    }
}

/// <summary>
/// Downloads the image from each numbered page ("_2" up to the page count) of one album.
/// </summary>
/// <param name="albumUrl">Absolute URL of the album's first page.</param>
/// <param name="saveDirectory">Directory the images are saved into.</param>
private static void DownloadAlbum(string albumUrl, string saveDirectory)
{
    // Group 1 is the total page count shown in the pager text.
    const string pageCountPattern = "共\\s*(\\d+)\\s*页:";
    // Group 1 is the src attribute value of the album image.
    const string imagePattern = "<img src=\\s*(?:\"(?<1>[^\"]*)\") /></a></p>";

    string firstPageHtml = DownloadString(albumUrl);

    Match pageCountMatch = Regex.Match(firstPageHtml, pageCountPattern);
    int pageCount;
    if (!pageCountMatch.Success || !int.TryParse(pageCountMatch.Groups[1].Value, out pageCount))
    {
        Console.WriteLine("Page count not found, skipping album: " + albumUrl);
        return;
    }

    // ".../123.html" -> ".../123"; numbered pages are ".../123_<k>.html".
    string albumBase = albumUrl.Substring(0, albumUrl.LastIndexOf('.'));

    // NOTE(review): the loop starts at page 2, so the image on the first page
    // (already fetched above for the page count) is never saved - confirm
    // whether that is intended.
    for (int k = 2; k <= pageCount; k++)
    {
        string pageUrl = albumBase + "_" + k + ".html";
        string pageHtml = DownloadString(pageUrl);

        Match image = Regex.Match(pageHtml, imagePattern);
        if (!image.Success || string.IsNullOrEmpty(image.Groups[1].Value))
        {
            // Skip just this page instead of abandoning the rest of the album.
            Console.WriteLine("No image found on page: " + pageUrl);
            continue;
        }

        SaveFile(image.Groups[1].Value, saveDirectory);
    }
}

/// <summary>
/// Opens <paramref name="url"/> with a 5 second timeout and returns the response body stream.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <returns>The response stream. The caller is responsible for disposing it.</returns>
public static Stream DownloadStream(string url)
{
    var imageRequest = (HttpWebRequest)WebRequest.Create(url);
    imageRequest.Timeout = 1000 * 5; // 5 s timeout
    var imageResponse = (HttpWebResponse)imageRequest.GetResponse();
    return imageResponse.GetResponseStream();
}

/// <summary>
/// Downloads <paramref name="url"/> and returns the body decoded as gb2312 text.
/// </summary>
/// <param name="url">Absolute URL to request.</param>
/// <returns>The full response body as a string.</returns>
public static String DownloadString(string url)
{
    var request = (HttpWebRequest)WebRequest.Create(url);

    // Dispose the response and reader so the underlying connection is released
    // for reuse instead of lingering until finalization.
    using (var response = (HttpWebResponse)request.GetResponse())
    using (var reader = new StreamReader(response.GetResponseStream(), Encoding.GetEncoding("gb2312")))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Decodes an image from <paramref name="instream"/> and saves it to
/// <paramref name="path"/> + <paramref name="filename"/>.
/// </summary>
/// <param name="path">
/// Directory prefix. It is concatenated with <paramref name="filename"/> as-is,
/// so it must already end with a separator.
/// </param>
/// <param name="filename">File name appended to <paramref name="path"/>.</param>
/// <param name="instream">Stream containing the image data.</param>
public static void SaveFile(String path, String filename, Stream instream)
{
    var localFile = path + filename;

    // "using" releases the image even when Save throws.
    using (Image image = Image.FromStream(instream))
    {
        image.Save(localFile);
    }
}

/// <summary>
/// Downloads the file at <paramref name="url"/> into <paramref name="path"/>
/// under a freshly generated unique name that keeps the URL's file extension.
/// </summary>
/// <param name="url">Absolute URL of the file to download.</param>
/// <param name="path">Target directory; created when it does not exist.</param>
/// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
public static void SaveFile(String url, String path = "Images")
{
    if (url == null)
    {
        throw new ArgumentNullException("url");
    }

    var fullpath = Path.GetFullPath(path);
    // CreateDirectory does nothing when the directory already exists, so no
    // separate existence check is needed.
    Directory.CreateDirectory(fullpath);

    // A GUID keeps names unique across the concurrent worker threads; a
    // tick-seeded Random per call can yield the same name on two threads.
    string target = Path.Combine(fullpath, Guid.NewGuid().ToString("N") + GetUrlExtension(url));

    using (WebClient wb = new WebClient())
    {
        wb.Proxy = null;
        wb.DownloadFile(url, target);
    }
}

/// <summary>
/// Returns the extension (including the leading dot) of the last path segment
/// of <paramref name="url"/>, ignoring any query string or fragment, or an
/// empty string when that segment has no extension.
/// </summary>
private static string GetUrlExtension(string url)
{
    int end = url.IndexOfAny(new[] { '?', '#' });
    string pathPart = end >= 0 ? url.Substring(0, end) : url;

    int dot = pathPart.LastIndexOf('.');
    int slash = pathPart.LastIndexOf('/');

    // A dot before the last '/' belongs to the host or a directory, not the file name.
    return dot > slash ? pathPart.Substring(dot) : string.Empty;
}