// A data-scraping helper class containing some commonly used methods.
using System;
using System.Configuration;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace XXX
{
/// <summary>
/// Helper for scraping web pages: cookie-aware HTTP GET/POST requests plus small
/// string, text-file and HTML-link utilities.
/// </summary>
public class Func
{
    /// <summary>Read timeout, in milliseconds, applied to the GET response stream.</summary>
    private const int ReadTimeoutMilliseconds = 8000;

    /// <summary>
    /// Matches an HTML anchor element whose href is double-quoted, single-quoted or
    /// unquoted; the named group "text" captures the content between the tags.
    /// Built once because the pattern never changes.
    /// </summary>
    private static readonly Regex LinkRegex = new Regex(@"<a[^>]*href=(""(?<href>[^""]*)""|'(?<href>[^']*)'|(?<href>[^\s>]*))[^>]*>(?<text>.*?)</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>Cookie store attached to the requests issued by GetPage and PostPage.</summary>
    public CookieContainer myCookieContainer = new CookieContainer();

    /// <summary>
    /// Parses a "name=value; name2=value2" cookie string and adds every pair to
    /// <see cref="myCookieContainer"/> for the given domain.
    /// </summary>
    /// <param name="cookieStr">Semicolon-separated list of name=value pairs.</param>
    /// <param name="domain">Domain assigned to every cookie that is added.</param>
    public void SetCookie(string cookieStr, string domain)
    {
        foreach (string segment in cookieStr.Split(';'))
        {
            // Split on the FIRST '=' only, so values that contain '=' themselves
            // (for example base64 padding) are kept intact.
            int equalsIndex = segment.IndexOf('=');
            if (equalsIndex < 0)
            {
                // Empty segment (such as after a trailing ';') or a segment
                // without a value: nothing usable to add.
                continue;
            }

            string name = segment.Substring(0, equalsIndex).Trim();
            if (name.Length == 0)
            {
                continue;
            }

            string value = segment.Substring(equalsIndex + 1).Trim();
            Cookie ck = new Cookie(name, value);
            ck.Domain = domain;
            myCookieContainer.Add(ck);
        }
    }

    /// <summary>
    /// Downloads a page with an HTTP GET and decodes it with <see cref="Encoding.Default"/>.
    /// </summary>
    /// <param name="PageUrl">Address of the page to fetch.</param>
    /// <returns>The response body, or an empty string if the request failed.</returns>
    public string GetPage(string PageUrl)
    {
        return GetPage(PageUrl, Encoding.Default);
    }

    /// <summary>
    /// Downloads a page with an HTTP GET, sending the cookies held in
    /// <see cref="myCookieContainer"/>, and decodes the body with the given encoding.
    /// </summary>
    /// <param name="PageUrl">Address of the page to fetch.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>
    /// The response body. Any exception is reported through Log.WriteError and an
    /// empty string is returned instead.
    /// </returns>
    public string GetPage(string PageUrl, Encoding encoding)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(PageUrl);
            request.CookieContainer = myCookieContainer;
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";

            // using-statements release the connection even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            {
                resStream.ReadTimeout = ReadTimeoutMilliseconds;
                using (StreamReader sr = new StreamReader(resStream, encoding))
                {
                    return sr.ReadToEnd();
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort contract: callers receive "" rather than an exception.
            Log.WriteError(ex.Message);
            return "";
        }
    }

    /// <summary>
    /// Sends form data with an HTTP POST (application/x-www-form-urlencoded) and
    /// returns the response body decoded with <see cref="Encoding.Default"/>.
    /// </summary>
    /// <param name="PageUrl">Address to post to.</param>
    /// <param name="postData">Already URL-encoded form body.</param>
    /// <returns>The response body.</returns>
    /// <remarks>Unlike GetPage, exceptions are not caught here and reach the caller.</remarks>
    public string PostPage(string PageUrl, string postData)
    {
        // UTF-8 yields the same bytes as ASCII for ASCII input, but does not turn
        // other characters into '?'.
        byte[] data = Encoding.UTF8.GetBytes(postData);

        HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(PageUrl);
        myRequest.Method = "POST";
        myRequest.ContentType = "application/x-www-form-urlencoded";
        myRequest.ContentLength = data.Length;
        // Share the cookie store with GetPage so both kinds of request use the
        // same cookies.
        myRequest.CookieContainer = myCookieContainer;

        using (Stream requestStream = myRequest.GetRequestStream())
        {
            requestStream.Write(data, 0, data.Length);
        }

        using (HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse())
        using (StreamReader reader = new StreamReader(myResponse.GetResponseStream(), Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Splits a string on every occurrence of a separator string, keeping empty entries.
    /// </summary>
    /// <param name="mystr">Text to split.</param>
    /// <param name="splitstr">Separator; must not be null or empty.</param>
    /// <returns>The pieces of <paramref name="mystr"/> between separators.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="splitstr"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="splitstr"/> is empty.</exception>
    public string[] StrSplit(string mystr, string splitstr)
    {
        if (splitstr == null)
        {
            throw new ArgumentNullException("splitstr");
        }
        if (splitstr.Length == 0)
        {
            // string.Split would fall back to splitting on white space here.
            throw new ArgumentException("Separator cannot be empty.", "splitstr");
        }

        // Split directly on the separator string; no placeholder character is
        // involved, so no character in the input can cause an accidental split.
        return mystr.Split(new string[] { splitstr }, StringSplitOptions.None);
    }

    /// <summary>
    /// Returns the text located between the first occurrence of a start marker and
    /// the next occurrence of an end marker.
    /// </summary>
    /// <param name="mystr">Text to search.</param>
    /// <param name="str1">Start marker; null or empty means "from the beginning".</param>
    /// <param name="str2">End marker; null or empty means "to the end".</param>
    /// <returns>The text between the markers, or the string "-1" when a marker is not found.</returns>
    public string Cutstr(string mystr, string str1, string str2)
    {
        string startMarker = str1 ?? string.Empty;
        string endMarker = str2 ?? string.Empty;

        int markerIndex = 0;
        if (startMarker.Length > 0)
        {
            // Ordinal search: markers are matched character-for-character, so the
            // matched span always has exactly startMarker.Length characters.
            markerIndex = mystr.IndexOf(startMarker, StringComparison.Ordinal);
            if (markerIndex < 0)
            {
                // Stop before looking for the end marker; there is no valid
                // position to start that search from.
                return "-1";
            }
        }

        int contentStart = markerIndex + startMarker.Length;
        int contentEnd = mystr.Length;
        if (endMarker.Length > 0)
        {
            contentEnd = mystr.IndexOf(endMarker, contentStart, StringComparison.Ordinal);
            if (contentEnd < 0)
            {
                return "-1";
            }
        }

        return mystr.Substring(contentStart, contentEnd - contentStart);
    }

    /// <summary>
    /// Replaces only the first occurrence of a substring.
    /// </summary>
    /// <param name="mystr">Text to search.</param>
    /// <param name="oldstr">Substring to replace.</param>
    /// <param name="newstr">Replacement text.</param>
    /// <returns>
    /// The text with the first occurrence replaced, or <paramref name="mystr"/>
    /// unchanged when <paramref name="oldstr"/> does not occur.
    /// </returns>
    public string ReplaceFirst(string mystr, string oldstr, string newstr)
    {
        // Ordinal search keeps the matched length equal to oldstr.Length.
        int index = mystr.IndexOf(oldstr, StringComparison.Ordinal);
        if (index < 0)
        {
            return mystr;
        }

        return mystr.Substring(0, index) + newstr + mystr.Substring(index + oldstr.Length);
    }

    /// <summary>
    /// Writes text to a file as UTF-8, creating the file or overwriting an existing one.
    /// </summary>
    /// <param name="pathstr">Path of the file to write.</param>
    /// <param name="content">Text to store.</param>
    public void writetxt(string pathstr, string content)
    {
        // using-statements close the file handle even if the write throws.
        using (FileStream fs = new FileStream(pathstr, FileMode.Create, FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(fs, Encoding.UTF8))
        {
            sw.Write(content);
        }
    }

    /// <summary>
    /// Reads the whole content of an existing text file as UTF-8.
    /// </summary>
    /// <param name="pathstr">Path of the file to read.</param>
    /// <returns>The file content.</returns>
    public string readtxt(string pathstr)
    {
        using (FileStream fs = new FileStream(pathstr, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs, Encoding.UTF8))
        {
            return sr.ReadToEnd();
        }
    }

    /// <summary>
    /// Removes HTML links from a fragment, keeping only the text each link wraps.
    /// </summary>
    /// <param name="str">HTML fragment.</param>
    /// <returns>The fragment with every matched anchor element replaced by its inner text.</returns>
    public string FilterLink(string str)
    {
        // Each match is replaced at its own position by its own "text" group.
        return LinkRegex.Replace(str, "${text}");
    }
}
}
using System.Configuration;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
namespace XXX
{
/// <summary>
/// Helper for scraping web pages: cookie-aware HTTP GET/POST requests plus small
/// string, text-file and HTML-link utilities.
/// </summary>
public class Func
{
    /// <summary>Read timeout, in milliseconds, applied to the GET response stream.</summary>
    private const int ReadTimeoutMilliseconds = 8000;

    /// <summary>
    /// Matches an HTML anchor element whose href is double-quoted, single-quoted or
    /// unquoted; the named group "text" captures the content between the tags.
    /// Built once because the pattern never changes.
    /// </summary>
    private static readonly Regex LinkRegex = new Regex(@"<a[^>]*href=(""(?<href>[^""]*)""|'(?<href>[^']*)'|(?<href>[^\s>]*))[^>]*>(?<text>.*?)</a>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    /// <summary>Cookie store attached to the requests issued by GetPage and PostPage.</summary>
    public CookieContainer myCookieContainer = new CookieContainer();

    /// <summary>
    /// Parses a "name=value; name2=value2" cookie string and adds every pair to
    /// <see cref="myCookieContainer"/> for the given domain.
    /// </summary>
    /// <param name="cookieStr">Semicolon-separated list of name=value pairs.</param>
    /// <param name="domain">Domain assigned to every cookie that is added.</param>
    public void SetCookie(string cookieStr, string domain)
    {
        foreach (string segment in cookieStr.Split(';'))
        {
            // Split on the FIRST '=' only, so values that contain '=' themselves
            // (for example base64 padding) are kept intact.
            int equalsIndex = segment.IndexOf('=');
            if (equalsIndex < 0)
            {
                // Empty segment (such as after a trailing ';') or a segment
                // without a value: nothing usable to add.
                continue;
            }

            string name = segment.Substring(0, equalsIndex).Trim();
            if (name.Length == 0)
            {
                continue;
            }

            string value = segment.Substring(equalsIndex + 1).Trim();
            Cookie ck = new Cookie(name, value);
            ck.Domain = domain;
            myCookieContainer.Add(ck);
        }
    }

    /// <summary>
    /// Downloads a page with an HTTP GET and decodes it with <see cref="Encoding.Default"/>.
    /// </summary>
    /// <param name="PageUrl">Address of the page to fetch.</param>
    /// <returns>The response body, or an empty string if the request failed.</returns>
    public string GetPage(string PageUrl)
    {
        return GetPage(PageUrl, Encoding.Default);
    }

    /// <summary>
    /// Downloads a page with an HTTP GET, sending the cookies held in
    /// <see cref="myCookieContainer"/>, and decodes the body with the given encoding.
    /// </summary>
    /// <param name="PageUrl">Address of the page to fetch.</param>
    /// <param name="encoding">Encoding used to decode the response body.</param>
    /// <returns>
    /// The response body. Any exception is reported through Log.WriteError and an
    /// empty string is returned instead.
    /// </returns>
    public string GetPage(string PageUrl, Encoding encoding)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(PageUrl);
            request.CookieContainer = myCookieContainer;
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";

            // using-statements release the connection even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (Stream resStream = response.GetResponseStream())
            {
                resStream.ReadTimeout = ReadTimeoutMilliseconds;
                using (StreamReader sr = new StreamReader(resStream, encoding))
                {
                    return sr.ReadToEnd();
                }
            }
        }
        catch (Exception ex)
        {
            // Best-effort contract: callers receive "" rather than an exception.
            Log.WriteError(ex.Message);
            return "";
        }
    }

    /// <summary>
    /// Sends form data with an HTTP POST (application/x-www-form-urlencoded) and
    /// returns the response body decoded with <see cref="Encoding.Default"/>.
    /// </summary>
    /// <param name="PageUrl">Address to post to.</param>
    /// <param name="postData">Already URL-encoded form body.</param>
    /// <returns>The response body.</returns>
    /// <remarks>Unlike GetPage, exceptions are not caught here and reach the caller.</remarks>
    public string PostPage(string PageUrl, string postData)
    {
        // UTF-8 yields the same bytes as ASCII for ASCII input, but does not turn
        // other characters into '?'.
        byte[] data = Encoding.UTF8.GetBytes(postData);

        HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(PageUrl);
        myRequest.Method = "POST";
        myRequest.ContentType = "application/x-www-form-urlencoded";
        myRequest.ContentLength = data.Length;
        // Share the cookie store with GetPage so both kinds of request use the
        // same cookies.
        myRequest.CookieContainer = myCookieContainer;

        using (Stream requestStream = myRequest.GetRequestStream())
        {
            requestStream.Write(data, 0, data.Length);
        }

        using (HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse())
        using (StreamReader reader = new StreamReader(myResponse.GetResponseStream(), Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Splits a string on every occurrence of a separator string, keeping empty entries.
    /// </summary>
    /// <param name="mystr">Text to split.</param>
    /// <param name="splitstr">Separator; must not be null or empty.</param>
    /// <returns>The pieces of <paramref name="mystr"/> between separators.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="splitstr"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="splitstr"/> is empty.</exception>
    public string[] StrSplit(string mystr, string splitstr)
    {
        if (splitstr == null)
        {
            throw new ArgumentNullException("splitstr");
        }
        if (splitstr.Length == 0)
        {
            // string.Split would fall back to splitting on white space here.
            throw new ArgumentException("Separator cannot be empty.", "splitstr");
        }

        // Split directly on the separator string; no placeholder character is
        // involved, so no character in the input can cause an accidental split.
        return mystr.Split(new string[] { splitstr }, StringSplitOptions.None);
    }

    /// <summary>
    /// Returns the text located between the first occurrence of a start marker and
    /// the next occurrence of an end marker.
    /// </summary>
    /// <param name="mystr">Text to search.</param>
    /// <param name="str1">Start marker; null or empty means "from the beginning".</param>
    /// <param name="str2">End marker; null or empty means "to the end".</param>
    /// <returns>The text between the markers, or the string "-1" when a marker is not found.</returns>
    public string Cutstr(string mystr, string str1, string str2)
    {
        string startMarker = str1 ?? string.Empty;
        string endMarker = str2 ?? string.Empty;

        int markerIndex = 0;
        if (startMarker.Length > 0)
        {
            // Ordinal search: markers are matched character-for-character, so the
            // matched span always has exactly startMarker.Length characters.
            markerIndex = mystr.IndexOf(startMarker, StringComparison.Ordinal);
            if (markerIndex < 0)
            {
                // Stop before looking for the end marker; there is no valid
                // position to start that search from.
                return "-1";
            }
        }

        int contentStart = markerIndex + startMarker.Length;
        int contentEnd = mystr.Length;
        if (endMarker.Length > 0)
        {
            contentEnd = mystr.IndexOf(endMarker, contentStart, StringComparison.Ordinal);
            if (contentEnd < 0)
            {
                return "-1";
            }
        }

        return mystr.Substring(contentStart, contentEnd - contentStart);
    }

    /// <summary>
    /// Replaces only the first occurrence of a substring.
    /// </summary>
    /// <param name="mystr">Text to search.</param>
    /// <param name="oldstr">Substring to replace.</param>
    /// <param name="newstr">Replacement text.</param>
    /// <returns>
    /// The text with the first occurrence replaced, or <paramref name="mystr"/>
    /// unchanged when <paramref name="oldstr"/> does not occur.
    /// </returns>
    public string ReplaceFirst(string mystr, string oldstr, string newstr)
    {
        // Ordinal search keeps the matched length equal to oldstr.Length.
        int index = mystr.IndexOf(oldstr, StringComparison.Ordinal);
        if (index < 0)
        {
            return mystr;
        }

        return mystr.Substring(0, index) + newstr + mystr.Substring(index + oldstr.Length);
    }

    /// <summary>
    /// Writes text to a file as UTF-8, creating the file or overwriting an existing one.
    /// </summary>
    /// <param name="pathstr">Path of the file to write.</param>
    /// <param name="content">Text to store.</param>
    public void writetxt(string pathstr, string content)
    {
        // using-statements close the file handle even if the write throws.
        using (FileStream fs = new FileStream(pathstr, FileMode.Create, FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(fs, Encoding.UTF8))
        {
            sw.Write(content);
        }
    }

    /// <summary>
    /// Reads the whole content of an existing text file as UTF-8.
    /// </summary>
    /// <param name="pathstr">Path of the file to read.</param>
    /// <returns>The file content.</returns>
    public string readtxt(string pathstr)
    {
        using (FileStream fs = new FileStream(pathstr, FileMode.Open, FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs, Encoding.UTF8))
        {
            return sr.ReadToEnd();
        }
    }

    /// <summary>
    /// Removes HTML links from a fragment, keeping only the text each link wraps.
    /// </summary>
    /// <param name="str">HTML fragment.</param>
    /// <returns>The fragment with every matched anchor element replaced by its inner text.</returns>
    public string FilterLink(string str)
    {
        // Each match is replaced at its own position by its own "text" group.
        return LinkRegex.Replace(str, "${text}");
    }
}
}