- 关于HTTP协议
HTTP协议是一种无状态协议,这是首先要明确的。客户端(client)发送一个请求,服务端(server)收到之后,根据请求的URL和HTTP头信息给出相应的响应:出错时返回505,404,400等状态码,正确时一般返回200。除了IP和UserAgent等常用信息之外,服务器无法知道请求方的具体标识,也就是说服务器无法知道这个请求来自哪个客户端,OK!
那么就引入了Cookie的概念,服务器一般用cookie去标识客户端,可见cookie对于现在web系统的重要性,如果没有cookie,现在的web就什么都不是。
也就是说Cookie是web交互的核心之一 - 要实现web自动化操作,就要控制Cookie以及http头等信息,现在假设一个场景,
QQ邮箱登陆:
1,登陆QQ邮箱的主地址(http://mail.qq.com)
请求头如上
响应内容,跳转到登陆页(因为没有登陆后的cookie标示)
2,会经过几个跳转步骤之后跳转到HTTPS登陆(https://mail.qq.com/cgi-bin/loginpage?&res=local)
3,输入账号登陆
输入密码后会跳转到 使用get方式提交表单,如果登陆成功会写Cookie
4,登陆成功之后我们再次通过mail.qq.com域名进入,也会跳转到登陆页,但是由于请求头中的cookie已经包含登陆标识,所以会直接跳转到邮箱url
- 从上述过程可以看出用户验证和识别都是依赖于Cookie,那么我们就需要一个能够控制Cookie,能够定制HTTP头,能发起HTTP请求的功能模块。也许有人会问为什么不用WebClient或WebBrowser,这个要从2方面说明:一个是性能,WebBrowser会加载页面所有的东西,而且共享IE浏览器的信息,而我们是需要独立的,因为有可能会一个程序同时登陆多个用户;另一个是WebClient在提交Post数据以及获取HTTP头方面比较弱,所以就只能是WebRequest和自定义Cookie组合
- Cookie自定义管理实现
- 自定义web操作类
public class CookieManager
{ /**
* Parse a Cookie: header into individual tokens according to RFC 2109.
*/
private class CookieTokenizer
{
    /**
     * Upper bound on the number of cookie tokens to accept. The limit is
     * based on the 4 different attributes (4.3.4) and the 20 cookie minimum
     * (6.3) given in RFC 2109 multiplied by 2 to accommodate the 2 tokens in
     * each name=value pair ("JSESSIONID=1234" is 2 tokens).
     */
    private const int MAX_COOKIE_TOKENS = 4 * 20 * 2;

    /**
     * Array of cookie tokens. Even indices contain name tokens while odd
     * indices contain value tokens (or null). Entries at or beyond the count
     * returned by tokenize() are stale data from earlier calls.
     */
    public string[] tokens = new string[MAX_COOKIE_TOKENS];

    /**
     * Number of cookie tokens currently in the tokens[] array.
     */
    private int numTokens = 0;

    /**
     * Parse the name=value tokens from a Cookie: header.
     *
     * Parsing stops early once the token array cannot hold another pair.
     *
     * @param cookies The Cookie: header to parse (null yields 0 tokens)
     * @return The number of tokens stored in tokens[]
     */
    public int tokenize(string cookies)
    {
        numTokens = 0;
        if (cookies == null)
        {
            return numTokens;
        }
        int length = cookies.Length;
        int index = 0;
        // Each parseNameValue call stores at most one name/value pair, so
        // demanding two free slots up front replaces the former reliance on
        // an IndexOutOfRangeException to stop when the array filled up.
        while (index < length && numTokens + 2 <= tokens.Length)
        {
            index = parseNameValue(cookies, index);
        }
        return numTokens;
    }

    /**
     * Return the number of cookie tokens parsed from the Cookie: header.
     */
    public int getNumTokens()
    {
        return numTokens;
    }

    /**
     * Returns a given cookie token from the Cookie: header.
     *
     * @param index The index of the cookie token to return
     */
    public string tokenAt(int index)
    {
        return tokens[index];
    }

    /**
     * Parse a name=value pair from the Cookie: header.
     *
     * @param cookies The Cookie: header to parse
     * @param beginIndex The index in cookies to begin parsing from, inclusive
     * @return The index at which parsing of the next pair should resume
     */
    private int parseNameValue(string cookies, int beginIndex)
    {
        int length = cookies.Length;
        int index = beginIndex;
        while (index < length)
        {
            switch (cookies[index])
            {
                case ';':
                case ',':
                    // Found end of name token without value
                    storeBareName(cookies, beginIndex, index);
                    return index + 1;
                case '=':
                    // Found end of name token with value
                    tokens[numTokens] = cookies.Substring(beginIndex, index - beginIndex).Trim();
                    numTokens++;
                    return parseValue(cookies, index + 1);
                case '"':
                    // Skip past quoted span
                    index = closingQuoteOrEnd(cookies, index);
                    break;
            }
            index++;
        }
        if (index > beginIndex)
        {
            // Found end of name token without value
            storeBareName(cookies, beginIndex, index);
        }
        return index;
    }

    /**
     * Parse the value token from a name=value pair.
     *
     * @param cookies The Cookie: header to parse
     * @param beginIndex The index in cookies to begin parsing from, inclusive
     * @return The index at which parsing of the next pair should resume
     */
    private int parseValue(string cookies, int beginIndex)
    {
        int length = cookies.Length;
        int index = beginIndex;
        while (index < length)
        {
            switch (cookies[index])
            {
                case ';':
                case ',':
                    // Found end of value token
                    tokens[numTokens] = cookies.Substring(beginIndex, index - beginIndex).Trim();
                    numTokens++;
                    return index + 1;
                case '"':
                    // Skip past quoted span
                    index = closingQuoteOrEnd(cookies, index);
                    break;
            }
            index++;
        }
        // Found end of value token
        tokens[numTokens] = cookies.Substring(beginIndex, index - beginIndex).Trim();
        numTokens++;
        return index;
    }

    /**
     * Stores a name that has no value: the trimmed name followed by a null
     * value slot. Names that are empty after trimming are dropped.
     */
    private void storeBareName(string cookies, int beginIndex, int endIndex)
    {
        tokens[numTokens] = cookies.Substring(beginIndex, endIndex - beginIndex).Trim();
        if (tokens[numTokens].Length > 0)
        {
            numTokens++;
            tokens[numTokens] = null;
            numTokens++;
        }
    }

    /**
     * Returns the index of the quote that closes the span opened at
     * openQuote. When the quote is never closed the last index of the string
     * is returned, so the caller's index++ lands exactly on the end of the
     * header instead of running past it.
     */
    private static int closingQuoteOrEnd(string cookies, int openQuote)
    {
        int close = cookies.IndexOf('"', openQuote + 1);
        return close < 0 ? cookies.Length - 1 : close;
    }
}
// Matches an "expires=<date>" attribute up to (not including) the next ',' / ';' / "name=" / end of input.
static Regex regExpires = new Regex(@"expires\=[\s]*?[\w]+.+?(?=(,|;|[\w-]+\=|$))", RegexOptions.IgnoreCase);
// Matches the value-less HttpOnly flag so it can be stripped before tokenizing.
static Regex regHttpOnly = new Regex(@"httponly([\s]*?|,)", RegexOptions.IgnoreCase);
// Attribute names that describe the preceding cookie instead of starting a new one.
static string[] systemKey = new string[] {
"expires","domain","path","max-age","version"
};

/// <summary>
/// Parses a Set-Cookie style header (possibly several cookies joined by commas)
/// into <see cref="Cookie"/> objects.
/// </summary>
/// <param name="cookieHeader">Raw header text; null or empty yields an empty list.</param>
/// <param name="defaultDomain">Domain assigned to a cookie unless a domain attribute follows it.</param>
/// <returns>
/// One cookie per distinct name (a later cookie with the same name replaces the earlier one).
/// Each cookie starts with Path "/" and an expiry one day from now; expires, max-age,
/// domain, path and secure attributes that follow it override those defaults.
/// </returns>
static public List<Cookie> ParseCookieHeader(string cookieHeader, string defaultDomain)
{
    List<Cookie> cList = new List<Cookie>();
    if (string.IsNullOrEmpty(cookieHeader))
    {
        return cList;
    }

    // Date text contains commas and spaces that would confuse the tokenizer,
    // so every expires date is replaced by its tick count first. A date that
    // cannot be parsed is removed, leaving the cookie on its default expiry.
    foreach (Match maExpires in regExpires.Matches(cookieHeader))
    {
        DateTime date;
        string replacement = TryParseExpiresDate(maExpires.Value, out date)
            ? "expires=" + date.Ticks.ToString(System.Globalization.CultureInfo.InvariantCulture) + ";"
            : "";
        cookieHeader = cookieHeader.Replace(maExpires.Value, replacement);
    }
    cookieHeader = regHttpOnly.Replace(cookieHeader, "");

    // A tokenizer per call: the tokenizer keeps mutable state, so sharing one
    // static instance between concurrent callers would mix up their tokens.
    CookieTokenizer tokenizer = new CookieTokenizer();
    int count = tokenizer.tokenize(cookieHeader);
    Cookie cookie = null;
    // Tokens alternate name, value; a trailing name without a value slot is ignored.
    for (int i = 0; i + 1 < count; i += 2)
    {
        string key = tokenizer.tokens[i];
        string value = tokenizer.tokens[i + 1];
        if (string.IsNullOrEmpty(key))
        {
            continue;
        }
        string lowerKey = key.ToLowerInvariant();
        // "secure" (value-less) and "samesite" are attributes too; treating
        // them as cookie names would send phantom cookies back to the server.
        bool isAttribute = Array.IndexOf(systemKey, lowerKey) >= 0
            || lowerKey == "samesite"
            || (lowerKey == "secure" && value == null);
        if (!isAttribute)
        {
            cookie = new Cookie();
            cookie.Name = key;
            cookie.Value = value;
            cookie.Path = "/";
            cookie.Expires = DateTime.Now.AddDays(1);
            cookie.Domain = defaultDomain;
            string trimmedName = key.Trim();
            cList.RemoveAll(p => string.Equals(p.Name.Trim(), trimmedName, StringComparison.OrdinalIgnoreCase));
            cList.Add(cookie);
            continue;
        }
        if (cookie == null)
        {
            // Attribute with no cookie in front of it: nothing to apply it to.
            continue;
        }
        switch (lowerKey)
        {
            case "expires":
                {
                    // The value is the tick count written by the pre-pass above.
                    long ticks;
                    bool valid = long.TryParse(value, System.Globalization.NumberStyles.None,
                            System.Globalization.CultureInfo.InvariantCulture, out ticks)
                        && ticks >= DateTime.MinValue.Ticks && ticks <= DateTime.MaxValue.Ticks;
                    cookie.Expires = valid ? new DateTime(ticks, DateTimeKind.Local) : DateTime.Now.AddDays(1);
                    break;
                }
            case "domain":
                cookie.Domain = value;
                break;
            case "path":
                cookie.Path = value;
                break;
            case "max-age":
                {
                    // Max-Age is a lifetime in seconds counted from now.
                    double seconds;
                    DateTime expiry = DateTime.Now.AddDays(1);
                    if (double.TryParse(value, System.Globalization.NumberStyles.Float,
                            System.Globalization.CultureInfo.InvariantCulture, out seconds))
                    {
                        try
                        {
                            expiry = DateTime.Now.AddSeconds(seconds);
                        }
                        catch (ArgumentOutOfRangeException)
                        {
                            // Lifetime too large for DateTime: keep the one-day default.
                        }
                    }
                    cookie.Expires = expiry;
                    break;
                }
            case "secure":
                cookie.Secure = true;
                break;
            // "version" and "samesite" are recognised but intentionally ignored.
        }
    }
    return cList;
}

/// <summary>
/// Extracts and parses the date of an "expires=..." attribute matched by regExpires.
/// The full text is tried first; if that fails the text after the first comma is
/// tried, which drops a leading weekday such as "Wed,".
/// </summary>
private static bool TryParseExpiresDate(string expiresAttribute, out DateTime date)
{
    // 8 == "expires=".Length; the regex guarantees the prefix is present.
    string dateText = expiresAttribute.Trim().Substring(8).Trim().TrimEnd(',', ';').Trim();
    System.Globalization.CultureInfo culture = System.Globalization.CultureInfo.InvariantCulture;
    if (DateTime.TryParse(dateText, culture, System.Globalization.DateTimeStyles.None, out date))
    {
        return true;
    }
    int comma = dateText.IndexOf(',');
    return comma >= 0
        && DateTime.TryParse(dateText.Substring(comma + 1), culture, System.Globalization.DateTimeStyles.None, out date);
}
// Every cookie currently held by this manager, across all domains.
List<Cookie> cookieList = new List<Cookie>();

/// <summary>
/// Stores each cookie of the collection. Before a cookie is appended, the first
/// stored cookie with the same name (case-sensitive) and the same domain
/// (compared after lower-casing) is removed.
/// </summary>
public void SetCookie(CookieCollection cookies)
{
    foreach (Cookie incoming in cookies)
    {
        Cookie previous = cookieList.Find(
            stored => stored.Name == incoming.Name
                   && stored.Domain.ToLower() == incoming.Domain.ToLower());
        if (previous != null)
        {
            cookieList.Remove(previous);
        }
        cookieList.Add(incoming);
    }
}

/// <summary>
/// Stores a list of cookies by wrapping them in a CookieCollection and
/// delegating to the collection overload.
/// </summary>
public void SetCookie(List<Cookie> cookies)
{
    CookieCollection wrapped = new CookieCollection();
    cookies.ForEach(wrapped.Add);
    SetCookie(wrapped);
}

/// <summary>
/// Parses a raw cookie header with ParseCookieHeader and stores the result.
/// </summary>
public void SetCookie(string cookieHeader, string defaultDomain)
{
    List<Cookie> parsed = ParseCookieHeader(cookieHeader, defaultDomain);
    SetCookie(parsed);
}

/// <summary>
/// Stores a single cookie via the collection overload.
/// </summary>
public void SetCookie(Cookie ck)
{
    CookieCollection single = new CookieCollection { ck };
    SetCookie(single);
}
/// <summary>
/// Builds the Cookie request-header value for a host: the cookies selected by
/// GetCookies, formatted by GetCookieString.
/// </summary>
public string GetCookieHeader(string host)
{
    return GetCookieString(GetCookies(host));
}
// Extracts the registrable domain ("qq.com", "example.com.cn") for the suffixes this
// class knows about. Labels may contain hyphens, so "my-site.com" is not cut to "site.com".
static Regex regDomain = new Regex(@"[\w-]+\.(org\.cn|net\.cn|com\.cn|com|net|org|gov|cc|biz|info|cn|hk)+$", RegexOptions.IgnoreCase);

/// <summary>
/// Returns the stored cookies that apply to a host.
/// </summary>
/// <param name="serverHost">Host name of the request, e.g. "mail.qq.com".</param>
/// <returns>
/// Cookies whose Domain equals (ignoring case) the host itself, the registrable
/// domain, or any intermediate sub-domain, each with or without a leading dot.
/// A leading "www." is ignored when deriving the sub-domains. Hosts whose suffix
/// is not recognised by regDomain (other TLDs, "localhost", IP addresses) match
/// on the exact host name only. A null or empty host yields an empty collection.
/// </returns>
public CookieCollection GetCookies(string serverHost)
{
    CookieCollection cc = new CookieCollection();
    if (string.IsNullOrEmpty(serverHost))
    {
        return cc;
    }
    string lowerHost = serverHost.ToLowerInvariant();
    HashSet<string> domainList = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
    domainList.Add(lowerHost);

    string domain = regDomain.Match(lowerHost).Value;
    // No recognised suffix means no safe way to derive parent domains;
    // previously this case threw from string.Replace("", ...).
    if (domain.Length > 0)
    {
        domainList.Add(domain);
        domainList.Add("." + domain);

        string host = lowerHost.StartsWith("www.", StringComparison.Ordinal)
            ? lowerHost.Substring(4)
            : lowerHost;
        if (host.Length > domain.Length && host.EndsWith("." + domain, StringComparison.Ordinal))
        {
            // Walk the sub-domain labels right to left: for a.b.qq.com this
            // adds b.qq.com and then a.b.qq.com (plus dotted variants).
            string[] labels = host.Substring(0, host.Length - domain.Length - 1).Split('.');
            string suffix = domain;
            for (int i = labels.Length - 1; i >= 0; i--)
            {
                if (labels[i].Length == 0)
                {
                    continue;
                }
                suffix = labels[i] + "." + suffix;
                domainList.Add(suffix);
                domainList.Add("." + suffix);
            }
        }
    }

    foreach (Cookie cookie in cookieList)
    {
        if (domainList.Contains(cookie.Domain))
        {
            cc.Add(cookie);
        }
    }
    return cc;
}
/// <summary>
/// Copies a list of cookies into a new CookieCollection, adding them in list order.
/// </summary>
public CookieCollection Convert(List<Cookie> cks)
{
    CookieCollection collection = new CookieCollection();
    cks.ForEach(collection.Add);
    return collection;
}

/// <summary>
/// Copies the cookies of a CookieCollection into a new list, in enumeration order.
/// </summary>
public List<Cookie> Convert(CookieCollection cks)
{
    List<Cookie> list = new List<Cookie>();
    System.Collections.IEnumerator cursor = cks.GetEnumerator();
    while (cursor.MoveNext())
    {
        list.Add((Cookie)cursor.Current);
    }
    return list;
}
/// <summary>
/// Formats cookies as a Cookie request-header value: "name=value" pairs
/// separated by "; " with no trailing separator.
/// </summary>
/// <param name="cks">Cookies to format.</param>
/// <returns>The header value; an empty string when the collection is empty.</returns>
private string GetCookieString(CookieCollection cks)
{
    List<string> pairs = new List<string>(cks.Count);
    foreach (Cookie cookie in cks)
    {
        pairs.Add(cookie.Name + "=" + cookie.Value);
    }
    // Joining avoids the old length-based trimming, which left a trailing
    // "; " behind whenever the whole header was three characters or shorter.
    return string.Join("; ", pairs.ToArray());
}
/// <summary>
/// Makes every stored cookie available to <paramref name="domain"/> by storing a
/// copy of each cookie that GetCookies does not already return for that domain.
/// </summary>
/// <param name="domain">Domain assigned to the copies.</param>
/// <remarks>
/// Copies carry Name, Value, Path, Expired and Expires only. They are stored through
/// SetCookie, so a copy replaces an existing cookie with the same name and domain;
/// calling the method repeatedly therefore no longer piles up duplicates.
/// </remarks>
public void SetAllCookieToDomain(string domain)
{
    var atCookie = Convert(GetCookies(domain));
    // Snapshot first: the loop below modifies cookieList.
    var needCookie = cookieList.Where(p => !atCookie.Contains(p)).ToArray();
    foreach (Cookie item in needCookie)
    {
        SetCookie(new Cookie()
        {
            Domain = domain,
            Expired = item.Expired,
            Expires = item.Expires,
            Name = item.Name,
            Path = item.Path,
            Value = item.Value
        });
    }
}
/// <summary>
/// Removes every stored cookie.
/// </summary>
public void Clear()
{
    cookieList.Clear();
}

/// <summary>
/// Removes stored cookies by name.
/// </summary>
/// <param name="name">Cookie name; compared after trimming, case-sensitively. Null removes nothing.</param>
/// <param name="doamin">
/// Optional host. When given, only the cookies GetCookies returns for it are
/// considered; otherwise all stored cookies are.
/// </param>
public void RemoveCookie(string name, string doamin = null)
{
    if (name == null)
    {
        return;
    }
    string wanted = name.Trim();
    // Work on a snapshot so that removing from cookieList cannot invalidate the loop.
    List<Cookie> candidates = string.IsNullOrEmpty(doamin)
        ? new List<Cookie>(cookieList)
        : Convert(GetCookies(doamin));
    foreach (Cookie cookie in candidates)
    {
        if (cookie.Name.Trim() == wanted)
        {
            cookieList.Remove(cookie);
        }
    }
}
/// <summary>
/// Writes stored cookies through WinAPI.InternetSetCookie.
/// </summary>
/// <param name="host">
/// Optional host. When given, only the cookies GetCookies returns for it are
/// written; otherwise every stored cookie is.
/// </param>
/// <remarks>
/// Each cookie is written to "http://" + its domain ("www" is prepended when the
/// domain starts with a dot) and the call is retried up to five times until it
/// reports success.
/// </remarks>
public void SetIECookie(string host = null)
{
    var cks = cookieList;
    if (!string.IsNullOrEmpty(host))
    {
        cks = Convert(GetCookies(host));
    }
    // Iterate the filtered list; the loop used to walk cookieList, which
    // silently ignored the host argument.
    foreach (var cookie in cks)
    {
        string url = "http://" + (cookie.Domain.StartsWith(".") ? "www" : "") + cookie.Domain;
        string data = cookie.Value + ";expires=" + cookie.Expires.ToGMTString();
        for (int attempt = 0; attempt < 5; attempt++)
        {
            if (WinAPI.InternetSetCookie(url, cookie.Name, data))
            {
                break;
            }
        }
    }
}
}
abstract public class WebAction
{
/* Class: base support class for web operations
* Description: provides the basic interface for web operations
* Created: 2011-10-25
* Version: 0.4
* Author: by rolends986
*/
/*
* Version history
* 0.1 Initial code and debugging
* 0.2 Reworked the main entry method to support multiple optional parameters 2011-11-1
* 0.3 Added SetUseUnsafeHeaderParsing, revised the lock logic, removed the URL-encoding logic (it caused header-parsing problems on some servers) 2011-12-2
* 0.4 Added proxy control logic, changed the useUnsafeHeaderParsing parameter, added resource-release logic 2011-12-12
*/
// Sets the initial values of the static connection settings declared below.
static WebAction()
{
DefaultConnectionLimit = 1000;
KeepAliveTime = 10 * 1000;
KeepAliveInterval = 300;
}
// Cookie store of this instance: GetWebData reads the Cookie request header from it
// and feeds every Set-Cookie response header back into it.
protected CookieManager _cookieManager = new CookieManager();
// Log looked up in LogManager.Logs under LogName on every access.
protected XQSoft.Common.Log LogObject { get { return LogManager.Logs[LogName]; } }
// Backing field for LogName; stays empty when the parameterless constructor is used.
string _logName = "";
// Name of the log to write to; virtual so subclasses can supply their own.
virtual protected string LogName { get { return _logName; } }
// Creates an instance whose LogName is the empty string.
public WebAction()
{
}
// Creates an instance that logs under the given log name.
public WebAction(string logName)
{
_logName = logName;
}
// Default User-Agent applied by GetWebData (note the leading space inside the literal).
public const string _userAgent_FF = " Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko/20100101 Firefox/11.0";
// Alternative Internet Explorer style User-Agent string.
public const string _userAgent_IE = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET4.0C; .NET4.0E)";
// Default Accept, Accept-Language and Accept-Charset header values applied by GetWebData.
public const string _defaultAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
public const string _defaultAcceptLanguage = "zh-cn,zh;q=0.5";
public const string _defaultAcceptCharset = "GB2312,utf-8;q=0.7,*;q=0.7";
// Copied to ServicePointManager.DefaultConnectionLimit at the start of every GetWebData call.
public static int DefaultConnectionLimit { get; set; }
// Keep-alive settings; within this file they are only referenced by commented-out code.
static public int KeepAliveTime { get; set; }
static public int KeepAliveInterval { get; set; }
// When GetWebData is called without an explicit useProxy value, this flag decides whether a proxy is used.
public bool EnableProxy { get; set; }
// Proxy used by SetProxy; assigned by ChangeProxy or through the protected setter.
ProxyInfo _proxyInfo = null;
public ProxyInfo ProxyInfo { get { return _proxyInfo; } protected set { _proxyInfo = value; } }
// Key handed to ProxyManager.GetAvailableProxy by ChangeProxy.
// NOTE(review): new Guid() is the all-zero GUID, so this returns the same string for
// every instance and every call; Guid.NewGuid() may have been intended — confirm
// against how ProxyManager uses the key before changing it.
public string Key { get { return new Guid().ToString(); } }
// Held by GetWebData while it adjusts the global certificate validation callback
// and fetches an HTTPS response.
static object sslLock = new object();
/// <summary>
/// Server-certificate validation callback that approves every certificate.
/// </summary>
/// <remarks>
/// SECURITY: none of the arguments are inspected, so any certificate is accepted
/// regardless of the reported policy errors. While this callback is installed,
/// HTTPS connections are not protected against man-in-the-middle attacks.
/// </remarks>
/// <returns>Always <c>true</c>.</returns>
static public bool CheckValidationResult(object sender, X509Certificate certificate, X509Chain chain, SslPolicyErrors errors)
{
    const bool acceptCertificate = true;
    return acceptCertificate;
}
/// <summary>
/// Replaces the current proxy with the one ProxyManager.GetAvailableProxy
/// returns for this instance's Key.
/// </summary>
virtual protected void ChangeProxy()
{
    string ownerKey = this.Key;
    _proxyInfo = ProxyManager.GetAvailableProxy(ownerKey);
}
/*
 New entry method, by rolends1986
 * 2011-10-27
 1. Supports a custom Content-Type
 2. Encapsulates encoding; converts form key/value pairs itself
 3. Supports customising the individual request parameters
 4. Detects the response encoding automatically
 5. Supports specifying client certificates
*/
/// <summary>
/// Sends one HTTP(S) request and converts the response body to <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">
/// <c>string</c>: decoded body text. <c>Image</c>: body loaded with Image.FromStream.
/// <c>ResponseLocation</c>: decoded body plus the Location response header.
/// Any other type: the raw body bytes cast to T.
/// </typeparam>
/// <param name="url">Absolute request URL.</param>
/// <param name="charset">Fallback body encoding; for string results a charset declared in the response Content-Type header replaces it.</param>
/// <param name="referer">Referer header value, if any.</param>
/// <param name="postInfo">When non-null the request is sent as a POST with this body and content type.</param>
/// <param name="useProxy">Explicit proxy switch; when null the EnableProxy property decides.</param>
/// <param name="headers">Additional request headers.</param>
/// <param name="userAgent">Overrides the default Firefox User-Agent.</param>
/// <param name="certificates">Client certificates for HTTPS; when null the accept-all server certificate callback is installed instead.</param>
/// <param name="protocol">Overrides the HTTP protocol version (HTTPS requests default to 1.0).</param>
/// <param name="allowAutoRedirect">Whether redirects are followed automatically; null is treated like false.</param>
/// <param name="keepAlive">Not used: the request is always sent with KeepAlive off because it is released manually afterwards.</param>
/// <param name="accept">Overrides the default Accept header.</param>
/// <param name="acceptLanguage">Overrides the default Accept-Language header.</param>
/// <param name="acceptCharset">Overrides the default Accept-Charset header.</param>
/// <param name="urlEncoding">Not used: the URL-encoding step is disabled.</param>
/// <param name="cachePolicy">Overrides the default no-cache/no-store policy.</param>
/// <remarks>
/// Failures are reported through ThrowException: 505 no client certificates found,
/// 502 invalid extra header, 503 request failed, 504 body is not an image.
/// The request and response are released on every path once a response was obtained.
/// </remarks>
virtual protected T GetWebData<T>(string url,
    string charset = null,
    string referer = null,
    PostInfo postInfo = null,
    bool? useProxy = null,
    NameValueCollection headers = null,
    string userAgent = null,
    Certificates certificates = null,
    Version protocol = null,
    bool? allowAutoRedirect = false,
    bool? keepAlive = null,
    string accept = null,
    string acceptLanguage = null,
    string acceptCharset = null,
    string urlEncoding = null,
    RequestCachePolicy cachePolicy = null)
{
    System.Net.ServicePointManager.DefaultConnectionLimit = DefaultConnectionLimit;
    var uri = new Uri(url);
    var request = (HttpWebRequest)WebRequest.Create(url);
    request.ServicePoint.Expect100Continue = false;
    request.Proxy = null;
    // An explicit useProxy argument wins; otherwise the instance-wide flag decides.
    if (useProxy.HasValue ? useProxy.Value : EnableProxy)
    {
        SetProxy(request);
    }
    #region set default
    request.KeepAlive = false;
    request.AllowAutoRedirect = false;
    request.UserAgent = _userAgent_FF;
    request.Accept = _defaultAccept;
    request.Headers.Add(HttpRequestHeader.AcceptLanguage, _defaultAcceptLanguage);
    request.Headers.Add(HttpRequestHeader.AcceptCharset, _defaultAcceptCharset);
    request.CachePolicy = new System.Net.Cache.RequestCachePolicy(System.Net.Cache.RequestCacheLevel.NoCacheNoStore);
    request.Method = "get";
    #endregion
    bool isHttps = url.ToLower().IndexOf("https") == 0;
    if (isHttps)
    {
        if (certificates != null)
        {
            X509CertificateCollection crts = null;
            if (certificates.IsAuto)
            {
                crts = GetCertificates(uri);
            }
            else
            {
                crts = certificates.CertificateCollection;
            }
            if (crts == null) ThrowException(505, url);
            request.ClientCertificates = crts;
        }
        request.ProtocolVersion = HttpVersion.Version10;
    }
    if (allowAutoRedirect.HasValue) request.AllowAutoRedirect = allowAutoRedirect.Value;
    // keepAlive is deliberately not applied: resources are released manually, so it would have no effect.
    if (!string.IsNullOrEmpty(userAgent)) request.UserAgent = userAgent;
    if (!string.IsNullOrEmpty(accept)) request.Accept = accept;
    if (!string.IsNullOrEmpty(acceptLanguage))
    {
        if (request.Headers[HttpRequestHeader.AcceptLanguage] == null)
            request.Headers.Add(HttpRequestHeader.AcceptLanguage, acceptLanguage);
        else
            request.Headers[HttpRequestHeader.AcceptLanguage] = acceptLanguage;
    }
    if (!string.IsNullOrEmpty(acceptCharset))
    {
        if (request.Headers[HttpRequestHeader.AcceptCharset] == null)
            request.Headers.Add(HttpRequestHeader.AcceptCharset, acceptCharset);
        else
            request.Headers[HttpRequestHeader.AcceptCharset] = acceptCharset;
    }
    if (!string.IsNullOrEmpty(referer)) request.Referer = referer;
    if (cachePolicy != null) request.CachePolicy = cachePolicy;
    if (protocol != null) request.ProtocolVersion = protocol;
    try
    {
        if (headers != null)
            foreach (var nv in headers.AllKeys)
            {
                request.Headers.Add(nv, headers[nv]);
            }
    }
    catch (Exception ex)
    {
        DisposeRequest(request);
        // request header error 502
        ThrowException(502, ex.Message);
    }
    string requestCookie = _cookieManager.GetCookieHeader(uri.Host);
    if (!String.IsNullOrEmpty(requestCookie))
    {
        request.Headers.Add(HttpRequestHeader.Cookie, requestCookie);
    }
    if (postInfo != null)
    {
        request.Method = "post";
        byte[] byteArray = postInfo.GetPostData();
        request.ContentType = postInfo.GetContentType();
        request.ContentLength = byteArray.Length;
        // using: the request stream is closed even when the write fails.
        using (Stream dataStream = request.GetRequestStream())
        {
            dataStream.Write(byteArray, 0, byteArray.Length);
        }
    }
    WebResponse response = null;
    try
    {
        if (isHttps)
        {
            // The validation callback is process-wide, so it is switched and the
            // response fetched under one lock.
            lock (sslLock)
            {
                if (certificates != null)
                {
                    if (ServicePointManager.ServerCertificateValidationCallback != null)
                        ServicePointManager.ServerCertificateValidationCallback -= CheckValidationResult;
                }
                else
                {
                    if (ServicePointManager.ServerCertificateValidationCallback == null)
                        ServicePointManager.ServerCertificateValidationCallback += CheckValidationResult;
                }
                response = request.GetResponse();
            }
        }
        else
        {
            response = request.GetResponse();
        }
    }
    catch (Exception ex)
    {
        DisposeRequest(request);
        DisposeResponse(response);
        // get response error 503
        ThrowException(503, ex.Message);
    }
    try
    {
        string cookie = response.Headers.Get("Set-Cookie");
        if (!String.IsNullOrEmpty(cookie))
        {
            _cookieManager.SetCookie(cookie, uri.Host);
        }
        var sm = response.GetResponseStream();
        if (typeof(T) == typeof(string))
        {
            // The encoding declared by the server takes precedence over the caller's fallback.
            string declaredCharset = ParseCharsetFromContentType(response.Headers["Content-Type"]);
            if (declaredCharset != null)
            {
                charset = declaredCharset;
            }
            return (T)(object)GetHtml(sm, charset);
        }
        else if (typeof(Image) == typeof(T))
        {
            try
            {
                return (T)(object)Image.FromStream(sm);
            }
            catch (Exception ex)
            {
                // to image error 504
                ThrowException(504, ex.Message);
                return default(T);
            }
        }
        else if (typeof(ResponseLocation) == typeof(T))
        {
            ResponseLocation rl = new ResponseLocation()
            {
                Html = GetHtml(sm, charset),
                Url = response.Headers["Location"]
            };
            return (T)(object)rl;
        }
        else
        {
            return (T)(object)GetData(sm);
        }
    }
    finally
    {
        // Runs on success and on failure, so a decoding or conversion error
        // no longer leaks the connection.
        DisposeRequest(request);
        DisposeResponse(response);
    }
}

/// <summary>
/// Returns the value of the charset parameter of a Content-Type header, without
/// surrounding quotes, or null when the header carries no usable charset.
/// </summary>
private static string ParseCharsetFromContentType(string contentType)
{
    if (string.IsNullOrEmpty(contentType))
    {
        return null;
    }
    string[] parts = contentType.Split(';');
    // parts[0] is the media type itself; parameters start at index 1.
    for (int i = 1; i < parts.Length; i++)
    {
        int eq = parts[i].IndexOf('=');
        if (eq < 0)
        {
            continue;
        }
        string parameterName = parts[i].Substring(0, eq).Trim();
        if (!string.Equals(parameterName, "charset", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }
        string value = parts[i].Substring(eq + 1).Trim().Trim('"', '\'');
        if (value.Length > 0)
        {
            return value;
        }
    }
    return null;
}
/// <summary>
/// Best-effort release of a response: closes its stream, then the response itself.
/// Failures while closing are ignored. A null response is a no-op.
/// </summary>
private void DisposeResponse(WebResponse response)
{
    if (response == null)
    {
        return;
    }
    try
    {
        response.GetResponseStream().Close();
    }
    catch (Exception)
    {
        // Cleanup only: a stream that cannot be closed must not mask the caller's result.
    }
    try
    {
        response.Close();
    }
    catch (Exception)
    {
        // Same reasoning as above.
    }
}
/// <summary>
/// Best-effort release of a request by aborting it. A null request is a no-op.
/// </summary>
/// <remarks>
/// The request stream is intentionally not touched here: asking for it during
/// cleanup throws (GET requests have no request stream, and a submitted request
/// cannot hand one out again), which only produced exceptions that were swallowed.
/// </remarks>
private void DisposeRequest(HttpWebRequest request)
{
    if (request == null)
    {
        return;
    }
    try
    {
        request.Abort();
    }
    catch (Exception)
    {
        // Cleanup only: never let a failed abort replace the caller's own error.
    }
}
/// <summary>
/// Routes the request through the proxy described by ProxyInfo.
/// Reports error 533 through ThrowException when no proxy has been assigned.
/// </summary>
private void SetProxy(HttpWebRequest request)
{
    bool proxyMissing = ProxyInfo == null;
    if (proxyMissing)
    {
        ThrowException(533, "代理实例为空,请先实例化");
    }
    string proxyHost = ProxyInfo.IPAddress.ToString();
    var proxyPort = ProxyInfo.Port;
    request.Proxy = new WebProxy(proxyHost, proxyPort);
}
/// <summary>
/// Switches the private useUnsafeHeaderParsing field of the internal
/// System.Net settings section via reflection.
/// </summary>
/// <param name="boolVal">Value to store in the field.</param>
/// <returns>
/// <c>true</c> only when the field was found and written; <c>false</c> when any
/// reflection step came back empty or threw.
/// </returns>
public static bool SetUseUnsafeHeaderParsing(bool boolVal)
{
    try
    {
        Assembly assem = Assembly.GetAssembly(typeof(System.Net.Configuration.SettingsSection));
        if (assem == null) return false;
        Type assemType = assem.GetType("System.Net.Configuration.SettingsSectionInternal");
        if (assemType == null) return false;
        object obj = assemType.InvokeMember("Section", BindingFlags.Static | BindingFlags.GetProperty |
            BindingFlags.NonPublic, null, null, new object[] { });
        if (obj == null) return false;
        FieldInfo fieldInfo = assemType.GetField("useUnsafeHeaderParsing", BindingFlags.NonPublic | BindingFlags.Instance);
        if (fieldInfo == null) return false;
        fieldInfo.SetValue(obj, boolVal);
        return true;
    }
    catch (Exception)
    {
        // The setting was not applied, so do not report success.
        return false;
    }
}
/// <summary>
/// Re-encodes the query string of a URL with UrlUnit.UrlEncode using the named encoding.
/// </summary>
/// <param name="url">URL whose query part should be encoded.</param>
/// <param name="code">Encoding name; when null or empty the URL is returned untouched.</param>
/// <returns>
/// The URL with every "name=value" pair of the query encoded and re-joined with '&amp;'.
/// A pair without '=' is emitted as "name=". The URL is returned unchanged when the
/// '?' is missing, sits at index 0 or 1, or is the last character.
/// </returns>
private string EncodeUrl(string url, string code)
{
    if (string.IsNullOrEmpty(code))
    {
        return url;
    }
    Encoding queryEncoding = Encoding.GetEncoding(code);
    int queryStart = url.IndexOf('?');
    bool hasEncodableQuery = queryStart > 1 && queryStart < url.Length - 1;
    if (!hasEncodableQuery)
    {
        return url;
    }

    List<string> encodedPairs = new List<string>();
    foreach (string pair in url.Substring(queryStart + 1).Split('&'))
    {
        int separator = pair.IndexOf("=");
        string name = separator < 0 ? pair : pair.Substring(0, separator);
        string value = separator < 0 ? "" : pair.Substring(separator + 1);
        encodedPairs.Add(UrlUnit.UrlEncode(name, queryEncoding) + "=" + UrlUnit.UrlEncode(value, queryEncoding));
    }
    return url.Substring(0, queryStart + 1) + string.Join("&", encodedPairs.ToArray());
}
/// <summary>
/// Decompresses a GZip-compressed buffer.
/// </summary>
/// <param name="gzip">Complete GZip data.</param>
/// <returns>The decompressed bytes.</returns>
public byte[] GZipDecompress(byte[] gzip)
{
    using (GZipStream stream = new GZipStream(new MemoryStream(gzip),
        CompressionMode.Decompress))
    {
        return ReadDecompressed(stream);
    }
}

/// <summary>
/// Decompresses a Deflate-compressed buffer.
/// </summary>
/// <param name="deflate">Complete Deflate data.</param>
/// <returns>The decompressed bytes.</returns>
public byte[] DeflateDecompress(byte[] deflate)
{
    using (DeflateStream stream = new DeflateStream(new MemoryStream(deflate),
        CompressionMode.Decompress))
    {
        return ReadDecompressed(stream);
    }
}

/// <summary>
/// Reads a decompression stream to its end in 4 KB chunks and returns everything read.
/// Shared by both decompress methods, which previously each carried a copy of this loop.
/// </summary>
private static byte[] ReadDecompressed(Stream source)
{
    const int size = 4096;
    byte[] buffer = new byte[size];
    using (MemoryStream memory = new MemoryStream())
    {
        int count;
        while ((count = source.Read(buffer, 0, size)) > 0)
        {
            memory.Write(buffer, 0, count);
        }
        return memory.ToArray();
    }
}
/// <summary>
/// Reads a stream to its end and returns everything that was read.
/// </summary>
/// <param name="sm">Stream to drain; it is read from its current position and not closed.</param>
/// <returns>The bytes read; an empty array when the stream has no more data.</returns>
private byte[] GetData(Stream sm)
{
    byte[] buffer = new byte[1024 * 8];
    // Collect into a MemoryStream rather than re-allocating and copying the
    // whole result array after every 8 KB read.
    using (MemoryStream collected = new MemoryStream())
    {
        int dataLength;
        while ((dataLength = sm.Read(buffer, 0, buffer.Length)) > 0)
        {
            collected.Write(buffer, 0, dataLength);
        }
        return collected.ToArray();
    }
}
/// <summary>
/// Reads the whole stream and decodes it to text, writing the result to the log.
/// </summary>
/// <param name="sm">Response body stream.</param>
/// <param name="charset">
/// Encoding name. When null or empty the body is decoded as utf-8 first and then
/// passed to CheckEncoding, which may re-decode it.
/// </param>
/// <returns>The decoded text. Any failure while decoding or logging is reported as error 509 through ThrowException.</returns>
private string GetHtml(Stream sm, string charset)
{
    byte[] payload = GetData(sm);
    bool charsetUnknown = string.IsNullOrEmpty(charset);
    string effectiveCharset = charsetUnknown ? "utf-8" : charset;
    try
    {
        string text = Encoding.GetEncoding(effectiveCharset).GetString(payload);
        if (charsetUnknown)
        {
            text = CheckEncoding(payload, effectiveCharset, text);
        }
        string[] logLines =
        {
            "==============================================\r\n",
            text,
            "==============================================",
            "******************************分割*************************"
        };
        foreach (string line in logLines)
        {
            LogObject.WriteLine(line);
        }
        return text;
    }
    catch (Exception ex)
    {
        // decode / log error 509
        ThrowException(509, ex.Message);
        return "";
    }
}
// Charset inside <meta ... content="...; charset=xxx">.
protected static Regex regCharset = new Regex("(?<=<meta.+?content\\=.+?charset\\=).+?(?=(\\\"|[\\s]))", RegexOptions.IgnoreCase);
// Charset inside <meta charset="xxx">. The name may contain '-' and '_' ("utf-8",
// "iso-8859-1"); without them such names were cut off at the first hyphen.
protected static Regex regCharset2 = new Regex("(?<=<meta[\\s]+charset=[\\\"]{0,1})[a-z0-9_\\-]+(?=[\\\"]{0,1})", RegexOptions.IgnoreCase);

/// <summary>
/// Re-decodes a page when its meta tag declares a charset other than the one already used.
/// </summary>
/// <param name="data">Raw page bytes.</param>
/// <param name="currentCharset">Charset that produced <paramref name="html"/>.</param>
/// <param name="html">Page text decoded with <paramref name="currentCharset"/>.</param>
/// <returns>
/// The bytes decoded with the declared charset, or <paramref name="html"/> unchanged when
/// no charset is declared, it equals the current one, or it cannot be used. When both
/// meta forms are present the &lt;meta charset=...&gt; form wins.
/// </returns>
private string CheckEncoding(byte[] data, string currentCharset, string html)
{
    string pageCharset = "";
    Match contentTypeMeta = regCharset.Match(html);
    if (contentTypeMeta.Success)
    {
        pageCharset = contentTypeMeta.Value.Trim().ToLower();
    }
    Match charsetMeta = regCharset2.Match(html);
    if (charsetMeta.Success)
    {
        pageCharset = charsetMeta.Value.Trim().ToLower();
    }
    // Nothing declared, or nothing new: keep the text as decoded.
    if (pageCharset.Length == 0 || pageCharset == currentCharset.Trim().ToLower())
    {
        return html;
    }
    try
    {
        return Encoding.GetEncoding(pageCharset).GetString(data);
    }
    catch (ArgumentException)
    {
        // Unknown charset name: fall back to the existing text.
        return html;
    }
    catch (NotSupportedException)
    {
        // Charset not available on this platform: fall back as well.
        return html;
    }
}
/// <summary>
/// Collects the certificates whose subject contains the host name of the URI.
/// </summary>
/// <param name="uri">Request URI; only its Host is used, compared case-insensitively.</param>
/// <returns>Matching certificates from every searched store; empty when none match.</returns>
/// <remarks>
/// Searches the stores with numeric StoreName values 1 through 8 in both
/// StoreLocation values 1 and 2. Every store is closed again after it was searched.
/// </remarks>
virtual protected X509CertificateCollection GetCertificates(Uri uri)
{
    X509CertificateCollection certs = new X509CertificateCollection();
    string host = uri.Host.ToLower();
    for (int i = 0; i < 8; i++)
    {
        for (int j = 1; j <= 2; j++)
        {
            X509Store store = new X509Store((StoreName)(i + 1), (StoreLocation)j);
            try
            {
                store.Open(OpenFlags.MaxAllowed);
                foreach (var cert in store.Certificates)
                {
                    // NOTE(review): this prints every certificate subject to the console;
                    // it looks like leftover debug output — confirm before removing.
                    Console.WriteLine(cert.Subject);
                    if (cert.Subject.ToLower().Contains(host))
                    {
                        certs.Add(cert);
                    }
                }
            }
            finally
            {
                // Release the store handle even when opening or enumerating failed.
                store.Close();
            }
        }
    }
    return certs;
}
/// <summary>
/// Raises the exception XQException.GetException builds for an error code.
/// Virtual so subclasses can change how errors are reported.
/// </summary>
/// <param name="errorCode">Numeric error code.</param>
/// <param name="sourceText">Detail text passed on to the exception factory.</param>
virtual protected void ThrowException(int errorCode, string sourceText)
{
    var failure = XQException.GetException(errorCode, sourceText);
    throw failure;
}
/// <summary>
/// Splits the query part of a URL into name/value pairs.
/// </summary>
/// <param name="url">
/// URL or bare query string. Everything after the first '?' is parsed; without a '?'
/// the whole text is treated as the query string.
/// </param>
/// <returns>
/// The parameters in order of appearance. A parameter without '=' gets an empty value,
/// a value keeps any further '=' characters, and empty segments are skipped.
/// Names and values are not URL-decoded. Null or empty input yields an empty collection.
/// </returns>
protected NameValueCollection GetQueryParameter(string url)
{
    NameValueCollection pars = new NameValueCollection();
    if (string.IsNullOrEmpty(url))
    {
        return pars;
    }
    // IndexOf returns -1 when there is no '?', which makes the substring start at 0.
    string query = url.Substring(url.IndexOf('?') + 1);
    foreach (string pair in query.Split('&'))
    {
        if (pair.Length == 0)
        {
            continue;
        }
        // Split on the first '=' only, so "y=a=b" keeps "a=b" as its value.
        int separator = pair.IndexOf('=');
        if (separator < 0)
        {
            pars.Add(pair, "");
        }
        else
        {
            pars.Add(pair.Substring(0, separator), pair.Substring(separator + 1));
        }
    }
    return pars;
}
}
转载请注明:http://www.cnblogs.com/Rolends
HAPPY EVERY DAY ! !