C# 在采集数据时的验证与登录处理
首先打开目标网站,查看页面源文件,找到它的登录表单部分。
比如:
<!--
  Sample login form taken from the page source.
  The form is submitted with method POST to loginMain.jsp and contains two
  inputs whose name attributes are "username" and "passwd"; those names are
  the keys to send when reproducing the login request programmatically.
-->
<form name="login" action="loginMain.jsp" method="POST" target="_top">
<table width="218" border="0" cellspacing="0" cellpadding="0">
<tr>
<td width="50" height="28" class="hui"><div align="right">用户名:</div></td>
<td width="168" height="28"><input class="hui" id="username"
maxlength="40" size="23" name="username" id="username" /></td>
</tr>
<tr>
<td height="28" class="hui"><div align="right">密 码:</div></td>
<td height="28"><input class="hui" id="passwd"
maxlength="40" size="23" name="passwd" type="password" id="passwd" /></td>
</tr>
</table>
</form>
从以上表单可以看出,表单的提交方法是 POST,提交至 loginMain.jsp 处理,共有两个表单项,即 username 和 passwd(以 input 标签的 name 属性为准)。
下面是用 C# 模拟登录的程序:Login.cs
using System.Data;
using System.Net;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
/// <summary>
/// Posts the login form to the site and collects the cookies the server sets in its response.
/// </summary>
/// <remarks>
/// The method does not inspect the response to confirm that the credentials were accepted;
/// it only returns whatever cookies were received. Callers should verify the login by
/// requesting a protected page with the returned container.
/// </remarks>
/// <returns>The cookie container holding the cookies received from the login request.</returns>
/// <exception cref="WebException">The request failed or the server returned an error status.</exception>
public static CookieContainer Get_Login()
{
    CookieContainer cc = new CookieContainer();
    string FormURL="http://blog.hnce.net/loginMain.jsp"; // absolute URL of the page that processes the form
    string FormData = "username=slick&passwd=hedy12345"; // form fields to submit; replace with your own registered account

    // The form body is sent as ASCII, so any non-ASCII or reserved characters in the
    // values must already be URL-encoded in FormData.
    byte[] data = Encoding.ASCII.GetBytes(FormData);

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(FormURL);
    request.Method = "POST"; // submit method declared by the form
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = data.Length;
    // Present a browser-like User-Agent.
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)";
    // Attach the container before the request is sent so that every cookie set
    // while the request is processed is captured.
    request.CookieContainer = cc;

    // Write the form body; disposing the stream completes the upload.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(data, 0, data.Length);
    }

    // Dispose the response so the underlying connection is released.
    // The response body itself is not needed, only the cookies.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        cc.Add(response.Cookies);
    }

    return cc;
}
调用以上的方法来获取需要登录才能查看的内容。
// Fetch a page that is only available after logging in, reusing the login cookies.
cc = Login.Get_Login(); // obtain the cookies received from the login request
string PhotoClassURL = "http://blog.hnce.net/xxx.jsp";
HttpWebRequest Myrequest = (HttpWebRequest)WebRequest.Create(PhotoClassURL);
Myrequest.CookieContainer = cc; // send the login cookies with this request
string sHtml;
// Dispose the response, its stream and the reader so the connection is released
// once the page content has been read.
using (HttpWebResponse Myresponse = (HttpWebResponse)Myrequest.GetResponse())
using (Stream Mystream = Myresponse.GetResponseStream())
using (StreamReader Myreader = new StreamReader(Mystream, System.Text.Encoding.Default))
{
    cc.Add(Myresponse.Cookies); // keep any cookies this page sets for later requests
    sHtml = Myreader.ReadToEnd();
}
sHtml即为你登录之后看到的内容。