C# 自动登录网页,浏览页面【转载】
需求:客户的数据同时存在于另外一个不可控的系统中,需要和当前系统保持同步。
思路:自动登录另外一个系统,然后抓取数据,同步到本系统中。
技术点:模拟用户登录;保存登录状态;抓取数据
/// <summary>
/// Posts form data to the target URL and returns the response body as text.
/// </summary>
/// <param name="targetURL">Address the form is posted to.</param>
/// <param name="cc">
/// Cookie container used to keep cookies and the session between calls.
/// May be null, in which case no cookies are sent or stored.
/// </param>
/// <param name="param">
/// Form fields to post (key = field name, value = field value).
/// May be null or empty, in which case an empty body is posted.
/// </param>
/// <returns>The response body (html page), decoded with the system default encoding.</returns>
public static string PostAndGetHTML(string targetURL, CookieContainer cc, Hashtable param)
{
    // Build the application/x-www-form-urlencoded body. Keys and values are
    // percent-encoded so that characters such as '&', '=', '+' or spaces inside
    // a value cannot corrupt the field structure. Callers must pass raw
    // (not already encoded) values.
    StringBuilder formData = new StringBuilder();
    if (param != null)
    {
        foreach (DictionaryEntry de in param)
        {
            if (formData.Length > 0)
            {
                formData.Append('&');
            }
            formData.Append(System.Uri.EscapeDataString(de.Key.ToString()));
            formData.Append('=');
            formData.Append(System.Uri.EscapeDataString(de.Value == null ? "" : de.Value.ToString()));
        }
    }

    // After percent-encoding the body only contains ASCII characters.
    byte[] data = System.Text.Encoding.ASCII.GetBytes(formData.ToString());

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
    request.Method = "POST";
    request.ContentType = "application/x-www-form-urlencoded";
    request.ContentLength = data.Length;
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";
    // Attach the cookies before the request stream is opened, so that the
    // stored session cookies are part of the request that is sent.
    request.CookieContainer = cc;

    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(data, 0, data.Length);
    }

    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    {
        if (cc != null)
        {
            cc.Add(response.Cookies);
        }

        using (Stream stream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.Default))
        {
            // NOTE(review): the system default encoding is used regardless of the
            // charset declared by the server - confirm this matches the target site.
            return reader.ReadToEnd();
        }
    }
}

/// <summary>
/// Appends the cell texts of an HTML table to <paramref name="dt"/>, skipping
/// the first row of the table.
/// </summary>
/// <remarks>
/// Cells are read sequentially starting after the second "&lt;tr": every
/// <c>dt.Columns.Count</c> consecutive "&lt;td" cells form one DataRow. If the
/// cells run out in the middle of a row, the partially filled row is still added.
/// </remarks>
/// <param name="dt">Table that receives the rows; its columns define how many cells make up a row.</param>
/// <param name="tableHTML">HTML text containing the table.</param>
/// <returns>The same <paramref name="dt"/> instance.</returns>
public static DataTable ConvertToDT(DataTable dt, string tableHTML)
{
    const System.StringComparison IgnoreCase = System.StringComparison.OrdinalIgnoreCase;

    if (string.IsNullOrEmpty(tableHTML))
    {
        return dt;
    }

    int lastTD = tableHTML.LastIndexOf("</td>", IgnoreCase);

    // The first row is skipped; without a second row there is nothing to read.
    int firstRow = tableHTML.IndexOf("<tr", IgnoreCase);
    if (firstRow < 0)
    {
        return dt;
    }
    int secondRow = tableHTML.IndexOf("<tr", firstRow + 3, IgnoreCase);
    if (secondRow < 0)
    {
        return dt;
    }

    int index = secondRow + 3; // after "<tr"
    while (index < lastTD)
    {
        DataRow dr = dt.NewRow();
        int cellsRead = 0;

        for (int i = 0; i < dt.Columns.Count; i++)
        {
            // Check the search result before adding the tag length: a failed
            // search must not turn into a valid-looking position.
            int tdPos = tableHTML.IndexOf("<td", index, IgnoreCase);
            if (tdPos < 0)
            {
                break;
            }
            int startTD = tdPos + 3; // after "<td"
            int endTD = tableHTML.IndexOf("</td>", startTD, IgnoreCase);
            if (endTD < 0)
            {
                break;
            }

            dr[i] = ExtractCellText(tableHTML.Substring(startTD, endTD - startTD));
            index = endTD;
            cellsRead++;
        }

        // No cell could be read: stop, otherwise the loop would never advance.
        if (cellsRead == 0)
        {
            break;
        }
        dt.Rows.Add(dr);
    }
    return dt;
}

/// <summary>
/// Returns the first non-empty text fragment of a cell, ignoring the markup around it.
/// </summary>
/// <param name="tdStr">Everything between "&lt;td" and "&lt;/td&gt;", including the rest of the opening tag.</param>
/// <returns>The trimmed text, or an empty string when the cell has no text.</returns>
private static string ExtractCellText(string tdStr)
{
    // NOTE(review): removing " " also strips spaces inside the cell text; the
    // original literal may have been "&nbsp;" before the article was rendered - confirm.
    tdStr = tdStr.Replace(" ", "").Replace("\t", "").Replace("\r", "");

    // tdStr starts inside the opening "<td ...>" tag, so after splitting on
    // '<' and '>' the even entries are tag contents and the odd entries are text.
    string[] v = tdStr.Split('<', '>');
    for (int j = 1; j < v.Length; j += 2)
    {
        string text = v[j].Trim();
        if (text != "")
        {
            return text;
        }
    }
    return "";
}
下面是一个调用的例子:先登录,再查询。实际中这个逻辑可能有很多步骤。
// Shared by both requests so that the cookies stored during login are
// available to the follow-up query.
CookieContainer cc = new CookieContainer();
// Holds the form fields to post.
Hashtable param = new Hashtable();

string urlLogin = "http://demo.server/login.asp";
// The field names must match the input names of the login form; they can be
// found in the source of the login page.
param.Add("User", "xxx");
param.Add("Password", "xxxx");
string result = GrabHelper.PostAndGetHTML(urlLogin, cc, param);
// TODO: check result to see whether the login succeeded before continuing.

// After a successful login, go to the target url and post the query values.
string url2 = "http://demo.server/query.asp?id=1"; // needs to be adapted to the page being queried
param.Clear();
// Add the search fields the target page expects, e.g.:
// param.Add("SearchAreaId", "JobId");
result = GrabHelper.PostAndGetHTML(url2, cc, param);
// Convert the returned HTML with ConvertToDT, or process it in some other way.