C# 自动登录网页,浏览页面【转载】

需求:客户的数据同时存在于另外一个不可控的系统中,需要与当前系统保持同步。

思路:自动登录另外一个系统,然后抓取数据,同步到本系统中。

技术点:模拟用户登录;保存登录状态;抓取数据

/// <summary>
        /// Sends an HTTP POST with URL-encoded form data to the target URL and
        /// returns the response body as text.
        /// </summary>
        /// <remarks>
        /// Keys and values of <paramref name="param"/> are percent-escaped before
        /// being sent, so callers must pass raw (not pre-encoded) values.
        /// The response is decoded with the system default ANSI code page.
        /// </remarks>
        /// <param name="targetURL">URL the form is posted to.</param>
        /// <param name="cc">Cookie container shared between calls; it carries the cookies (and therefore the session) from one request to the next.</param>
        /// <param name="param">Form fields to post, keyed by field name. May be null or empty, in which case an empty body is sent.</param>
        /// <returns>The response body (typically an HTML page).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="targetURL"/> or <paramref name="cc"/> is null.</exception>
        public static string PostAndGetHTML(string targetURL,CookieContainer cc,Hashtable param)
        {
            if (targetURL == null)
            {
                throw new ArgumentNullException("targetURL");
            }
            if (cc == null)
            {
                throw new ArgumentNullException("cc");
            }

            // Build "k1=v1&k2=v2". Each part is escaped so that '&', '=', spaces
            // and non-ASCII characters inside a key or value do not corrupt the body.
            StringBuilder formData = new StringBuilder();
            if (param != null)
            {
                foreach (DictionaryEntry de in param)
                {
                    if (formData.Length > 0)
                    {
                        formData.Append('&');
                    }
                    // Convert.ToString maps a null value to "" instead of throwing.
                    formData.Append(Uri.EscapeDataString(Convert.ToString(de.Key)));
                    formData.Append('=');
                    formData.Append(Uri.EscapeDataString(Convert.ToString(de.Value)));
                }
            }

            // After escaping, the payload is pure ASCII.
            byte[] data = System.Text.Encoding.ASCII.GetBytes(formData.ToString());

            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(targetURL);
            request.Method = "POST";
            request.ContentType = "application/x-www-form-urlencoded";
            request.ContentLength = data.Length;
            request.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; SV1; .NET CLR 2.0.1124)";

            // Attach the cookies BEFORE the request stream is opened: the request
            // should be fully configured before it is started, otherwise cookies
            // already stored in cc may not be sent with this request.
            request.CookieContainer = cc;

            using (Stream requestStream = request.GetRequestStream())
            {
                requestStream.Write(data, 0, data.Length);
            }

            // Dispose the response and its stream so the underlying connection is
            // released back to the pool.
            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                cc.Add(response.Cookies);
                using (Stream stream = response.GetResponseStream())
                using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.Default))
                {
                    return reader.ReadToEnd();
                }
            }
        }

        /// <summary>
        /// Appends the cell texts found in an HTML table fragment to a data table.
        /// </summary>
        /// <remarks>
        /// Parsing starts after the second "tr" tag (the first row is treated as a
        /// header); when there is no second row, parsing starts at the beginning.
        /// Cells are consumed sequentially, <c>dt.Columns.Count</c> cells per data
        /// row; row boundaries in the HTML are not otherwise inspected. Tag matching
        /// is case-insensitive. For each cell the first non-blank text fragment
        /// outside of any nested tag is stored. If the HTML ends with an incomplete
        /// row, the cells that were found are kept and the rest stay unset.
        /// </remarks>
        /// <param name="dt">Table whose columns define how many cells make up one row; rows are added to it.</param>
        /// <param name="tableHTML">HTML containing the table markup. Null or empty adds nothing.</param>
        /// <returns>The same <paramref name="dt"/> instance.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
        public static DataTable ConvertToDT(DataTable dt, string tableHTML)
        {
            if (dt == null)
            {
                throw new ArgumentNullException("dt");
            }
            // With no columns the row loop could never consume a cell.
            if (string.IsNullOrEmpty(tableHTML) || dt.Columns.Count == 0)
            {
                return dt;
            }

            // Case-insensitive search on the original string avoids re-lowercasing
            // the whole document for every lookup.
            const StringComparison comparison = StringComparison.OrdinalIgnoreCase;

            int lastTD = tableHTML.LastIndexOf("</td>", comparison);
            int firstRow = tableHTML.IndexOf("<tr", comparison);
            int secondRow = firstRow < 0 ? -1 : tableHTML.IndexOf("<tr", firstRow + 3, comparison);
            int index = secondRow < 0 ? 0 : secondRow + 3; // just after the second "<tr"

            while (index < lastTD)
            {
                DataRow dr = dt.NewRow();
                bool consumedCell = false;

                for (int i = 0; i < dt.Columns.Count; i++)
                {
                    int openTD = tableHTML.IndexOf("<td", index, comparison);
                    if (openTD < 0)
                    {
                        break; // no more cells: the last row is incomplete
                    }
                    int startTD = openTD + 3; // just after "<td"
                    int endTD = tableHTML.IndexOf("</td>", startTD, comparison);
                    if (endTD < 0)
                    {
                        break; // unterminated cell
                    }

                    dr[i] = ExtractCellText(tableHTML.Substring(startTD, endTD - startTD));
                    index = endTD;
                    consumedCell = true;
                }

                if (!consumedCell)
                {
                    // Nothing was consumed, so index did not advance; stop instead
                    // of looping forever.
                    break;
                }
                dt.Rows.Add(dr);
            }
            return dt;
        }

        // Returns the first non-blank text fragment of a cell, or "" when there is none.
        // tdStr starts right after "<td", so splitting on '<' and '>' yields the
        // remainder of the opening tag at index 0, then text and tag contents
        // alternating: text fragments sit at the odd indexes.
        private static string ExtractCellText(string tdStr)
        {
            tdStr = tdStr.Replace("&nbsp;", "").Replace("\t", "").Replace("\r", "");
            string[] pieces = tdStr.Split('<', '>');
            for (int j = 1; j < pieces.Length; j += 2)
            {
                string text = pieces[j].Trim();
                if (text != "")
                {
                    return text;
                }
            }
            return "";
        }

下面是一个调用的例子:先登录,再查询。实际应用中,这个流程可能包含很多步骤。

// Usage example: log in first, then query a page with the same cookie container
// so that the second request is sent with the cookies obtained at login.
CookieContainer cc = new CookieContainer(); // keeps the session and cookies across requests
            Hashtable param = new Hashtable(); // holds the form fields to post

            string urlLogin = "http://demo.server/login.asp";
            // The field names must match the input names of the login form;
            // inspect the source of the login page to find them.
            param.Add("User", "xxx");
            param.Add("Password", "xxxx");
            string result = GrabHelper.PostAndGetHTML(urlLogin, cc, param);
            // TODO: inspect result to verify that the login succeeded.

            // After a successful login, go to the target URL and post any required values.
            string url2 = "http://demo.server/query.asp?id=1"; // placeholder; depends on the target site
            param.Clear(); // reuse the table for the next request's fields
            // e.g. param.Add("SearchAreaId", "JobId");
            result = GrabHelper.PostAndGetHTML(url2, cc, param);
            // Next: pass result to ConvertToDT or process the HTML in some other way.
posted @ 2014-10-16 10:55  dekevin  阅读(1545)  评论(0)  编辑  收藏  举报