明天的明天 永远的永远 未知的一切 我与你一起承担 ??

是非成败转头空 青山依旧在 几度夕阳红 。。。
  博客园  :: 首页  :: 管理

正则抓取SINA天气预报数据!!!

Posted on 2007-01-11 13:31  且行且思  阅读(632)  评论(0)  编辑  收藏  举报

/// <summary>
        /// Scrapes the Sina one-day weather forecast pages and returns the result as XML.
        /// </summary>
        /// <remarks>
        /// The returned document has a <c>root</c> element containing a single
        /// <c>citylist</c> element. <c>citylist</c> carries a <c>vdatetime</c> attribute
        /// (today's date from <c>DateTime.Now.ToShortDateString()</c>, i.e. formatted with
        /// the current culture) and one <c>city</c> child per regex match, with the
        /// attributes <c>orders</c>, <c>city</c>, <c>weather</c>, <c>temperature</c>,
        /// <c>windway</c> and <c>wind</c>.
        /// <c>orders</c> identifies the source page: 1 = north-east, 2 = east, 3 = north,
        /// 4 = south, 5 = south-west, 6 = north-west, 7 = whole nation.
        /// A page that yields no matches simply contributes no <c>city</c> elements.
        /// </remarks>
        /// <returns>The XML document described above.</returns>
        public static XmlDataDocument GetSinaWeather()
        {
            const string mapBaseUrl = "http://weather.news.sina.com.cn/images/figureWeather/map/";

            XmlDataDocument objXml = new XmlDataDocument();
            objXml.LoadXml("<root />");

            XmlElement objXmlCityList = objXml.CreateElement("citylist");
            objXmlCityList.SetAttribute("vdatetime", DateTime.Now.ToShortDateString());

            // Regional pages. Sample entry:
            // 城市:哈尔滨市 <br> 天气:多云转晴<br> 温度:-10 ℃~-1 ℃<br>风向: 西风<br>风力:小于3级<br>
            AppendSinaCities(objXml, objXmlCityList, SinaRegionCityRegex, mapBaseUrl + "northEast.html", "1");
            AppendSinaCities(objXml, objXmlCityList, SinaRegionCityRegex, mapBaseUrl + "eastOfChina.html", "2");
            AppendSinaCities(objXml, objXmlCityList, SinaRegionCityRegex, mapBaseUrl + "northOfChina.html", "3");
            AppendSinaCities(objXml, objXmlCityList, SinaRegionCityRegex, mapBaseUrl + "southOfChina.html", "4");
            AppendSinaCities(objXml, objXmlCityList, SinaRegionCityRegex, mapBaseUrl + "southWest.html", "5");
            AppendSinaCities(objXml, objXmlCityList, SinaRegionCityRegex, mapBaseUrl + "northWest.html", "6");

            // Whole-nation page: entries have no wind-direction field. Sample entry:
            // 城市:武汉 <br> 天气:小雨转阴<br> 温度:10 ℃~16 ℃<br>风力:小于3级<br>
            AppendSinaCities(objXml, objXmlCityList, SinaNationCityRegex, mapBaseUrl + "wholeNation.html", "7");

            objXml.DocumentElement.AppendChild(objXmlCityList);

            return objXml;
        }

        /// <summary>
        /// Matches one city entry on a regional page (includes the wind direction).
        /// Built once because RegexOptions.Compiled makes construction expensive.
        /// </summary>
        private static readonly Regex SinaRegionCityRegex = new Regex(
            "城市:(?<city>[^<]+) <br> 天气:(?<weather>[^<]+)<br> 温度:(?<temperature>[^<]+)<br>风向:(?<windway>[^<]+)<br>风力:(?<wind>[^<]+) <br>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Matches one city entry on the whole-nation page (no wind direction group).
        /// </summary>
        private static readonly Regex SinaNationCityRegex = new Regex(
            "城市:(?<city>[^<]+) <br> 天气:(?<weather>[^<]+)<br> 温度:(?<temperature>[^<]+)<br>风力:(?<wind>[^<]+)<br>",
            RegexOptions.Compiled | RegexOptions.IgnoreCase);

        /// <summary>
        /// Downloads one forecast page via GetContent (decoded as gb2312) and appends a
        /// <c>city</c> element to <paramref name="cityList"/> for every match of
        /// <paramref name="regex"/> in it.
        /// </summary>
        /// <param name="doc">Document used to create the new elements.</param>
        /// <param name="cityList">Element that receives the <c>city</c> children.</param>
        /// <param name="regex">Pattern with the named groups city, weather, temperature, wind and optionally windway.</param>
        /// <param name="url">Address of the page to scrape.</param>
        /// <param name="orders">Value written to the <c>orders</c> attribute of every element from this page.</param>
        private static void AppendSinaCities(XmlDataDocument doc, XmlElement cityList, Regex regex, string url, string orders)
        {
            string content = GetContent(url, "gb2312");

            for (Match mc = regex.Match(content); mc.Success; mc = mc.NextMatch())
            {
                try
                {
                    XmlElement cityElement = doc.CreateElement("city");
                    cityElement.SetAttribute("orders", orders);
                    cityElement.SetAttribute("city", mc.Groups["city"].Value.Trim());
                    cityElement.SetAttribute("weather", mc.Groups["weather"].Value.Trim());
                    cityElement.SetAttribute("temperature", mc.Groups["temperature"].Value.Trim());
                    // For a pattern without a "windway" group, Groups["windway"] is an
                    // unsuccessful group whose Value is "", so the attribute is left empty.
                    cityElement.SetAttribute("windway", mc.Groups["windway"].Value.Trim());
                    cityElement.SetAttribute("wind", mc.Groups["wind"].Value.Trim());
                    cityList.AppendChild(cityElement);
                }
                catch (Exception ex)
                {
                    // Best effort: log the failure and carry on with the next match.
                    Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
                }
            }
        }

 /// <summary>
        /// 抓取页面接口
        /// </summary>
        /// <param name="url"></param>
        /// <returns></returns>
        private static string GetContent(string url, string encoding)
        {
            string str = "";
            WebClient client = new WebClient();
            client.Headers.Add("Accept", "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/x-shockwave-flash, application/vnd.ms-excel, application/vnd.ms-powerpoint, application/msword, */*");
            client.Headers.Add("Accept-Language", "zh-cn");
            client.Headers.Add("UA-CPU", "x86");
            client.Headers.Add("User-Agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)");
            try
            {
                byte[] buffer = client.DownloadData(url);
                if (encoding == "utf-8")
                {
                    str = System.Text.Encoding.GetEncoding("utf-8").GetString(buffer, 0, buffer.Length);
                }
                else
                {
                    str = System.Text.Encoding.GetEncoding("gb2312").GetString(buffer, 0, buffer.Length);
                }
            }
            catch (Exception ex)
            {
                Yesun.Edzh.BLL.Log.LogHelper.WriteError(ex.Message);
            }
            return str;
        }