天气预报爬虫例子源码

这是一个使用 Python 的 tkinter、requests、re 和 selenium 模块实现的天气预报爬虫程序,程序带有图形界面,运行界面如下:

 

 

 

操作方法:在“查询城市”编辑框中输入完整的城市名称或完整的拼音(例如深圳可以输入 shenzhen),然后点击“查询”按钮即可。目前程序支持国内和国际城市的天气预报查询:国内城市可查询 8 日内的天气预报,国外城市可查询 7 日内的天气预报。

同时,程序启动时会自动查询并显示当前所在城市的天气预报:

源代码如下:

  1 # *_* coding:utf-8 *_*
  2 
  3 # 开发团队:中国软件开发团队
  4 # 开发人员:Administrator
  5 # 开发时间:2019/3/23 5:16
  6 # 文件名称:weatherSpider
  7 # 开发工具:PyCharm
  8 
  9 
 10 import tkinter
 11 import tkinter.messagebox
 12 from tkinter import ttk
 13 import requests
 14 # from PIL import ImageTk as itk
 15 from selenium import webdriver
 16 # from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 17 from selenium.webdriver.chrome.options import Options
 18 import re
 19 
 20 
 21 '''
 22 获取本地所在城市名称
 23 '''
 24 
 25 
 26 def get_local_city():
 27     chrome_options = Options()
 28     chrome_options.add_argument('--headless')
 29     chrome_options.add_argument('--disable-gpu')
 30     #
 31     # 更换头部
 32     chrome_options.add_argument(
 33         'user-agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36"')
 34 
 35     driver = webdriver.Chrome(executable_path='./chromedriver.exe', chrome_options=chrome_options)
 36     driver.get("http://www.weather.com.cn")
 37     text = driver.page_source
 38 
 39     result = re.findall('<span class="city_name"><em>(.*?)</em></span>', text, re.S)
 40     driver.close()
 41     return result[0]
 42 
 43 
 44 class MyFrame(tkinter.Frame):
 45     def __init__(self, default_city):
 46         self.root = tkinter.Tk()
 47 
 48         self.root.title("天气查询")
 49         self.root.geometry('1200x700+400+220')
 50         # 修改默认应用程序图标
 51         self.root.iconbitmap('camero.ico')
 52 
 53         bg = tkinter.Canvas(self.root, width=1200, height=600, bg='white')
 54         # self.img = itk.PhotoImage(file="bg.jpg")
 55         bg.place(x=100, y=40)
 56         # bg.create_image(0, 0, anchor=tkinter.NW, image=self.img)
 57 
 58         self.city = tkinter.Entry(self.root, width=16, font=("仿宋", 18, "normal"))
 59         self.city.place(x=200, y=60)
 60         self.city.insert(0, default_city)
 61 
 62         citylabel = tkinter.Label(self.root, text='查询城市', font=("仿宋", 18, "normal"))
 63         citylabel.place(x=80, y=60)
 64 
 65         # 查询按钮
 66         chaxun = tkinter.Button(self.root, width=10, height=3, text="查询", bg='#00CCFF', bd=5, font="bold",command=self.search)
 67        
 68         chaxun.place(x=800, y=50)
 69 
 70         # 清除按钮
 71         clearbtn = tkinter.Button(self.root, width=10, height=3, text="清除", bg='#00CCFF', bd=5, font="bold",command=self.clear)
 72       
 73         clearbtn.place(x=950, y=50)
 74 
 75         poslabel = tkinter.Label(self.root, text='选择位置', font=("仿宋", 18, "normal"))
 76         poslabel.place(x=80, y=100)
 77 
 78         comvalue = tkinter.StringVar()  # 窗体自带的文本,新建一个值
 79         self.comboxlist = ttk.Combobox(self.root, width=30, height=18, font=("仿宋", 18, "normal"),
 80                                        textvariable=comvalue)  # 初始化
 81         self.comboxlist["values"] = ("1", "2", "3")
 82         self.comboxlist.current(0)  # 选择第一个
 83         self.comboxlist.bind("<<ComboboxSelected>>", self.choose)  # 绑定事件,(下拉列表框被选中时,绑定choose()函数)
 84         self.comboxlist.place(x=200, y=100)
 85 
 86         self.result = tkinter.Listbox(self.root, heigh=18, width=65, font=("仿宋", 20, "normal"))  # 显示天气框
 87         self.result.place(x=125, y=150)
 88 
 89         self.citys = []
 90 
 91         self.headers = {
 92             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
 93             'Cookie': '__guid=182823328.3322839646442213000.1543932524694.901; vjuids=1858d43b6.167798cbdb7.0.8c4d7463d5c5d; vjlast=1543932526.1543932526.30; userNewsPort0=1; f_city=%E5%B9%B3%E9%A1%B6%E5%B1%B1%7C101180501%7C; Hm_lvt_080dabacb001ad3dc8b9b9049b36d43b=1543932526,1543932551,1543932579; Wa_lvt_1=1547464114,1547464115,1547880054,1547983123; defaultCty=101181001; defaultCtyName=%u5546%u4E18; monitor_count=6; Wa_lpvt_1=1547983809'
 94         }
 95 
 96         # 开启本地天气查询
 97         if (default_city != ''):
 98             self.tianqiforecast(default_city)
 99 
100     def tianqiforecast(self, searchcity):
101 
102         city = searchcity
103         url = 'http://toy1.weather.com.cn/search?cityname=' + city + '&callback=success_jsonpCallback&_=1548048506469'
104         response = requests.get(url, headers=self.headers)
105         html1 = response.content.decode('utf-8')
106         self.citys = re.findall('"ref":"(.*?)~.*?~(.*?)~.*?~(.*?)~.*?~.*?~.*?~.*?~(.*?)"', html1, re.S)
107         if (len(self.citys) == 0):
108             a = "出错了,未查找到该城市"
109             self.result.insert(tkinter.END, a)
110             return
111         # 显示当前城市常用查询点
112         plist = []
113         for i in range(0, len(self.citys)):
114             # print(i + 1, ':%14s ' % "".join(citys[i]))
115             plist.append(self.citys[i][1])
116         pos = tuple(plist)
117         self.comboxlist["values"] = pos
118         self.comboxlist.current(0)
119         if len(self.citys) != 0:
120             self.query(0)
121 
122     def search(self):
123         mycity = self.city.get()
124         if (mycity != ''):
125             self.clear()
126             self.tianqiforecast(mycity)
127 
128     def query(self, choose):
129         if (len(self.citys[choose][0]) == 9):
130             if (self.citys[choose][0][0] != '1' or self.citys[choose][0][1] != '0' or self.citys[choose][0][2] != '1'):
131                 # 查询国外天气
132 
133                 url2 = 'http://www.weather.com.cn/weathern/' + self.citys[choose][0] + '.shtml'
134                 responseweather = requests.get(url2, headers=self.headers)
135                 html2 = responseweather.content.decode('utf-8')
136 
137                 weather = re.findall('<li class="date-.*?".*?".*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', html2, re.S)
138                 temp_weather = re.findall(
139                     '<p class="weather-info">(.*?)</p>.*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">(.*?)</p>',
140                     html2, re.S)
141                 if len(temp_weather) < 7:
142                     # 当天
143                     today1 = re.findall(
144                         '<li class="blue-item active".*?>(.*?)<div class="item-active"></div>\\n</li>',
145                         html2, re.S)
146                     today = re.findall('<p class="weather-info">(.*?)</p>.*?<p class="wind-info">(.*?)</p>', today1[0],
147                                        re.S)
148                     print(today)
149                     # 后6天
150                     weather.append(temp_weather)
151                 else:
152                     weather.append(temp_weather)
153 
154                 Hightempture = re.findall(
155                     '<script>var eventDay =\["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"\];', html2,
156                     re.S)
157                 Lowtempture = re.findall(
158                     'var eventNight =\["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"\];',
159                     html2, re.S)
160                 # print(Hightempture,Lowtempture)
161                 b = '查询城市为:' + str(self.citys[choose][3]) + '    ' + str(self.citys[choose][1])
162                 self.result.insert(tkinter.END, b)
163                 if len(temp_weather) < 7:  # 如日本
164                     if len(weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or \
165                             len(Lowtempture[0]) != 7 or len(Hightempture[0]) != 7:
166                         a = '系统出错,数据不完整:'
167                         self.result.insert(tkinter.END, a)
168                         self.result.insert(tkinter.END, url2)
169                         print(url2)
170                     else:
171                         for i in range(0, 7):
172                             if i < 1:
173                                 a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
174                                     i] + '' + str(today[0][0]) + ' 风:' + str(today[0][1])
175                                 self.result.insert(tkinter.END, a)
176 
177                             else:
178                                 a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
179                                     i] + '' + "".join(weather[7][i - 1])
180 
181                                 self.result.insert(tkinter.END, a)
182                 else:  # 如美国
183                     if len(temp_weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or \
184                             len(Lowtempture[0]) != 7 or len(Hightempture[0]) != 7:
185                         a = '系统出错,数据不完整:'
186                         self.result.insert(tkinter.END, a)
187                         self.result.insert(tkinter.END, url2)
188                         print(url2)
189                     else:
190                         for i in range(0, 7):
191                             a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
192                                 i] + '' + "".join(weather[7][i])
193 
194                             self.result.insert(tkinter.END, a)
195 
196 
197             else:#国内天气查询
198                 url2 = 'http://www.weather.com.cn/weathern/' + self.citys[choose][0] + '.shtml'
199                 responseweather = requests.get(url2, headers=self.headers)
200                 html2 = responseweather.content.decode('utf-8')
201 
202                 weather = re.findall('<li class="date-.*?".*?".*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', html2, re.S)
203                 weather.append(re.findall(
204                     '<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">(.*?)</p>',
205                     html2, re.S))
206                 Hightempture = re.findall(
207                     '<script>var eventDay =\["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"\];', html2,
208                     re.S)
209                 Lowtempture = re.findall(
210                     'var eventNight =\["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"\];',
211                     html2, re.S)
212 
213                 b = '查询城市为:' + str(self.citys[choose][3]) + '    ' + str(self.citys[choose][1])
214                 self.result.insert(tkinter.END, b)
215                 if len(weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or \
216                         len(Lowtempture[0]) != 8 or len(Hightempture[0]) != 8:
217                     a = '系统出错,数据不完整:'
218                     self.result.insert(tkinter.END, a)
219                     self.result.insert(tkinter.END, url2)
220                     print(url2)
221                 else:
222                     for i in range(0, 8):
223                         a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
224                             i] + '' + "".join(weather[8][i])
225 
226                         self.result.insert(tkinter.END, a)
227 
228         if (len(self.citys[choose][0]) == 12):  # 查询搜索相关结果的下一个城市天气预报
229             url2 = 'http://forecast.weather.com.cn/town/weathern/' + self.citys[choose][0] + '.shtml'
230             responseweather = requests.get(url2, headers=self.headers)
231             html2 = responseweather.content.decode('utf-8')
232 
233             weather = re.findall('<li class="date-.*?".*?"da.*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', html2, re.S)
234 
235             html2 = re.sub('lt;', '<', html2)
236             weather.append(re.findall(
237                 '<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)".*?<p class="wind-i.*?">\\r\\n(.*?)\\r\\n',
238                 html2, re.S))
239 
240             Hightempture = re.findall(
241                 'var eventDay = \["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"\];', html2, re.S)
242 
243             Lowtempture = re.findall(
244                 'var eventNight = \["(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)","(.*?)"\];',
245                 html2, re.S)
246             # print(Hightempture,Lowtempture)
247             b = '查询城市为:' + str(self.citys[choose][3]) + '   ' + str(self.citys[choose][2]) + '    ' + str(
248                 self.citys[choose][1])
249             self.result.insert(tkinter.END, b)
250 
251             if len(weather) <= 0 or len(Lowtempture) <= 0 or len(Hightempture) <= 0 or \
252                     len(Lowtempture[0]) != 8 or len(Hightempture[0]) != 8:
253                 a = '系统出错,数据不完整:'
254                 self.result.insert(tkinter.END, a)
255                 self.result.insert(tkinter.END, url2)
256                 print(url2)
257             else:
258                 for i in range(0, 8):
259                     a = "".join(weather[i]) + '    ' + Lowtempture[0][i] + '℃  ~  ' + Hightempture[0][
260                         i] + '' + "".join(weather[8][i])
261                     # print(a)
262                     self.result.insert(tkinter.END, a)
263 
264     '''
265     选择搜索城市相关的下一个城市名称,并进行天气查询
266     '''
267 
268     def choose(self, event):
269         c = self.comboxlist.get()
270         choose = -1
271         for i in range(0, len(self.citys)):
272             if c == self.citys[i][1]:
273                 choose = i;
274                 break;
275         if choose != -1:
276             self.query(choose)
280 
281     '''
282     清除天气查询结果
283     '''
284 
285     def clear(self):
286         self.result.delete(0, tkinter.END)
287         # self.city.delete(0, tkinter.END)
288         # tkinter.messagebox.showerror('showerror', 'hello')
289 
290 
291 
if __name__ == '__main__':
    # Detecting the current city is a convenience only: if it fails for any
    # reason (no chromedriver, no network, unexpected page), report it and
    # start with an empty city instead of refusing to open the window.
    try:
        default_city = get_local_city()
    except Exception as exc:
        print('获取本地城市失败:', exc)
        default_city = ''

    myframe = MyFrame(default_city)
    myframe.root.mainloop()

 

 

通过该程序,主要可以学习爬虫解决问题的思路,并熟悉正则表达式 re 模块、网络数据抓取 requests 模块、selenium 模块以及 tkinter GUI 模块的具体使用方法。

初学 Python 的网友可以参考以下几本优秀的图书:

零基础学python  配套光盘下载地址

Python从入门到项目实践

 

其它有关爬虫开发参考资料可参考如下书籍:

查看目录

Python数据抓取技术与实战.pdf
Python爬虫入门到实战.pdf
Python爬虫实战入门教程.pdf
Python网络数据采集.pdf
PYTHON网络爬虫从入门到实践.pdf
《Python爬虫开发与项目实战》.pdf
精通Scrapy网络爬虫.pdf
网络爬虫-Python和数据分析.pdf
网络爬虫全解析 技术、原理与实践.pdf
Python 3爬虫、数据清洗与可视化实战

...

 


 

常用软件开发学习资料目录(详见我爱分享资源论坛):

1.经典编程电子书收藏  

2.C&C++编程学习资料收藏   

3.算法及数据结构(有关c,c++,java)   

4.Java开发学习资料收藏      

5.Android开发学习资料收藏  

6.Python开发学习资料收藏  

7.大数据,机器学习,人工智能资料收藏

8.Docker资料收藏

 

posted @ 2019-03-25 09:04  中国人醒来了  阅读(2880)  评论(0)  编辑  收藏  举报