天气预报爬虫例子源码
这是一个通过python中TkInter,Requests,Re模块实现的天气预报爬虫程序,软件界面为图形界面,软件运行界面如下:
使用方法:在“查询城市”输入框中输入完整的城市名称或完整的拼音(例如深圳可以输入 shenzhen),然后点击“查询”按钮即可。目前程序支持国内和国际城市的天气预报查询:国内城市可查询 8 日内的天气预报,国外城市可查询 7 日内的天气预报。
程序启动时还会自动查询并显示当前所在城市的天气预报:
源代码如下:
# -*- coding: utf-8 -*-

# Team: China software development team
# Developer: Administrator
# Created: 2019/3/23 5:16
# File name: weatherSpider
# IDE: PyCharm
"""Tkinter weather-forecast scraper for www.weather.com.cn.

The local city is detected with headless Chrome, matching stations are looked
up through the site's search endpoint, and the forecast page of the chosen
station is parsed with regular expressions and listed in a Tk window.
"""

import re
import tkinter
import tkinter.messagebox
from tkinter import ttk

import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.options import Options

# Seconds to wait for an HTTP response; without a limit a stalled connection
# would block the Tk event loop forever.
REQUEST_TIMEOUT = 10

# Listbox message shown when an HTTP request fails.
NETWORK_ERROR_TEXT = '网络请求失败,请稍后重试'

USER_AGENT = ('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
              '(KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36')

SEARCH_URL = 'http://toy1.weather.com.cn/search'

# City name shown in the header of the weather.com.cn home page.
CITY_NAME_RE = re.compile(
    r'<span class="city_name"><em>(.*?)</em></span>', re.S)

# One search hit -> (station code, name, and two further '~'-separated fields
# that are only used when building the result header).
SEARCH_RESULT_RE = re.compile(
    r'"ref":"(.*?)~.*?~(.*?)~.*?~(.*?)~.*?~.*?~.*?~.*?~(.*?)"', re.S)

# Date cells of a forecast page -> 2-tuples that are joined for display.
DATE_RE = re.compile(
    r'<li class="date-.*?".*?".*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', re.S)
TOWN_DATE_RE = re.compile(
    r'<li class="date-.*?".*?"da.*?">(.*?)</.*?"date-i.*?">(.*?)<.*?', re.S)

# Per-day weather / wind details -> 4-tuples that are joined for display.
FOREIGN_DETAIL_RE = re.compile(
    r'<p class="weather-info">(.*?)</p>.*?title="(.*?)".*?title="(.*?)"'
    r'.*?<p class="wind-i.*?">(.*?)</p>', re.S)
DOMESTIC_DETAIL_RE = re.compile(
    r'<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)"'
    r'.*?<p class="wind-i.*?">(.*?)</p>', re.S)
TOWN_DETAIL_RE = re.compile(
    r'<p class="weather-in.*?" title="(.*?)".*?title="(.*?)".*?title="(.*?)"'
    r'.*?<p class="wind-i.*?">\r\n(.*?)\r\n', re.S)

# The highlighted "today" item of a foreign page and its weather / wind text.
ACTIVE_ITEM_RE = re.compile(
    r'<li class="blue-item active".*?>(.*?)<div class="item-active"></div>\n</li>',
    re.S)
TODAY_RE = re.compile(
    r'<p class="weather-info">(.*?)</p>.*?<p class="wind-info">(.*?)</p>', re.S)


def _temperature_re(prefix, days):
    """Compile a pattern capturing `days` quoted values of a JS array literal.

    `prefix` is the literal text that precedes the opening bracket, for
    example ``'var eventNight ='``.
    """
    return re.compile(
        prefix + r'\[' + ','.join(['"(.*?)"'] * days) + r'\];', re.S)


FOREIGN_HIGH_RE = _temperature_re('<script>var eventDay =', 7)
FOREIGN_LOW_RE = _temperature_re('var eventNight =', 7)
DOMESTIC_HIGH_RE = _temperature_re('<script>var eventDay =', 8)
DOMESTIC_LOW_RE = _temperature_re('var eventNight =', 8)
TOWN_HIGH_RE = _temperature_re('var eventDay = ', 8)
TOWN_LOW_RE = _temperature_re('var eventNight = ', 8)


def get_local_city():
    """Return the city name shown on the weather.com.cn home page.

    The page is loaded in headless Chrome through ``./chromedriver.exe``.

    Returns:
        The first city name found in the page, or ``''`` when the browser
        cannot be started, the page cannot be loaded, or no name is present.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # Replace the default user agent.
    chrome_options.add_argument('user-agent="' + USER_AGENT + '"')

    # NOTE(review): `executable_path` is the Selenium 3 way to locate the
    # driver; newer Selenium releases expect a Service object - confirm
    # against the installed version.
    try:
        driver = webdriver.Chrome(executable_path='./chromedriver.exe',
                                  options=chrome_options)
    except WebDriverException:
        return ''

    try:
        driver.get("http://www.weather.com.cn")
        text = driver.page_source
    except WebDriverException:
        return ''
    finally:
        # quit() also stops the chromedriver process; close() only closes
        # the browser window.
        driver.quit()

    result = CITY_NAME_RE.findall(text)
    return result[0] if result else ''


class MyFrame(tkinter.Frame):
    """Main window: city entry, location combobox and forecast list.

    Attributes set by ``__init__``:
        root: the ``tkinter.Tk`` top-level window.
        city: entry holding the city to search for.
        comboxlist: combobox listing the locations found by the last search.
        result: listbox that receives the forecast lines.
        citys: search hits as tuples matched by ``SEARCH_RESULT_RE``.
        headers: HTTP headers sent with every request.
    """

    def __init__(self, default_city):
        """Build the window and, if `default_city` is non-empty, query it."""
        self.root = tkinter.Tk()
        super().__init__(self.root)

        self.root.title("天气查询")
        self.root.geometry('1200x700+400+220')
        # Replace the default application icon; the icon is cosmetic, so a
        # missing or unreadable file must not prevent start-up.
        try:
            self.root.iconbitmap('camero.ico')
        except tkinter.TclError:
            pass

        bg = tkinter.Canvas(self.root, width=1200, height=600, bg='white')
        bg.place(x=100, y=40)

        self.city = tkinter.Entry(self.root, width=16, font=("仿宋", 18, "normal"))
        self.city.place(x=200, y=60)
        self.city.insert(0, default_city)

        citylabel = tkinter.Label(self.root, text='查询城市', font=("仿宋", 18, "normal"))
        citylabel.place(x=80, y=60)

        # Search button.
        chaxun = tkinter.Button(self.root, width=10, height=3, text="查询", bg='#00CCFF',
                                bd=5, font="bold", command=self.search)
        chaxun.place(x=800, y=50)

        # Clear button.
        clearbtn = tkinter.Button(self.root, width=10, height=3, text="清除", bg='#00CCFF',
                                  bd=5, font="bold", command=self.clear)
        clearbtn.place(x=950, y=50)

        poslabel = tkinter.Label(self.root, text='选择位置', font=("仿宋", 18, "normal"))
        poslabel.place(x=80, y=100)

        # Kept on the instance so the variable outlives __init__; a
        # garbage-collected StringVar unsets the underlying Tcl variable.
        self.comvalue = tkinter.StringVar()
        self.comboxlist = ttk.Combobox(self.root, width=30, height=18,
                                       font=("仿宋", 18, "normal"),
                                       textvariable=self.comvalue)
        self.comboxlist["values"] = ("1", "2", "3")
        self.comboxlist.current(0)  # select the first entry
        # Run choose() whenever an entry of the drop-down list is selected.
        self.comboxlist.bind("<<ComboboxSelected>>", self.choose)
        self.comboxlist.place(x=200, y=100)

        # Forecast output list.
        self.result = tkinter.Listbox(self.root, height=18, width=65,
                                      font=("仿宋", 20, "normal"))
        self.result.place(x=125, y=150)

        self.citys = []

        self.headers = {
            'User-Agent': USER_AGENT,
            'Cookie': '__guid=182823328.3322839646442213000.1543932524694.901; '
                      'vjuids=1858d43b6.167798cbdb7.0.8c4d7463d5c5d; '
                      'vjlast=1543932526.1543932526.30; '
                      'userNewsPort0=1; '
                      'f_city=%E5%B9%B3%E9%A1%B6%E5%B1%B1%7C101180501%7C; '
                      'Hm_lvt_080dabacb001ad3dc8b9b9049b36d43b=1543932526,1543932551,1543932579; '
                      'Wa_lvt_1=1547464114,1547464115,1547880054,1547983123; '
                      'defaultCty=101181001; '
                      'defaultCtyName=%u5546%u4E18; '
                      'monitor_count=6; '
                      'Wa_lpvt_1=1547983809'
        }

        # Show the forecast of the local city on start-up.
        if default_city:
            self.tianqiforecast(default_city)

    def _fetch_html(self, url, params=None):
        """GET `url` with the shared headers and return the body as text.

        Raises:
            requests.RequestException: on connection errors or timeouts.
        """
        response = requests.get(url, headers=self.headers, params=params,
                                timeout=REQUEST_TIMEOUT)
        return response.content.decode('utf-8')

    def tianqiforecast(self, searchcity):
        """Search for `searchcity` and show the forecast of the first hit.

        The combobox is refilled with the names of all hits. When nothing is
        found, or the request fails, a message is appended to the result
        list instead.
        """
        # Passed as `params` so that the user's text is URL-encoded.
        params = {
            'cityname': searchcity,
            'callback': 'success_jsonpCallback',
            '_': '1548048506469',
        }
        try:
            html = self._fetch_html(SEARCH_URL, params=params)
        except requests.RequestException:
            self.result.insert(tkinter.END, NETWORK_ERROR_TEXT)
            return

        self.citys = SEARCH_RESULT_RE.findall(html)
        if not self.citys:
            self.result.insert(tkinter.END, "出错了,未查找到该城市")
            return

        # List the locations found for the searched city.
        self.comboxlist["values"] = tuple(city[1] for city in self.citys)
        self.comboxlist.current(0)
        self.query(0)

    def search(self):
        """Clear the result list and search for the city typed in the entry."""
        mycity = self.city.get()
        if mycity != '':
            self.clear()
            self.tianqiforecast(mycity)

    def query(self, choose):
        """Fetch and list the forecast for ``self.citys[choose]``.

        9-character station codes starting with ``101`` are handled as
        domestic (8 days), other 9-character codes as foreign (7 days) and
        12-character codes are fetched from the town-level forecast site
        (8 days). Codes of any other length are ignored.
        """
        city = self.citys[choose]
        code = city[0]
        try:
            if len(code) == 9:
                if code.startswith('101'):
                    self._query_domestic(city)
                else:
                    self._query_foreign(city)
            elif len(code) == 12:
                self._query_town(city)
        except requests.RequestException:
            self.result.insert(tkinter.END, NETWORK_ERROR_TEXT)

    def _query_foreign(self, city):
        """List the 7-day forecast of a foreign station."""
        url = 'http://www.weather.com.cn/weathern/' + city[0] + '.shtml'
        html = self._fetch_html(url)
        header = '查询城市为:' + str(city[3]) + ' ' + str(city[1])
        self._show_forecast(
            header, url,
            dates=DATE_RE.findall(html),
            lows=FOREIGN_LOW_RE.findall(html),
            highs=FOREIGN_HIGH_RE.findall(html),
            details=self._foreign_details(html),
            days=7)

    @staticmethod
    def _foreign_details(html):
        """Return the per-day detail strings of a foreign forecast page.

        Some pages list all 7 days in the regular markup. Others (the
        original code cites Japan as an example) list fewer, and the current
        day is read from the highlighted "active" item instead. An empty
        list is returned when that item cannot be found.
        """
        rest = ["".join(item) for item in FOREIGN_DETAIL_RE.findall(html)]
        if len(rest) >= 7:
            return rest

        active = ACTIVE_ITEM_RE.findall(html)
        today = TODAY_RE.findall(active[0]) if active else []
        if not today:
            return []
        first = str(today[0][0]) + ' 风:' + str(today[0][1])
        return [first] + rest

    def _query_domestic(self, city):
        """List the 8-day forecast of a domestic station."""
        url = 'http://www.weather.com.cn/weathern/' + city[0] + '.shtml'
        html = self._fetch_html(url)
        header = '查询城市为:' + str(city[3]) + ' ' + str(city[1])
        self._show_forecast(
            header, url,
            dates=DATE_RE.findall(html),
            lows=DOMESTIC_LOW_RE.findall(html),
            highs=DOMESTIC_HIGH_RE.findall(html),
            details=["".join(item) for item in DOMESTIC_DETAIL_RE.findall(html)],
            days=8)

    def _query_town(self, city):
        """List the 8-day forecast of a town-level location."""
        url = 'http://forecast.weather.com.cn/town/weathern/' + city[0] + '.shtml'
        html = self._fetch_html(url)
        dates = TOWN_DATE_RE.findall(html)
        # Applied after the dates are extracted and before the details are,
        # as in the original code. NOTE(review): presumably works around
        # escaped markup in these pages - confirm against a live page.
        html = html.replace('lt;', '<')
        header = ('查询城市为:' + str(city[3]) + ' ' + str(city[2]) + ' '
                  + str(city[1]))
        self._show_forecast(
            header, url,
            dates=dates,
            lows=TOWN_LOW_RE.findall(html),
            highs=TOWN_HIGH_RE.findall(html),
            details=["".join(item) for item in TOWN_DETAIL_RE.findall(html)],
            days=8)

    def _show_forecast(self, header, url, dates, lows, highs, details, days):
        """Append `header` and `days` forecast lines to the result list.

        `lows` and `highs` are ``findall`` results whose first tuple holds
        one temperature per day. `dates` holds one tuple per day and
        `details` one string per day. If any of them is too short, an error
        message and `url` are listed instead of the forecast.
        """
        self.result.insert(tkinter.END, header)
        if not lows or not highs or min(len(dates), len(details)) < days:
            self.result.insert(tkinter.END, '系统出错,数据不完整:')
            self.result.insert(tkinter.END, url)
            print(url)
            return

        for i in range(days):
            line = ("".join(dates[i]) + ' ' + lows[0][i] + '℃ ~ '
                    + highs[0][i] + '℃ ' + details[i])
            self.result.insert(tkinter.END, line)

    def choose(self, event):
        """Query the location picked in the combobox.

        The selection is resolved by index rather than by name, so that
        locations sharing the same display name stay distinguishable.
        """
        index = self.comboxlist.current()
        if 0 <= index < len(self.citys):
            self.query(index)

    def clear(self):
        """Remove all lines from the result list."""
        self.result.delete(0, tkinter.END)


if __name__ == '__main__':
    # Detect the current city, then open the window.
    default_city = get_local_city()

    myframe = MyFrame(default_city)
    myframe.root.mainloop()
通过该程序,主要可以学习爬虫解决问题的思路,并熟悉正则表达式 re 模块、网络数据抓取 requests 模块、selenium 模块以及 tkinter GUI 模块的具体使用方法。
对于初学 Python 的网友,可以参考以下几本优秀的图书。
其它有关爬虫开发参考资料可参考如下书籍:
Python数据抓取技术与实战.pdf
Python爬虫入门到实战.pdf
Python爬虫实战入门教程.pdf
Python网络数据采集.pdf
PYTHON网络爬虫从入门到实践.pdf
《Python爬虫开发与项目实战》.pdf
精通Scrapy网络爬虫.pdf
网络爬虫-Python和数据分析.pdf
网络爬虫全解析 技术、原理与实践.pdf
Python 3爬虫、数据清洗与可视化实战
...
常用软件开发学习资料目录(详见我爱分享资源论坛):
1.经典编程电子书收藏
2.C&C++编程学习资料收藏
3.算法及数据结构(有关c,c++,java)
4.Java开发学习资料收藏
5.Android开发学习资料收藏
6.Python开发学习资料收藏
7.大数据,机器学习,人工智能资料收藏
8.Docker资料收藏