Python 爬取历史天气
Python 爬取历史天气
官网:http://lishi.tianqi.com/luozhuangqu/201802.html
# encoding:utf-8
"""Scrape monthly historical weather pages from lishi.tianqi.com into MySQL.

For every city code in ``city_code`` and every month in ``date_list`` the
script downloads ``<url_str><city>/<YYYYMM>.html``, extracts the data rows
found inside ``div.tqtongji2`` and inserts them into the ``mWeather`` table
of the local ``weather`` database.
"""
import pandas as pd
import pymysql
import requests
from bs4 import BeautifulSoup

# Months to fetch, formatted as 'YYYYMM': 2016-09 through 2018-08 inclusive
# (24 months). period_range with freq='M' is used instead of
# date_range(freq="m"), whose lowercase offset alias is deprecated in pandas.
date_list = [
    period.strftime('%Y%m')
    for period in pd.period_range(start='2016-09', end='2018-08', freq='M')
]

url_str = "http://lishi.tianqi.com/"

# URL slug -> city name written to the mCity column.
CITY_NAMES = {
    "cangshan": "苍山",
    "feixian": "费县",
    "hedong": "河东区",
    "junan": "莒南",
    "linshu": "临沭",
    "lanshan": "兰山市",
    "luozhuangqu": "罗庄区",
    "mengyin": "蒙阴",
    "pingyi": "平邑",
    "tancheng": "郯城",
    "yinan": "沂南",
    "yishui": "沂水",
}

# City slugs to scrape; add more keys of CITY_NAMES to widen the crawl.
city_code = ["yishui"]

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 10

# Placeholders are filled in by the driver, so scraped text containing quotes
# can neither break the statement nor inject SQL.
INSERT_SQL = (
    "insert into mWeather "
    "(mCity,mDate,mTopTem,mLowTem,mWeather,mWindDir,mWindPower) "
    "values (%s,%s,%s,%s,%s,%s,%s)"
)


def _parse_rows(html, city_name):
    """Return the weather rows found in one monthly page.

    Args:
        html: Page markup as text.
        city_name: Value stored in the city column of every returned row.

    Returns:
        A list of 7-tuples ``(city, date, top_temp, low_temp, weather,
        wind_dir, wind_power)`` in page order.
    """
    soup = BeautifulSoup(html, 'html.parser')
    rows = []
    for table in soup.select('div[class="tqtongji2"]'):
        # The first <ul> is skipped -- presumably the column-header row.
        for ul in table.select('ul')[1:]:
            cells = [li.get_text(strip=True) for li in ul.select('li')]
            if len(cells) < 6:
                # Malformed or partial row: nothing reliable to store.
                continue
            rows.append((city_name, *cells[:6]))
    return rows


def main():
    """Download every configured page and store its rows in MySQL."""
    conn = pymysql.connect(host='localhost', user='root', password='123456',
                           database='weather', charset='utf8')
    try:
        with conn.cursor() as cursor:
            for code in city_code:
                # Fall back to the slug itself for cities missing from the map.
                city_name = CITY_NAMES.get(code, code)
                for month in date_list:
                    url = url_str + code + "/" + month + ".html"
                    try:
                        response = requests.get(url, timeout=REQUEST_TIMEOUT)
                        response.raise_for_status()
                    except requests.RequestException as exc:
                        # One bad page should not abort the whole crawl.
                        print(f"skipped {url}: {exc}")
                        continue

                    rows = _parse_rows(response.text, city_name)
                    if not rows:
                        continue

                    # One round of inserts and one commit per page.
                    cursor.executemany(INSERT_SQL, rows)
                    conn.commit()
                    for row in rows:
                        print(f"{row[0]} 城市 {row[1]} 数据 ----- 爬取成功!")
    finally:
        conn.close()


if __name__ == "__main__":
    main()
【版权声明】本博文著作权归作者所有,任何形式的转载都请联系作者获取授权并注明出处!
【重要说明】博文仅作为本人的学习记录,论点和观点仅代表个人而不代表技术的真理,目的是自我学习,并希望有幸成为可以向他人分享的经验,因此有错误会虚心接受改正,但不代表此刻博文无误!
【博客园地址】粘你一身大鼻嘎 : http://www.cnblogs.com/wjw1014
【CSDN地址】粘你一身大鼻嘎 : https://wjw1014.blog.csdn.net/
【Gitee地址】粘你一身大鼻嘎 :https://gitee.com/wjw1014
【重要说明】博文仅作为本人的学习记录,论点和观点仅代表个人而不代表技术的真理,目的是自我学习,并希望有幸成为可以向他人分享的经验,因此有错误会虚心接受改正,但不代表此刻博文无误!
【博客园地址】粘你一身大鼻嘎 : http://www.cnblogs.com/wjw1014
【CSDN地址】粘你一身大鼻嘎 : https://wjw1014.blog.csdn.net/
【Gitee地址】粘你一身大鼻嘎 :https://gitee.com/wjw1014