Python 之天气爬虫
代码已调试通过
"""Scrape monthly historical weather data from tianqi.2345.com into a CSV file.

Run as a script: every monthly JavaScript data file for the configured year
range is downloaded, the fields are extracted with regular expressions, and
the combined table is written to ``weather_new<timestamp>.csv``.
"""
import random
import re
import time

import pandas as pd

# Request headers sent with every download.  The header names were
# misspelled in the original ('Accept -Enconding', 'conection'), so the
# server never saw them; they are spelled correctly here.
headers = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip,deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'User-Agent': 'Mozilla/5.0 (windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/63.0.3236.0 '
                  'Safari/537.36 ',
}

# URL template of one monthly data file (station 58362); filled with
# (year, month), month not zero-padded.
URL_TEMPLATE = 'http://tianqi.2345.com/t/wea_history/js/58362_%s%s.js'

# Last year for which this script knows how to build a URL.
LAST_SUPPORTED_YEAR = 2016

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10

# Bounds (inclusive, in seconds) of the random pause between requests.
MIN_PAUSE, MAX_PAUSE = 3, 6

# Output columns, in the order they appear in the CSV.
COLUMNS = (
    'city', 'ymd', 'high', 'low', 'tianqi', 'fengxiang', 'fengli',
    'aqi', 'aqiInfo', 'aqiLevel',
    'maxWendu', 'minWendu', 'avgbWendu', 'avgyWendu',
)

# Patterns whose shape does not depend on whether air-quality data exists.
# The leading comma on the bWendu / yWendu patterns keeps them from also
# matching the avgbWendu / avgyWendu summary fields.
_FIELD_PATTERNS = {
    'city': r"city:'(.*?)',",
    'ymd': r"ymd:'(.*?)',",
    'high': r",bWendu:'(.*?)',",
    'low': r",yWendu:'(.*?)',",
    'tianqi': r"tianqi:'(.*?)',",
    'fengxiang': r"fengxiang:'(.*?)',",
    'aqi': r"aqi:'(.*?)',",
    'aqiInfo': r"aqiInfo:'(.*?)',",
    'aqiLevel': r"aqiLevel:'(.*?)'}",
    'maxWendu': r"maxWendu:'(.*?)',",
    # The original reused the maxWendu pattern here, so the minimum
    # temperature column silently duplicated the maximum.
    'minWendu': r"minWendu:'(.*?)',",
    'avgbWendu': r"avgbWendu:'(.*?)',",
}


def build_urls(start_year=2012, end_year=2019):
    """Return the list of monthly data-file URLs to download.

    Args:
        start_year: First year to cover (inclusive).
        end_year: Year at which to stop (exclusive).

    Returns:
        One URL per month for every year up to ``LAST_SUPPORTED_YEAR``.
        For later years no URL is produced and a notice is printed once
        per year, as in the original script.
    """
    urls = []
    for year in range(start_year, end_year):
        if year > LAST_SUPPORTED_YEAR:
            # The original printed this on the first month and abandoned
            # the rest of the year; the effect is one message per year.
            print("未获取天气数据")
            continue
        urls.extend(URL_TEMPLATE % (year, month) for month in range(1, 13))
    return urls


def parse_weather(text):
    """Extract one month of weather records from a raw data file.

    Args:
        text: Body of a monthly data file as a string.

    Returns:
        A DataFrame with one column per entry of ``COLUMNS``.  Fields that
        occur fewer times than the daily fields (for example ``city`` or
        the monthly summaries) are padded with missing values.
    """
    fields = {
        name: re.findall(pattern, text)
        for name, pattern in _FIELD_PATTERNS.items()
    }

    # Files without air-quality data end each daily record at `fengli` and
    # the summary at `avgyWendu`, so those values are followed by `}`
    # instead of `,`.
    if fields['aqi']:
        terminator = ','
    else:
        terminator = '}'
        fields['aqi'] = []
        fields['aqiInfo'] = []
        fields['aqiLevel'] = []
    fields['fengli'] = re.findall("fengli:'(.*?)'" + terminator, text)
    fields['avgyWendu'] = re.findall("avgyWendu:'(.*?)'" + terminator, text)

    # orient='index' tolerates value lists of different lengths; transposing
    # then turns each field into a column.
    table = pd.DataFrame.from_dict(
        {name: fields[name] for name in COLUMNS}, orient='index'
    )
    return table.transpose()


def fetch(url):
    """Download *url* and return its text, or ``None`` if the request fails."""
    # Imported here so the URL and parsing helpers stay importable on
    # machines where `requests` is not installed.
    import requests

    try:
        return requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT).text
    except requests.RequestException as exc:
        print('request failed for %s: %s' % (url, exc))
        return None


def main():
    """Download every month, merge the tables and export them to CSV."""
    info = []
    for url in build_urls():
        response = fetch(url)
        if response is not None:
            print(response)
            info.append(parse_weather(response))
        # Pause a random few seconds between requests.  The original drew
        # the random number but discarded it and always slept 3 seconds.
        time.sleep(random.randint(MIN_PAUSE, MAX_PAUSE))

    if not info:
        # pd.concat raises ValueError on an empty list.
        print('no weather data downloaded; nothing to export')
        return

    weather = pd.concat(info)
    # Named `timestamp` so the `time` module is not rebound to a string.
    timestamp = time.strftime('%Y%m%d%H%M%S')
    weather.to_csv('weather_new' + timestamp + '.csv', index=False)


if __name__ == '__main__':
    main()
运行结果如下:
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 【杭电多校比赛记录】2025“钉耙编程”中国大学生算法设计春季联赛(1)