Python之爬取天气预报并生成图表

  使用Python爬虫从天气预报网站爬取天气数据并存储至MySQL,然后使用pyecharts绘制图表。

  本次代码可以在gitee下载:https://gitee.com/liuyueming/weatherSpider.git

  一,环境查看

  Python版本

1
2
C:\Users\liuym\Desktop\weatherSpider>python --version
Python 3.6.6

  MySQL版本

1
2
mysql --version
mysql  Ver 14.14 Distrib 5.7.22, for Linux (x86_64) using  EditLine wrapper

  二,代码

  安装模块

1
2
3
4
5
pip3 install pymysql
pip3 install bs4
pip3 install lxml
pip3 install requests
pip3 install pyecharts

  运行过程中遇到没有安装的库使用pip install安装即可

  本次爬取的天气预报网站为 http://www.tianqihoubao.com/  

  主程序main.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import pymysql
import requests
from bs4 import BeautifulSoup
  
# Open the MySQL connection and cursor shared by the rest of this script.
# Adjust host, credentials and database name to match the local setup.
db = pymysql.connect(
    host="localhost",
    user="root",
    passwd="123456",
    db="weather",
    charset='utf8',
)
cursor = db.cursor()
  
# Fetch a web page and parse it.
def get_html(url, timeout=10):
    """Download *url* and return it parsed as a ``BeautifulSoup`` document.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so one stalled
            connection would hang the whole crawl.

    Returns:
        The page parsed with the ``lxml`` parser.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with a clear error instead of parsing an error page and
    # crashing later on a missing table.
    response.raise_for_status()
    # The declared charset is not trusted; use the encoding detected from
    # the body so the Chinese text decodes correctly.
    response.encoding = response.apparent_encoding
    return BeautifulSoup(response.text, 'lxml')
  
# Years and months to crawl; every year/month pair maps to one monthly
# history page on the site.
year = ['2020']

month = ['01', '02', '03', '04', '05', '06', '07', '08', '09', '10', '11', '12']

# Page identifiers such as '202001', in chronological order.
time = [y + x for y in year for x in month]

# Loop-invariant INSERT statement; the values are bound by the driver.
_INSERT_SQL = (
    "insert into weather_spider"
    "(time_local, link, weather_type, temperature, wind_power) "
    "values(%s, %s, %s, %s, %s)"
)


def _clean_text(cell):
    """Return the text of a table cell with all whitespace removed."""
    # str.split() with no argument splits on every whitespace run (spaces,
    # '\r\n', bare '\n', tabs), so all three text fields are cleaned the
    # same way.
    return ''.join(cell.get_text().split())


try:
    for date in time:
        url = 'http://www.tianqihoubao.com/lishi/nanchang/month/' + date + '.html'
        soup = get_html(url)
        sup = soup.find('table', attrs={'class': 'b'})
        tr = sup.find_all('tr')
        # The first row is skipped (presumably the table header).
        for trl in tr[1:]:
            td = trl.find_all('td')
            anchor = td[0].find('a')
            href = anchor['href']      # link to the daily detail page
            title = anchor['title']    # title text of the link
            weather = _clean_text(td[1])   # weather conditions
            wendu = _clean_text(td[2])     # temperature
            fengli = _clean_text(td[3])    # wind force

            cursor.execute(_INSERT_SQL, (title, href, weather, wendu, fengli))
            db.commit()
finally:
    # Release the cursor and connection even if a page fails to download
    # or parse. The original `db.close` lacked the call parentheses and
    # therefore never closed the connection.
    cursor.close()
    db.close()
print('爬取完成')

  代码解析

1
db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8' ) # 数据库连接信息,根据实际情况修改

  

1
year = ['2020'] # 需要爬取的年份信息

  

1
url = 'http://www.tianqihoubao.com/lishi/nanchang/month/'+ date +'.html' # 需要爬取的城市信息 本次为南昌

  生成html程序myVisualize.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import pymysql
import pyecharts.options as opts
from pyecharts.charts import Line, Pie
 
def _add_temperature_series(line, series_name, values):
    """Add one temperature series with max/min markers and an average line."""
    line.add_yaxis(
        series_name=series_name,
        y_axis=values,
        is_symbol_show=False,
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(type_="max", name="最大值"),
                opts.MarkPointItem(type_="min", name="最小值"),
            ]
        ),
        markline_opts=opts.MarkLineOpts(
            data=[opts.MarkLineItem(type_="average", name="平均值")]
        ),
    )


def create_temp():
    """Render the stored temperatures as a line chart in an HTML file.

    Reads ``time_local`` and ``temperature`` from every row of the
    ``weather_spider`` table. The temperature text is split on ``'/'``; the
    part before the slash is plotted as the daily high and the part after it
    as the daily low, with the ``℃`` sign removed. The chart is written to
    ``南昌2020气温变化表.html`` in the current directory.

    Raises:
        IndexError: If a stored temperature value contains no ``'/'``.
        pymysql.MySQLError: If the database cannot be reached or queried.
    """
    # The host was previously misspelled as "localhist", so the connection
    # could never be established.
    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        with db.cursor() as cursor:
            # Name the columns explicitly so the code does not depend on the
            # physical column order of the table.
            cursor.execute('SELECT time_local, temperature FROM weather_spider;')
            data = cursor.fetchall()
    finally:
        # Close the connection even when the query fails.
        db.close()

    max_temp_list = []
    min_temp_list = []
    day_list = []
    for time_local, temperature in data:
        high, low = temperature.split('/')[:2]
        max_temp_list.append(high.replace('℃', ''))
        min_temp_list.append(low.replace('℃', ''))
        # Keep the first 11 characters as the x-axis label (presumably the
        # date prefix of the stored title; verify against the stored data).
        day_list.append(time_local[:11])

    line = Line()
    line.add_xaxis(day_list)
    _add_temperature_series(line, "最高气温", max_temp_list)
    _add_temperature_series(line, "最低气温", min_temp_list)
    line.set_global_opts(
        yaxis_opts=opts.AxisOpts(name="温度(℃)"),
        title_opts=opts.TitleOpts(title="南昌气温变化表"),
        tooltip_opts=opts.TooltipOpts(trigger="axis"),
    )

    line.render('南昌2020气温变化表.html')
    print('气温图生成成功')
 
def create_weather():
    """Render a pie chart of how often each weather keyword occurs.

    For every keyword in ``attr`` the function counts the rows of
    ``weather_spider`` whose ``weather_type`` contains that keyword. Each
    keyword is counted independently, so a row whose weather text contains
    several keywords contributes to several slices. The chart is written to
    ``南昌2020天气占比表.html`` in the current directory.

    Raises:
        pymysql.MySQLError: If the database cannot be reached or queried.
    """
    attr = ["雨", "多云", "晴", "阴", "雪", "雾", "霾"]
    counts = []
    db = pymysql.connect(host="localhost", user="root", passwd="123456", db="weather", charset='utf8')
    try:
        with db.cursor() as cursor:
            for keyword in attr:
                # Let the server count the matches instead of transferring
                # every matching row just to read the row count.
                cursor.execute(
                    'SELECT COUNT(*) FROM weather_spider WHERE weather_type LIKE %s;',
                    ('%' + keyword + '%',),
                )
                counts.append(cursor.fetchone()[0])
    finally:
        # Close the connection even when a query fails.
        db.close()

    pie = (
        Pie()
        .add("", [list(pair) for pair in zip(attr, counts)])
        .set_global_opts(title_opts=opts.TitleOpts(title="天气占比表"))
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
    )

    pie.render('南昌2020天气占比表.html')
    print('天气图生成成功')
 
 
# When run as a script, build both charts: temperatures first, then the
# weather-type breakdown.
if __name__ == '__main__':
    for build_chart in (create_temp, create_weather):
        build_chart()

  MySQL操作(安装MySQL不详述)

  创建库

1
create database weather;

  导入表

1
mysql -uroot -pioYbcZ1u -h127.0.0.1 weather < weather.sql

  表语句sql如下weather.sql

1
2
3
4
5
6
7
8
-- Schema for the table that main.py fills with one row per scraped table row.
-- NOTE: the DROP statement discards any existing weather_spider table and its
-- data before the table is recreated.
DROP TABLE IF EXISTS `weather_spider`;
CREATE TABLE `weather_spider` (
  -- title attribute of the row's link, as inserted by main.py
  `time_local` varchar(255) DEFAULT NULL,
  -- href attribute of the row's link
  `link` varchar(255) DEFAULT NULL,
  -- weather description text
  `weather_type` varchar(255) DEFAULT NULL,
  -- temperature text; myVisualize.py splits it on '/' and strips the degree sign
  `temperature` varchar(255) DEFAULT NULL,
  -- wind force text
  `wind_power` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;

  三,运行

  运行主程序

1
python main.py

  运行正常会往MySQL数据库写入数据,登录数据库搜索查看

1
select * from weather_spider;

 

   运行生成html程序

1
2
3
python myVisualize.py
气温图生成成功
天气图生成成功

  在当前目录会生成html,打开查看

 

 

 

posted @   minseo  阅读(1306)  评论(0)  编辑  收藏  举报
编辑推荐:
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
阅读排行:
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· AI编程工具终极对决:字节Trae VS Cursor,谁才是开发者新宠?
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
历史上的今天:
2018-12-23 Docker之OVS网络
点击右上角即可分享
微信分享提示