天气数据爬取+pyecharts可视化

数据爬取/处理

爬取深圳2021年全年的天气历史数据。

网址链接:https://lishi.tianqi.com/shenzhen/

代码:

import requests
from lxml import etree
import pandas as pd

# Request headers sent with every page fetch; the User-Agent is a desktop
# Edge/Chromium browser string.
headers = dict([
    (
        'User-Agent',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/89.0.4389.114 Safari/537.36 Edg/89.0.774.68',
    ),
])

# title=html.xpath('/html/body/div[7]/div[1]/div[4]/div//text()')
#
# # (国际化功能中常常用到),\u4e00-\u9fa5是用来判断是不是中文的一个条件。
# title_list = [item for item in title if re.findall('[\u4e00-\u9fa5]', item)]
# # print(title_list)

def get_url_list(s_page, e_page, city='shenzhen', year=2021):
    """Build the monthly history-page URLs for a city and year.

    Args:
        s_page: First month number to include.
        e_page: Month number to stop before (exclusive, as in ``range``).
        city: City slug used in the URL path. Defaults to ``'shenzhen'``.
        year: Four-digit year used in the URL. Defaults to ``2021``.

    Returns:
        A list of URLs of the form
        ``https://lishi.tianqi.com/<city>/<year><MM>.html``, one per month in
        ``range(s_page, e_page)``; empty when the range is empty.
    """
    # ``{:02d}`` zero-pads single-digit months (1 -> "01") without a branch.
    return [
        'https://lishi.tianqi.com/{}/{}{:02d}.html'.format(city, year, month)
        for month in range(s_page, e_page)
    ]
    

def parse_datas(url, timeout=10):
    """Download one history page and extract its table rows.

    Args:
        url: Page URL to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        A list with one entry per ``<li>`` row found; each entry is the list
        of text nodes from that row's direct ``<div>`` children. Empty when
        the page has no matching rows or no parseable body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeout.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error page instead of silently returning no rows.
    response.raise_for_status()

    html = etree.HTML(response.text)
    if html is None:
        # lxml returns None when the document is empty.
        return []

    # NOTE(review): absolute XPath tied to the page layout at the time of
    # writing -- will return no rows if the site markup changes.
    rows = html.xpath('/html/body/div[7]/div[1]/div[4]/ul/li')
    return [li.xpath('./div/text()') for li in rows]


def temp_datas(s_page, e_page):
    """Collect the rows of every monthly page into one flat list.

    Builds the URL list with ``get_url_list(s_page, e_page)``, parses each
    URL with ``parse_datas`` and concatenates the per-page row lists in URL
    order.
    """
    return [
        row
        for page_url in get_url_list(s_page, e_page)
        for row in parse_datas(page_url)
    ]


if __name__ == '__main__':
    # Scrape pages 1..12 (the end bound is exclusive) and print the table.
    column_names = ['date', 'h_temp', 'l_temp', 'weather', 'w_d']
    scraped_rows = temp_datas(s_page=1, e_page=13)
    datas = pd.DataFrame(scraped_rows, columns=column_names)
    print(datas)

数据存储

将数据存储为CSV格式,或存入sqlite数据库中。

代码:

from weather_spider import temp_datas           #从上面数据爬取的模块文件中导入temp_datas函数
import os
import pandas as pd
from sqlalchemy import create_engine

def save_csv(datas):
    """Write the weather table to ``./weather_datas/weather_datas_sz.csv``.

    Args:
        datas: Object exposing ``to_csv`` (a pandas ``DataFrame`` in this
            script). Written without the index column.

    The output directory is created if it does not exist; an existing file
    is overwritten.
    """
    # exist_ok=True avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./weather_datas', exist_ok=True)

    save_path = './weather_datas/weather_datas_sz.csv'
    datas.to_csv(save_path, index=False)

def save_sqlite(datas, datasname, if_exists='fail'):
    """Store the weather table in ``weather_datas/weather_datas.db``.

    Args:
        datas: Object exposing ``to_sql`` (a pandas ``DataFrame`` in this
            script). Written without the index column.
        datasname: Name of the destination table.
        if_exists: Forwarded to ``DataFrame.to_sql``. The default ``'fail'``
            keeps the previous behaviour (an existing table raises
            ``ValueError``); pass ``'replace'`` or ``'append'`` to re-run
            against an existing database.
    """
    # SQLite cannot create missing parent directories, so make sure the
    # folder exists before the first connection is opened.
    os.makedirs('weather_datas', exist_ok=True)

    engine = create_engine('sqlite:///weather_datas/weather_datas.db')
    try:
        datas.to_sql(datasname, engine, index=False, if_exists=if_exists)
    finally:
        # Release pooled connections so the database file is not left open.
        engine.dispose()

if __name__ == '__main__':
    # Scrape pages 1..12 (the end bound is exclusive) and store the result
    # in the SQLite table ``weather_datas_sz``.
    column_names = ['date', 'h_temp', 'l_temp', 'weather', 'w_d']
    scraped_rows = temp_datas(s_page=1, e_page=13)
    datas = pd.DataFrame(scraped_rows, columns=column_names)
    save_sqlite(datas, 'weather_datas_sz')

数据可视化

对2021年深圳的天气数据进行可视化分析。

import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from pyecharts import options as opts
from pyecharts.charts import Bar, Line, Timeline, Page, Pie, Grid


def get_datas():
    """Load the stored weather table from SQLite and derive analysis columns.

    Reads every row of ``weather_datas_sz`` from
    ``weather_datas/weather_datas.db`` and then:

    * ``date``: keeps only the text before the first space.
    * ``h_temp`` / ``l_temp``: keeps the text before ``'℃'`` and converts it
      to float.
    * ``w_d0`` / ``w_d1``: first and second space-separated parts of ``w_d``
      (presumably wind direction and wind force -- verify against the
      scraped data).
    * ``month``: second ``'-'``-separated field of ``date``.
    * ``deltaT``: ``h_temp - l_temp``.
    * ``averageT``: mean of ``h_temp`` and ``l_temp``.

    Returns:
        The resulting pandas ``DataFrame``.
    """
    engine = create_engine(r'sqlite:///weather_datas/weather_datas.db')
    sql = 'SELECT * FROM weather_datas_sz'
    datas = pd.read_sql(sql, engine)
    # Alternative source: read the CSV export instead of the database.
    # datas = pd.read_csv('./weather_datas/weather_datas_sz.csv')

    # Clean-up and derived columns.
    datas['date'] = datas['date'].str.split(' ', expand=True)[0]
    # Use the builtin ``float``: the ``np.float`` alias was deprecated in
    # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
    datas['h_temp'] = datas['h_temp'].str.split('℃', expand=True)[0].astype(float)
    datas['l_temp'] = datas['l_temp'].str.split('℃', expand=True)[0].astype(float)

    # Split ``w_d`` once and reuse both parts.
    wind_parts = datas['w_d'].str.split(' ', expand=True)
    datas['w_d0'] = wind_parts[0]
    datas['w_d1'] = wind_parts[1]

    datas['month'] = datas['date'].apply(lambda x: x.split('-')[1])
    datas['deltaT'] = datas['h_temp'] - datas['l_temp']
    datas['averageT'] = (datas['h_temp'] + datas['l_temp']) / 2

    return datas


def t_line(datas, city):
    """Build a timeline of per-month temperature line charts.

    One frame is added for every distinct value of ``datas.month``. Each
    frame plots ``h_temp``, ``l_temp`` and ``deltaT`` against ``date``, with
    max/min mark points and an average mark line per series.

    Args:
        datas: Table with ``month``, ``date``, ``h_temp``, ``l_temp`` and
            ``deltaT`` columns.
        city: Text shown as the chart subtitle.

    Returns:
        The populated ``Timeline``.
    """
    # (legend label, source column, mark-point kinds), in drawing order.
    series_specs = (
        ('最高气温', 'h_temp', ('max',)),
        ('最低气温', 'l_temp', ('min',)),
        ('最高温-最低温', 'deltaT', ('max', 'min')),
    )
    point_names = {'max': '最大值', 'min': '最小值'}

    timeline = Timeline()
    for month in datas.month.unique():
        monthly = datas[datas.month == month]

        chart = Line()
        chart.add_xaxis(monthly['date'].tolist())

        for label, column, kinds in series_specs:
            mark_points = [
                opts.MarkPointItem(type_=kind, name=point_names[kind])
                for kind in kinds
            ]
            chart.add_yaxis(
                label,
                monthly[column].tolist(),
                markpoint_opts=opts.MarkPointOpts(data=mark_points),
                markline_opts=opts.MarkLineOpts(
                    data=[opts.MarkLineItem(type_='average', name='平均值')]
                ),
            )

        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="{}月份的气温变化(℃)".format(month), subtitle=city),
            toolbox_opts=opts.ToolboxOpts(is_show=True),
            xaxis_opts=opts.AxisOpts(type_='category'),
        )
        timeline.add(chart, '{}月'.format(month))

    return timeline

def _count_pairs(frame, column):
    """Return ``[value, row_count]`` pairs for each group of *column*."""
    counted = frame.groupby(column).size().reset_index()
    return [list(pair) for pair in zip(counted[column], counted[0])]


def t_pie(datas, city):
    """Build a timeline of per-month weather / wind-direction rose pies.

    One frame is added for every distinct value of ``datas['month']``. Each
    frame is a ``Grid`` holding two rose-type pies: the ``weather`` counts
    centred at 25% and the ``w_d0`` (wind direction) counts centred at 75%.

    Args:
        datas: Table with ``month``, ``weather`` and ``w_d0`` columns.
        city: Text shown as the chart subtitle.

    Returns:
        The populated ``Timeline``.
    """
    timeline = Timeline()
    for month in datas.month.unique():
        monthly = datas[datas['month'] == month]

        weather_pairs = _count_pairs(monthly, 'weather')
        wind_pairs = _count_pairs(monthly, 'w_d0')

        # Wind-direction pie (right half of the frame).
        wind_pie = Pie()
        wind_pie.add(
            "",
            wind_pairs,
            radius=["30%", "60%"],
            center=["75%", "50%"],
            rosetype="area",
        )
        wind_pie.set_global_opts(legend_opts=opts.LegendOpts(is_show=False))

        # Weather pie (left half of the frame); it also carries the title.
        weather_pie = Pie()
        weather_pie.add(
            "",
            weather_pairs,
            radius=["30%", "60%"],
            center=["25%", "50%"],
            rosetype="area",
        )
        weather_pie.set_global_opts(
            title_opts=opts.TitleOpts(title="{}月份天气/风向情况".format(month), subtitle=city),
            legend_opts=opts.LegendOpts(is_show=False),
        )

        layout = Grid()
        layout.add(wind_pie, grid_opts=opts.GridOpts(pos_left="55%"))
        layout.add(weather_pie, grid_opts=opts.GridOpts(pos_right="55%"))

        timeline.add(layout, '{}月'.format(month))

    return timeline

def bar_plot(datas, city):
    """Build a bar chart of the mean ``averageT`` for each month.

    Args:
        datas: Table with ``month`` (string) and ``averageT`` columns.
        city: Text shown as the chart subtitle.

    Returns:
        A ``Bar`` with one bar per month group, values rounded to one decimal.
    """
    monthly_mean = datas.groupby('month')['averageT'].mean()
    month_labels = [month + '月' for month in monthly_mean.index]
    mean_values = [round(value, 1) for value in monthly_mean.values]

    chart = Bar()
    chart.add_xaxis(month_labels)
    chart.add_yaxis('', mean_values)
    chart.set_global_opts(
        title_opts=opts.TitleOpts(title='各月平均气温(℃)', subtitle=city)
    )
    return chart

def title(city):
    """Build the page heading, using a data-less ``Pie`` as the carrier.

    Args:
        city: City name inserted into the heading text.

    Returns:
        A ``Pie`` whose only content is a centred 36pt black title.
    """
    heading_style = opts.TextStyleOpts(font_size=36, color='#000000')
    heading = opts.TitleOpts(
        title='2021年{}天气情况分析'.format(city),
        title_textstyle_opts=heading_style,
        pos_left='center',
        pos_top='middle',
    )

    banner = Pie()
    banner.set_global_opts(title_opts=heading)
    return banner

def page_layout(datas, city):
    """Assemble all charts into one draggable-layout ``Page``.

    Charts are added in this order: heading, monthly-average bar chart,
    temperature line timeline, weather/wind pie timeline.

    Args:
        datas: Table passed through to the individual chart builders.
        city: City name passed through to the individual chart builders.

    Returns:
        The populated ``Page``.
    """
    charts = (
        title(city),
        bar_plot(datas, city),
        t_line(datas, city),
        t_pie(datas, city),
    )
    page = Page(layout=Page.DraggablePageLayout)
    page.add(*charts)
    return page

def resave_page():
    """Re-save the rendered page with a saved layout configuration applied.

    Calls ``Page.save_resize_html`` with the rendered HTML as ``source``,
    ``./chart_config.json`` as ``cfg_file`` and ``mynew_render.html`` as
    ``dest``.
    """
    source_html = './2021年1-12月份深圳天气分析统计.html'
    layout_config = r'./chart_config.json'
    output_html = 'mynew_render.html'

    Page().save_resize_html(
        source=source_html,
        cfg_file=layout_config,
        dest=output_html,
    )

if __name__ == '__main__':
    # NOTE(review): this step only re-saves an existing page; the source HTML
    # and chart_config.json presumably have to be produced beforehand
    # (render page_layout(...) and export the layout) -- confirm.
    resave_page()

posted @ 2022-01-24 12:40  溪奇的数据  阅读(1092)  评论(6)  编辑  收藏  举报