Python 爬虫——天气数据爬取与数据可视化

import requests
import pandas as pd

from matplotlib import pyplot as plt
from lxml import etree


# --- Step 1: scrape the monthly weather history page and save it as CSV ---
# Source page: daily weather history for Changping, November 2019.
url = 'http://www.tianqihoubao.com/lishi/changping/month/201911.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Safari/537.36'
}

dates, conditions, tem = [], [], []

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from parsing an HTTP error page as data.
page = requests.get(url, headers=headers, timeout=10)
page.raise_for_status()
response = page.text

tree = etree.HTML(response)
tr_list = tree.xpath('//*[@id="content"]/table//tr')

# The first <tr> is the table header, so data rows start at index 1.
for tr in tr_list[1:]:
    date_cells = tr.xpath('./td[1]/a/text()')
    condition_cells = tr.xpath('./td[2]/text()')
    temp_cells = tr.xpath('./td[3]/text()')
    # Skip rows that lack any of the three expected cells instead of
    # aborting the whole scrape with an IndexError.
    if not (date_cells and condition_cells and temp_cells):
        continue

    # The cell text contains a line break followed by indentation padding;
    # remove it, then trim the remaining surrounding whitespace.
    date = date_cells[0].replace('\r\n', '').strip()
    condition = condition_cells[0].replace('\r\n                                        ', '').strip()
    temp = temp_cells[0].replace('\r\n                                        ', '').strip()

    dates.append(date)
    conditions.append(condition)
    tem.append(temp)

# Column names are kept verbatim: the plotting step reads them back by name.
_date = pd.DataFrame({
    '日期': dates,
    '天气状况': conditions,
    '气温': tem,
})

# Reset the index and actually keep the result (the reset used to be
# computed and then discarded).
data = _date.reset_index(drop=True)
data.to_csv('changping.csv', index=False, encoding='utf-8')

# --- Step 2: visualise the daily high/low temperatures ---
# Use a font that contains CJK glyphs so the Chinese labels render.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Render the minus sign correctly when a CJK font is active.
plt.rcParams['axes.unicode_minus'] = False

# Read the CSV this script writes ('changping.csv'). The previous file name,
# 'guangzhou.csv', is never produced here and did not match the plot title.
df = pd.read_csv('changping.csv')
# A null check (df.isnull().sum()) on this data reported 0 missing values
# for each of the three columns.

# The temperature column holds "<high>/<low>"; split it once and reuse
# both halves.
temp_parts = df['气温'].str.split('/', expand=True)
df['最高气温'] = temp_parts[0]
df['最低气温'] = temp_parts[1]

# Drop the degree unit and convert to int (int() ignores surrounding spaces).
df['最高气温'] = df['最高气温'].map(lambda x: int(x.replace('℃', '')))
df['最低气温'] = df['最低气温'].map(lambda x: int(x.replace('℃', '')))

dates = df['日期']
tem_hight = df['最高气温']
tem_low = df['最低气温']

# Figure resolution and size (inches).
flg = plt.figure(dpi=128, figsize=(10, 6))
# c sets the line colour, alpha its transparency.
plt.plot(dates, tem_hight, c='red', alpha=0.5)
plt.plot(dates, tem_low, c='blue', alpha=0.5)

# Shade the band between the daily high and low.
plt.fill_between(dates, tem_hight, tem_low, facecolor='blue', alpha=0.2)

# Chart formatting: title, axis labels and tick appearance.
plt.title('北京昌平2019年11月天气', fontsize=24)
plt.xlabel('日期', fontsize=6)
flg.autofmt_xdate()  # slant the date labels so they do not overlap
plt.ylabel('气温', fontsize=12)
plt.tick_params(axis='both', which='major', labelsize=10)

# Only label every 20th date to keep the x axis readable.
plt.xticks(dates[::20])
plt.show()

posted @ 2019-12-15 18:04  corei5tj  阅读(112)  评论(0)  编辑  收藏  举报