numpy+matplotlib+bs4的使用
1、首先安装库
安装pip install BeautifulSoup
安装pip install numpy
安装pip install matplotlib
2、爬取指定网页数据,然后使用numpy和pandas对数据进行处理,最后使用 matplotlib 进行显示
import requests from bs4 import BeautifulSoup import numpy import pandas from matplotlib import pyplot def getMessage(code, startdate, enddate): """获取网页内容组成数组返回""" url = "http://data.funds.hexun.com/outxml/detail/openfundnetvalue.aspx" data = dict(fundcode=code, startdate=startdate, enddate=enddate) header = {"Connection": "keep-alive", "Pragma": "no-cache", "Cache-Control": "no-cache", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", "Accept-Encoding": "gzip, deflate", "Accept-Language": 'zh-CN'} result = requests.get(url, data, headers=header).text soup = BeautifulSoup(result, 'html.parser') print(soup.prettify()) # 爬取内容后格式输出 hearder = ["日期", "单位净值", "历史净值", "总市值"] datas = soup.find_all('data') # 查找data标签内容 datalist = [] for data in datas: # 此处需要为空验证,标签内容可能为空 fld_enddate = data.find_all('fld_enddate')[0].contents[0] if data.find_all('fld_enddate')[ 0].contents else numpy.nan fld_unitnetvalue = data.find_all('fld_unitnetvalue')[0].contents[0] if data.find_all('fld_unitnetvalue')[ 0].contents else numpy.nan fld_netvalue = data.find_all('fld_netvalue')[0].contents[0] if data.find_all('fld_netvalue')[ 0].contents else numpy.nan fld_newprice = data.find_all('fld_newprice')[0].contents[0] if data.find_all('fld_newprice')[ 0].contents else numpy.nan d = [fld_enddate, fld_unitnetvalue, fld_netvalue, fld_newprice] datalist.append(d) data = numpy.array(datalist) # 创建一个numpy对象 datas = pandas.DataFrame() # 创建一个DataFrame对象 for col, col_name in enumerate(hearder): # 表示生成一个enumerate数组对应 datas[col_name] = data[:, col] print(datas) return datas def createMap(data): from matplotlib.font_manager import FontProperties font_set = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12) # 解决图像中文乱码问题 # 设置pyplot相关属性 pyplot.title("历史净值变化", fontproperties=font_set) pyplot.xlabel("日期", fontproperties=font_set) pyplot.ylabel("单位净值", fontproperties=font_set) pyplot.plot(pandas.to_datetime(data['日期'], format='%Y/%m/%d'), data["单位净值"].astype(float)) pyplot.show() if __name__ == '__main__': reuslt = getMessage(code="159915", startdate='2019-11-21', enddate='2019-12-08') createMap(reuslt)
最后效果是这样