numpy+matplotlib+bs4的使用

1、首先安装库

安装pip install  BeautifulSoup

安装pip install  numpy

安装pip install  matplotlib

2、爬取指定网页数据,然后使用numpy和pandas对数据进行处理,最后使用 matplotlib 进行显示

import requests
from bs4 import BeautifulSoup
import numpy
import pandas
from matplotlib import pyplot


def getMessage(code, startdate, enddate):
    """获取网页内容组成数组返回"""
    url = "http://data.funds.hexun.com/outxml/detail/openfundnetvalue.aspx"
    data = dict(fundcode=code, startdate=startdate, enddate=enddate)
    header = {"Connection": "keep-alive",
              "Pragma": "no-cache",
              "Cache-Control": "no-cache",
              "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
              "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
              "Accept-Encoding": "gzip, deflate",
              "Accept-Language": 'zh-CN'}
    result = requests.get(url, data, headers=header).text
    soup = BeautifulSoup(result, 'html.parser')
    print(soup.prettify())  # 爬取内容后格式输出

    hearder = ["日期", "单位净值", "历史净值", "总市值"]
    datas = soup.find_all('data')  # 查找data标签内容
    datalist = []
    for data in datas:
        # 此处需要为空验证,标签内容可能为空
        fld_enddate = data.find_all('fld_enddate')[0].contents[0] if data.find_all('fld_enddate')[
            0].contents else numpy.nan
        fld_unitnetvalue = data.find_all('fld_unitnetvalue')[0].contents[0] if data.find_all('fld_unitnetvalue')[
            0].contents else numpy.nan
        fld_netvalue = data.find_all('fld_netvalue')[0].contents[0] if data.find_all('fld_netvalue')[
            0].contents else numpy.nan
        fld_newprice = data.find_all('fld_newprice')[0].contents[0] if data.find_all('fld_newprice')[
            0].contents else numpy.nan
        d = [fld_enddate, fld_unitnetvalue, fld_netvalue, fld_newprice]
        datalist.append(d)
        data = numpy.array(datalist)  # 创建一个numpy对象
    datas = pandas.DataFrame()  # 创建一个DataFrame对象
    for col, col_name in enumerate(hearder):  # 表示生成一个enumerate数组对应
        datas[col_name] = data[:, col]
    print(datas)
    return datas


def createMap(data):
    from matplotlib.font_manager import FontProperties
    font_set = FontProperties(fname=r"c:\windows\fonts\simsun.ttc", size=12)  # 解决图像中文乱码问题
    # 设置pyplot相关属性
    pyplot.title("历史净值变化", fontproperties=font_set)
    pyplot.xlabel("日期", fontproperties=font_set)
    pyplot.ylabel("单位净值", fontproperties=font_set)
    pyplot.plot(pandas.to_datetime(data['日期'], format='%Y/%m/%d'), data["单位净值"].astype(float))
    pyplot.show()


if __name__ == '__main__':
    reuslt = getMessage(code="159915", startdate='2019-11-21', enddate='2019-12-08')
    createMap(reuslt)

 

最后效果是这样

posted @ 2021-02-18 18:47  南风。  阅读(88)  评论(0编辑  收藏  举报