from bs4 import BeautifulSoup
import requests
import tushare as ts


def loadROEts(year=2020, quarter=1):
    """Fetch ROE figures via tushare and insert them into the GX table of test.db.

    Args:
        year: report year passed to ts.get_profit_data (default 2020).
        quarter: report quarter passed to ts.get_profit_data (default 1).
    """
    import sqlite3

    # Profitability data frame for the requested period; we only use the
    # .code and .roe columns.
    a = ts.get_profit_data(year, quarter)  # ROE
    print('get_profit_data OK')
    codelist = a.code.tolist()
    roelist = a.roe.tolist()
    print('tolist OK')

    conn = sqlite3.connect('test.db')
    try:
        c = conn.cursor()
        for index, code in enumerate(codelist):
            # tushare occasionally returns a string where a number is
            # expected; store 0 in that case (original behaviour).
            roe = roelist[index] if not isinstance(roelist[index], str) else 0
            try:
                # Parameterized query: avoids SQL quoting bugs / injection.
                c.execute('insert into GX values(?, ?)', (code, roe))
            except sqlite3.Error as e:
                print('err:', code, roe, e)
            if index % 100 == 0:
                print('inserting...', index)
                conn.commit()
        c.close()
        conn.commit()
    finally:
        # Ensure the connection is released even if an insert loop blows up.
        conn.close()


def loadPEPBts():
    """Placeholder: load PE/PB (price-earnings / price-book) data."""
    pass


def get_html_text(url):
    """GET *url* and return the decoded page text, or None on any request error.

    Args:
        url: fully-formed URL to fetch.

    Returns:
        The response body as text, or None when the request fails.
    """
    try:
        # Timeout so a dead endpoint cannot hang the scrape loop forever.
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        # Use the detected encoding so Chinese pages decode correctly.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException as e:
        print(e)
        return None


def getEarnings_ratio(date='2020-11-12'):
    """Scrape PB / earnings-ratio values from csindex.com.cn for every code in
    the GX table and store them in the pber table of test.db.

    Args:
        date: query date (YYYY-MM-DD) sent to the csindex page
            (default '2020-11-12', the original hard-coded value).
    """
    import sqlite3

    conn = sqlite3.connect('test.db')
    try:
        rows = conn.cursor().execute('''select * from GX;''')
        c = conn.cursor()
        for index, l in enumerate(rows):
            url = ('http://www.csindex.com.cn/zh-CN/downloads/'
                   'industry-price-earnings-ratio-detail'
                   '?date={}&class=1&search=1&csrc_code={}').format(date, l[0])
            html = get_html_text(url)
            if html is None:
                # Network failure: skip this code rather than crash on
                # BeautifulSoup(None).
                print('skip (no html):', index, l[0])
                continue
            soup = BeautifulSoup(html, "html.parser")
            td = soup.find_all("td")
            print(index)
            if len(td) == 0:
                continue  # page carried no data table for this code
            f10 = str(td[10].string)
            if f10 == " -- ":
                continue  # value not published for this code
            print('PB:', td[9].string, "er:", td[10].string)
            try:
                # Convert inside the handler: a malformed cell logs an error
                # instead of aborting the whole scrape.
                pb = float(str(td[9].string))
                er = float(f10)
                c.execute('insert into pber values(?, ?, ?);', (l[0], pb, er))
                # Commit periodically so progress survives an interruption.
                if index % 5 == 0:
                    conn.commit()
                print('stored:', l[0], pb, er)
            except (ValueError, sqlite3.Error) as e:
                print('err:', index, e)
        c.close()
        conn.commit()
    finally:
        conn.close()


if __name__ == '__main__':
    # loadROEts()
    getEarnings_ratio()