Python 爬取基金经理列表、基金持有前十股票分析

股市有风险，投资需谨慎。本博客仅用于学习 Python 知识。

# coding=utf-8
import json
import random
import time

import mysql.connector
import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.chrome.options import Options


# Entry point: scrape the top holdings of funds run by above-average managers.
def main():
    """Print a start-up banner, then scrape holdings for each selected fund.

    Rows are read from the `manager` table; column 6 of each row is used as
    the fund code, both to build the holdings-page URL and as the fund
    number handed to doGetFundStock. The loop pauses 3 seconds after every
    fund.
    """
    started_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    print(started_at + " 程序启动~")

    # One-off bootstrap steps, left disabled; enable them for a first run:
    #   initDb()                 - create the database
    #   initTable()              - create the tables
    #   doGetFundManagerList()   - download the fund-manager list

    # Managers whose maxRate exceeds the table-wide mean, best first, top 50.
    query = "SELECT * FROM manager WHERE maxRate > (SELECT SUM(maxRate)/COUNT(1) FROM manager ) ORDER BY maxRate DESC limit 50"
    for row in select(query):
        fund_no = row[6]
        page_url = 'http://fundf10.eastmoney.com/ccmx_%s.html' % (fund_no)
        # Store this fund's top-ten stock holdings.
        doGetFundStock(page_url, fund_no)
        time.sleep(3)
    # Single-fund smoke test:
    # doGetFundStock("http://fundf10.eastmoney.com/ccmx_007685.html","007685")


# Scrape a fund's top-ten stock holdings page and store the rows in `stock`.
def doGetFundStock(url,fundNo):
    """Render *url* in headless Chrome and insert its holdings into `stock`.

    Args:
        url: Holdings page to load.
        fundNo: Fund code; printed and stored in the `fundNo` column.

    Cells 2, 3, 4, 5, 7, 8 and 9 of the first holdings box become stockNo,
    stockName, stockPrice, upAndDown, fundRate, fundNum and fundMoney. The
    second text node of the box's date label becomes updateTime (NULL when
    the label is missing). Nothing is written when no complete row is found.
    """
    print("抓取基金:" + fundNo + " " + url)
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        page = driver.page_source
    finally:
        # Always shut the browser down; otherwise each call leaves a
        # headless Chrome process behind.
        driver.quit()

    # Parse the rendered page so it can be queried with XPath.
    html = etree.HTML(page)
    box = "//div[@class='txt_cont']//div[@id='cctable']//div[@class='box'][1]"
    columns = [
        html.xpath(box + "//td[" + str(n) + "]//text()")
        for n in (2, 3, 4, 5, 7, 8, 9)
    ]
    update_label = html.xpath(box + "//label[@class='right lab2 xq505']//text()")
    # Named update_time (not `time`) so the time module is not shadowed.
    update_time = str(update_label[1]) if len(update_label) > 1 else None

    fund_code = str(fundNo).replace("%", "")
    # zip() stops at the shortest column, so a ragged scrape cannot raise
    # IndexError. str() turns lxml's string subclass into a plain str that
    # the MySQL driver knows how to convert.
    rows = [
        (
            str(stock_no),
            str(stock_name),
            str(stock_price),
            str(up_and_down).replace("%", ""),
            str(fund_rate).replace("%", ""),
            str(fund_num).replace(",", ""),
            str(fund_money).replace(",", ""),
            update_time,
            fund_code,
        )
        for stock_no, stock_name, stock_price, up_and_down, fund_rate, fund_num, fund_money
        in zip(*columns)
    ]
    if not rows:
        return

    # Placeholders let the driver quote the scraped text; concatenating it
    # into the statement breaks on quotes and allows SQL injection.
    sql = ("INSERT INTO `stock`(`stockNo`,`stockName`,`stockPrice`,`upAndDown`,`fundRate`,`fundNum`,`fundMoney`,`updateTime`,`fundNo`) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s)")
    db = connect()
    try:
        cursor = db.cursor()
        cursor.execute("use fund")
        print("插入数据:" + sql)
        print(rows)
        cursor.executemany(sql, rows)
        db.commit()
    finally:
        db.close()


# Download the paginated fund-manager list and store every page.
def doGetFundManagerList():
    """Fetch all pages of the fund-manager list and save each one.

    The first response supplies the page count and is also stored as
    page 1. Each later page is requested after a random 5-10 second pause
    and passed to saveFundManagerToMysql.

    Raises:
        ValueError: if a response lacks the expected "pages"/"curpage"
            trailer or the "[[ ... ]]" data array.
    """
    # Endpoint of the manager list.
    url = "http://fund.eastmoney.com/Data/FundDataPortfolio_Interface.aspx"
    # Request headers.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36",
    }
    # Query parameters: "ft": "gp" selects stock funds, "pi" is the page
    # number. For every fund type use "ft": "all" (the earlier all-funds
    # URL paired it with "pn": 5000).
    params = {
        "dt": 14,
        "mc": "returnjson",
        "ft": "gp",
        "pn": 50,
        "pi": 1,
        "sc": "abbname",
        "st": "asc",
    }
    # Request the first page. A timeout keeps a stalled connection from
    # hanging the script forever.
    response = requests.get(url=url, params=params, headers=headers, timeout=30)
    text = response.text
    # The body is a JavaScript assignment rather than plain JSON, e.g.
    #   var returnjson= {data:[["30634044","艾定飞",...,"81.32%"],...],record:614,pages:13,curpage:1}
    # so the page count is sliced out from between "pages:" and ",curpage".
    pages = text[(text.index("pages") + 6):(text.index("curpage") - 1)]
    print("总页数:" + pages)
    total_pages = int(pages)
    num = 1
    while num <= total_pages:
        if num > 1:
            # Page 1 is already in hand; only later pages need a new
            # (throttled) request.
            time.sleep(random.randint(5, 10))
            params['pi'] = num
            response = requests.get(url=url, params=params, headers=headers, timeout=30)
            text = response.text
        # The "[[ ... ]]" array is the only part of the body that is valid JSON.
        data = text[(text.index("[[")):(text.index("]]") + 2)]
        print("" + str(num) + "页数据")
        print(data)
        saveFundManagerToMysql(data)
        num += 1


# Persist one page of fund-manager rows into the `manager` table.
def saveFundManagerToMysql(data):
    """Insert the managers described by *data* into `manager`.

    Args:
        data: JSON text holding a list of rows, each a list of strings like
            ["30634044","艾定飞","80053204","华商基金","007685,007853",
             "华商电子行业量化股票,华商计算机行业量化股票","489","81.32%",
             "007685","华商电子行业量化股票","7.34亿元","81.32%"].

    Elements 0-3 fill workNo, name, companyNo and company. Elements 8 and 9
    fill fundNo and fundIn, element 6 fills workTime. Element 11 fills
    maxRate with "%" stripped and "--" replaced by "0". An empty list
    writes nothing.
    """
    rows = [
        (
            row[0], row[1], row[2], row[3], row[8], row[9], row[6],
            str(row[11]).replace("%", "").replace("--", "0"),
        )
        for row in json.loads(data)
    ]
    if not rows:
        # An INSERT with no VALUES tuples would be a SQL syntax error.
        return

    # Placeholders let the driver quote the scraped text; concatenating it
    # into the statement breaks on quotes and allows SQL injection.
    sql = ("INSERT INTO `manager`(`workNo`,`name`,`companyNo`,`company`,`fundNo`,`fundIn`,`workTime`,`maxRate`) "
           "VALUES (%s,%s,%s,%s,%s,%s,%s,%s)")
    db = connect()
    try:
        cursor = db.cursor()
        cursor.execute("use fund")
        print("插入数据:" + sql)
        print(rows)
        cursor.executemany(sql, rows)
        db.commit()
    finally:
        db.close()


# Placeholder for saving a fund's top-ten stock holdings.
def saveFundStockToMysql(data):
    """Print *data*; nothing is written to the database here."""
    print(data)


# Open a connection to the local MySQL server.
def connect():
    """Return a new mysql.connector connection to localhost as root/root.

    No default database is passed to the driver.
    """
    return mysql.connector.connect(host="localhost", user="root", passwd="root")


# Create the `fund` database if it does not exist yet.
def initDb():
    """Run CREATE DATABASE IF NOT EXISTS fund on a fresh connection."""
    db = connect()
    try:
        cursor = db.cursor()
        print("init database fund")
        cursor.execute("CREATE DATABASE IF NOT EXISTS fund")
        db.commit()
    finally:
        # Release the connection even when the statement fails.
        db.close()


# Create the `manager` and `stock` tables inside the `fund` database.
def initTable():
    """Create both tables with CREATE TABLE IF NOT EXISTS.

    Existing tables are left untouched. The connection is closed afterwards,
    whether or not the statements succeed.
    """
    db = connect()
    try:
        cursor = db.cursor()
        # Select the database.
        print("use fund database")
        cursor.execute("USE fund")
        # Fund-manager table.
        print("init table manager")
        cursor.execute("CREATE TABLE IF NOT EXISTS `manager`("
                       "`id` BIGINT(10) NOT NULL  AUTO_INCREMENT COMMENT '主键' , "
                       "`workNo` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '基金经理职业编号' , "
                       "`name` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '基金经理名称' , "
                       "`companyNo` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '基金经理所属公司编号' ,"
                       "`company` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '基金经理所属公司' , "
                       "`fundIn` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '基金经理任职基金' , "
                       "`fundNo` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '任职基金代码' , "
                       "`workTime` INT(50) COLLATE utf8_general_ci NULL  COMMENT '基金经理累计从业时间' , "
                       "`maxRate` DOUBLE(10,2) NULL  COMMENT '最高回报率' , "
                       "PRIMARY KEY (`id`) "
                       ") ENGINE=INNODB DEFAULT CHARSET='utf8' COLLATE='utf8_general_ci'")
        # Top-ten holdings table.
        # NOTE(review): `updateTime` is declared INT, yet doGetFundStock
        # supplies it as a quoted string - confirm the intended column type.
        print("init table stock")
        cursor.execute("CREATE TABLE IF NOT EXISTS `stock`("
                       "`id` BIGINT(10) NOT NULL  AUTO_INCREMENT COMMENT '主键' , "
                       "`stockNo` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '股票代码' , "
                       "`stockName` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '股票名称' , "
                       "`stockPrice` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '最新价格' ,"
                       "`upAndDown` DOUBLE(10,2) COLLATE utf8_general_ci NULL  COMMENT '涨跌幅' , "
                       "`fundRate` DOUBLE(10,2) COLLATE utf8_general_ci NULL  COMMENT '所占比例' , "
                       "`fundNum` INT(255) COLLATE utf8_general_ci NULL  COMMENT '持股数量' , "
                       "`fundNo` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '基金代码' , "
                       "`fundMoney` VARCHAR(255) COLLATE utf8_general_ci NULL  COMMENT '持仓市值' , "
                       "`updateTime` INT(255) COLLATE utf8_general_ci NULL  COMMENT '数据更新时间' , "
                       "PRIMARY KEY (`id`) "
                       ") ENGINE=INNODB DEFAULT CHARSET='utf8' COLLATE='utf8_general_ci'")
        db.commit()
    finally:
        # Release the connection even when a statement fails.
        db.close()


# Execute an INSERT statement against the `fund` database.
def insert(sql):
    """Run *sql* on a fresh connection and commit it.

    Args:
        sql: Complete SQL statement text; it is printed before execution.
    """
    db = connect()
    try:
        cursor = db.cursor()
        cursor.execute("use fund")
        print("插入数据:" + sql)
        cursor.execute(sql)
        db.commit()
    finally:
        # Release the connection even when the statement fails.
        db.close()


# Empty a table in the `fund` database.
def truncate(tableName):
    """Run TRUNCATE TABLE on *tableName* and commit.

    Args:
        tableName: Table name, appended to the statement verbatim, so it
            must come from trusted code rather than external input.
    """
    db = connect()
    try:
        cursor = db.cursor()
        cursor.execute("use fund")
        print("truncate table " + tableName)
        cursor.execute("truncate table " + tableName)
        db.commit()
    finally:
        # Release the connection even when the statement fails.
        db.close()


# Execute a DELETE statement against the `fund` database.
def delete(sql):
    """Run *sql* on a fresh connection and commit it.

    Args:
        sql: Complete SQL statement text; it is printed before execution.
    """
    db = connect()
    try:
        cursor = db.cursor()
        cursor.execute("use fund")
        print(sql)
        cursor.execute(sql)
        db.commit()
    finally:
        # Release the connection even when the statement fails.
        db.close()


# Run a query against the `fund` database and return every row.
def select(sql):
    """Execute *sql* on a fresh connection and return cursor.fetchall().

    Args:
        sql: Complete SQL query text; it is printed before execution.

    Returns:
        All result rows, fetched before the connection is closed.
    """
    db = connect()
    try:
        cursor = db.cursor()
        cursor.execute("use fund")
        print(sql)
        cursor.execute(sql)
        return cursor.fetchall()
    finally:
        # Release the connection once the rows are in memory (or on error).
        db.close()


# Script entry point: run main() only when this file is executed directly.
if __name__ == '__main__':
    main()

 

posted @ 2021-01-19 17:18  缘故为何  阅读(1023)  评论(0)  编辑  收藏  举报