python html 今日概念资金流入前20名
#!/usr/bin/env Python
# coding=utf-8
"""Scrape East Money (eastmoney.com) fund-flow pages with headless Chrome.

The helpers here load the industry / concept board fund-flow tables and the
per-stock "core concept" page, and return the board or concept names found.

NOTE(review): ``basic_rule_1``, ``get_today_industry_hy_api``,
``get_now_gn_api_type`` and ``get_stock_gn`` are used below but are not
defined or imported in the visible part of this file -- confirm they are
provided elsewhere in the module.
"""
from selenium import webdriver  # browser automation
import time
from lxml import etree  # HTML / XML parser
import tushare as ts
import requests
import json
import threading
import random


def get_init_driver():
    """Start a headless Chrome WebDriver and return it.

    Prints how long the start-up took.
    """
    time_start = time.time()
    option = webdriver.ChromeOptions()
    # Headless mode: Chrome runs in the background without opening a window.
    option.add_argument('headless')
    driver = webdriver.Chrome(options=option)
    time_end = time.time()
    print("初始化driver花时:% f 秒" % (time_end - time_start))
    return driver


def get_close_driver(driver):
    """Shut down the WebDriver created by ``get_init_driver``.

    ``quit()`` is used instead of ``close()``: ``close()`` only closes the
    current window, while ``quit()`` ends the whole browser session so no
    driver process is left behind.
    """
    driver.quit()


def _scrape_board_names(driver, url, number):
    """Load ``url`` and return the second-column texts of its data table.

    Rows are counted from 1. Rows 1 and 2 are skipped and only rows with an
    index below ``number`` are read. For each kept row, every text node of
    the second cell is returned with surrounding spaces stripped.

    Raises:
        IndexError: if the page contains no
            ``//div[@class='dataview-body']/table`` element.
    """
    driver.get(url)
    tree = etree.HTML(driver.page_source)
    table = tree.xpath("//div[@class='dataview-body']/table")[0]

    names = []
    for row_no, row in enumerate(table.xpath('.//tr'), start=1):
        if row_no <= 2:
            continue
        if row_no >= number:
            # Every later row is past the cut-off as well.
            break
        if len(row) < 2:
            continue
        # Only the second column is of interest.
        for text in row[1].xpath(".//text()"):
            names.append(text.strip(" "))
    return names


# Top industry boards by today's fund inflow.
# 5-day / 10-day industry inflow: https://data.eastmoney.com/bkzj/hy.html?stat=10
# http://quote.eastmoney.com/center/boardlist.html#industry_board
# https://data.eastmoney.com/bkzj/hy.html
def get_today_industry_hy_html(driver, number):
    """Return the de-duplicated industry board names for today.

    Combines the names scraped from the industry fund-flow page (table rows
    3 .. ``number`` - 1) with the lists returned by
    ``get_today_industry_hy_api(6)``. The order of the result is not
    defined because duplicates are removed through a ``set``.

    Prints the time spent on the page scrape (the API calls are not
    included in that figure).
    """
    time2_start = time.time()
    names = _scrape_board_names(
        driver, "https://data.eastmoney.com/bkzj/hy.html", number)
    time2_end = time.time()

    # NOTE(review): the original code labels these two results as 5-day and
    # 10-day data, but both calls pass the same argument -- confirm whether
    # the second call was meant to differ.
    hy_list_5 = get_today_industry_hy_api(6)
    names.extend(hy_list_5)
    hy_list_10 = get_today_industry_hy_api(6)
    names.extend(hy_list_10)

    unique_names = list(set(names))
    print("查询实时行业(6倍数据更新一次)花时:% .3f 秒" % (time2_end - time2_start))
    return unique_names


# Top concept boards by today's fund inflow.
# http://quote.eastmoney.com/center/boardlist.html#concept_board
def get_now_gn_html(driver, number):
    """Return the de-duplicated concept board names for today.

    Combines the names scraped from the concept fund-flow page (table rows
    3 .. ``number`` - 1), a few hand-written aliases for related concepts,
    and the lists returned by ``get_now_gn_api_type(6, 'f3')``. The merged
    list is passed through ``basic_rule_1.is_array_not_in_array`` before
    duplicates are removed, so the order of the result is not defined.
    """
    names = _scrape_board_names(
        driver, "https://data.eastmoney.com/bkzj/gn.html", number)

    # Add alias names so related concepts are matched under every spelling.
    if basic_rule_1.is_string_in_array(names, "CPO"):
        names.append("共封装光学(CPO)")
        names.append("CPO概念")
    if basic_rule_1.is_string_in_array(names, "算力概念"):
        names.append("东数西算(算力)")
    if basic_rule_1.is_string_in_array(names, "光通信模块"):
        names.append("共封装光学(CPO)")

    # Field codes noted by the original author: f62 = current,
    # f164 = last 5 days, f3 = price change.
    # NOTE(review): the original code labels these two results as 5-day and
    # 10-day data, but both calls pass the same arguments -- confirm whether
    # the second call was meant to differ.
    list5 = get_now_gn_api_type(6, 'f3')
    names.extend(list5)
    list10 = get_now_gn_api_type(6, 'f3')
    names.extend(list10)

    # Per the original comment this removes limit-up concept entries; the
    # helper is defined outside this view.
    names = basic_rule_1.is_array_not_in_array(names, None)
    return list(set(names))


def get_now_gn_df_html(driver, ts_code):
    """Return the concept names listed for one stock.

    ``ts_code`` is sliced as ``[0:6]`` (code) and ``[7:9]`` (market), e.g.
    ``"600000.SH"`` is looked up as ``"SH600000"``. The 10th ``<p>`` text
    node of the core-concept page is split on spaces to get the names.
    When the page yields nothing (no summary block, or fewer than ten text
    nodes) the names are taken from ``get_stock_gn(ts_code)`` instead.
    The result is passed through ``basic_rule_1.is_array_not_in_array``.
    """
    code = ts_code[7:9] + ts_code[0:6]
    driver.get("https://emweb.securities.eastmoney.com/PC_HSF10/CoreConception/Index?type=web&code=" + code)
    tree = etree.HTML(driver.page_source)

    concepts = []
    summaries = tree.xpath("//div[@class='summary']")
    # Guard the lookup so a missing summary block reaches the fallback
    # below instead of raising IndexError.
    if summaries:
        # NOTE(review): "//p/text()" is an absolute path, so it searches the
        # whole document rather than only the summary block. Kept as is
        # because the fixed position used below depends on it.
        paragraphs = summaries[0].xpath("//p/text()")
        if len(paragraphs) >= 10:
            concepts = paragraphs[9].strip(" ").strip('\n').split(" ")

    if not concepts:
        print("在东方财富网没有到到概念数据,在Tushare查询")
        concepts = get_stock_gn(ts_code)

    # Per the original comment this removes limit-up concept entries; the
    # helper is defined outside this view.
    concepts = basic_rule_1.is_array_not_in_array(concepts, None)
    return concepts
分类:
Apython量化交易
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异
· 三行代码完成国际化适配,妙~啊~
2019-10-30 maven pom.xml Overriding managed version 问题解决详解
2019-10-30 spring boot jetty 配置 https ssl
2019-10-30 maven项目解决pom.xml头部 http://maven.apache.org/xsd/maven-4.0.0.xsd报错的问题
2019-10-30 mven 在个网址