python html 今日概念资金流入前20名

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
# coding=utf-8
from selenium import webdriver  # browser automation
import time
from lxml import etree  # HTML / XML parser
import tushare as ts
import requests
import json
import threading
import random


def get_init_driver():
    """Start a headless Chrome WebDriver and return it.

    Prints the time spent starting the browser.

    Returns:
        The ``webdriver.Chrome`` instance created with the headless option.
    """
    time_start = time.time()
    option = webdriver.ChromeOptions()
    # Headless mode: Chrome runs in the background without opening a window.
    option.add_argument('headless')
    driver = webdriver.Chrome(options=option)
    time_end = time.time()
    print("初始化driver花时:% f 秒" % float(time_end - time_start))
    return driver


# Shut down the driver
def get_close_driver(driver):
    """Shut down the WebDriver session.

    ``quit()`` is used instead of ``close()``: ``close()`` only closes the
    current window, while ``quit()`` also ends the driver session so the
    browser/driver processes are not left behind.

    Args:
        driver: The WebDriver returned by ``get_init_driver``.
    """
    driver.quit()


# Top industry sectors by today's capital inflow
# 5-day / 10-day industry capital inflow: https://data.eastmoney.com/bkzj/hy.html?stat=10
# http://quote.eastmoney.com/center/boardlist.html#industry_board
# https://data.eastmoney.com/bkzj/hy.html
def get_today_industry_hy_html(driver, number):
    """Collect industry-sector names from the Eastmoney capital-flow page.

    Loads the page with ``driver``, reads the second cell of table rows
    3 .. ``number - 1`` (1-based), then merges in the results of
    ``get_today_industry_hy_api(6)`` and removes duplicates.

    Args:
        driver: Selenium WebDriver used to load the page.
        number: Exclusive upper bound on the 1-based row index that is read.

    Returns:
        A list of unique strings; order is not preserved because the
        de-duplication goes through a ``set``.

    Raises:
        IndexError: If the page source contains no
            ``div.dataview-body > table`` element.
    """
    time2_start = time.time()
    driver.get("https://data.eastmoney.com/bkzj/hy.html")
    source = driver.page_source
    mytree = etree.HTML(source)
    tables = mytree.xpath("//div[@class='dataview-body']/table")[0]
    trs = tables.xpath('.//tr')

    onetable = []
    for h, tr in enumerate(trs, start=1):
        # The first two rows are skipped (presumably header rows).
        if h <= 2:
            continue
        # Row numbers only grow, so nothing after this point can match.
        if h >= number:
            break
        # Only the second cell of each row is wanted.
        if len(tr) < 2:
            continue
        for text in tr[1].xpath(".//text()"):
            onetable.append(text.strip(" "))  # trim surrounding spaces only

    # Timing covers the page scrape only, not the API calls below.
    time2_end = time.time()
    # NOTE(review): both calls use the same argument; the variable names
    # suggest a 5-day and a 10-day list were intended - confirm against
    # get_today_industry_hy_api.
    hy_list_5 = get_today_industry_hy_api(6)
    onetable.extend(hy_list_5)
    hy_list_10 = get_today_industry_hy_api(6)
    onetable.extend(hy_list_10)
    onetable2 = list(set(onetable))
    print("查询实时行业(6倍数据更新一次)花时:% .3f 秒" % float(time2_end - time2_start))
    return onetable2


# Top 20 concept sectors by today's capital inflow
# http://quote.eastmoney.com/center/boardlist.html#concept_board
def get_now_gn_html(driver, number):
    """Collect concept-sector names from the Eastmoney capital-flow page.

    Loads the page with ``driver``, reads the second cell of table rows
    3 .. ``number - 1`` (1-based), adds a few alias names, merges in the
    results of ``get_now_gn_api_type(6, 'f3')``, filters the list through
    ``basic_rule_1.is_array_not_in_array`` and removes duplicates.

    Args:
        driver: Selenium WebDriver used to load the page.
        number: Exclusive upper bound on the 1-based row index that is read.

    Returns:
        A list of unique entries; order is not preserved because the
        de-duplication goes through a ``set``.

    Raises:
        IndexError: If the page source contains no
            ``div.dataview-body > table`` element.
    """
    driver.get("https://data.eastmoney.com/bkzj/gn.html")
    source = driver.page_source
    mytree = etree.HTML(source)
    tables = mytree.xpath("//div[@class='dataview-body']/table")[0]
    trs = tables.xpath('.//tr')

    onetable = []
    for h, tr in enumerate(trs, start=1):
        # The first two rows are skipped (presumably header rows).
        if h <= 2:
            continue
        # Row numbers only grow, so nothing after this point can match.
        if h >= number:
            break
        # Only the second cell of each row is wanted.
        if len(tr) < 2:
            continue
        for text in tr[1].xpath(".//text()"):
            onetable.append(text.strip(" "))  # trim surrounding spaces only

    # Add alternative spellings of the same concept so later matching
    # against other naming schemes succeeds.
    if basic_rule_1.is_string_in_array(onetable, "CPO"):
        onetable.append("共封装光学(CPO)")
        onetable.append("CPO概念")
    if basic_rule_1.is_string_in_array(onetable, "算力概念"):
        onetable.append("东数西算(算力)")
    if basic_rule_1.is_string_in_array(onetable, "光通信模块"):
        onetable.append("共封装光学(CPO)")

    # Field codes per the original note: f62 = current, f164 = last 5 days,
    # f3 = price change.
    # NOTE(review): both calls below pass 'f3'; the variable names suggest a
    # 5-day and a 10-day list were intended - confirm against
    # get_now_gn_api_type.
    list5 = get_now_gn_api_type(6, 'f3')
    onetable.extend(list5)
    list10 = get_now_gn_api_type(6, 'f3')
    onetable.extend(list10)

    # Per the original comment this drops limit-up concept entries; the
    # helper is defined outside this file section.
    onetable = basic_rule_1.is_array_not_in_array(onetable, None)
    # Remove duplicates.
    onetable2 = list(set(onetable))
    return onetable2
 
 
def get_now_gn_df_html(driver, ts_code):
    """Look up the concept list of one stock on Eastmoney's F10 page.

    The 10th ``<p>`` text node of the page is taken, trimmed and split on
    single spaces. When nothing is found, ``get_stock_gn(ts_code)`` is used
    instead. The result is finally passed through
    ``basic_rule_1.is_array_not_in_array``.

    Args:
        driver: Selenium WebDriver used to load the page.
        ts_code: Stock code whose characters 0-5 are the numeric code and
            characters 7-8 the exchange suffix, e.g. ``"000001.SZ"``
            becomes ``"SZ000001"`` in the request URL.

    Returns:
        Whatever ``basic_rule_1.is_array_not_in_array`` returns for the
        collected list.
    """
    # Eastmoney expects the exchange prefix in front of the numeric code.
    code = ts_code[7:9] + ts_code[0:6]

    driver.get(
        "https://emweb.securities.eastmoney.com/PC_HSF10/CoreConception/Index?type=web&code=" + code)
    source = driver.page_source
    mytree = etree.HTML(source)
    ps = mytree.xpath("//div[@class='summary']")

    # "//p" is an absolute path, so it matches every <p> text node in the
    # document, not only those below the summary div; kept as-is because the
    # fixed position used below depends on it.
    # A page without the summary div yields no rows, so the fallback below
    # is used instead of failing on ps[0].
    trs = ps[0].xpath("//p/text()") if ps else []

    onetable = []
    if len(trs) >= 10:
        # The 10th text node holds the space-separated concept names.
        gn_str_list = trs[9].strip(" ").strip('\n')
        onetable = gn_str_list.split(" ")

    if not onetable:
        print("在东方财富网没有到到概念数据,在Tushare查询")
        onetable = get_stock_gn(ts_code)

    # Per the original comment this drops limit-up concept entries; the
    # helper is defined outside this file section.
    onetable = basic_rule_1.is_array_not_in_array(onetable, None)

    return onetable

  

posted @   A汉克先生  阅读(25)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 从HTTP原因短语缺失研究HTTP/2和HTTP/3的设计差异
· 三行代码完成国际化适配,妙~啊~
历史上的今天:
2019-10-30 mven pom.xml Overriding managed version 问题解决详解
2019-10-30 spring boot jetty 配置 https ssl
2019-10-30 maven项目解决pom.xml头部 http://maven.apache.org/xsd/maven-4.0.0.xsd报错的问题
2019-10-30 mven 在个网址
点击右上角即可分享
微信分享提示