python爬虫(xpath),获取某只股票的当前价格和市值

比如,我们需要实现在百度搜索某一只股票,并查询出该股票的当前价格和市值

我们查询“600754”这只股票的当前价格和市值

 

 

 

实现步骤如下:

1、导入requests库,还需要额外导入lxml库(后面会用到)

import requests   #倒入requests库
from lxml import etree  #倒入lxml 库(没有这个库,pip install lxml安装)

2、构造请求数据,百度搜索发起请求的数据比较多

# Cookies sent with the Baidu search request, passed to requests.get(cookies=...).
# The values are opaque strings copied verbatim from a captured browser session.
# NOTE(review): 'BDUSS' looks like a logged-in session token -- confirm, and
# replace it with your own value rather than publishing/reusing a real credential.
# NOTE(review): several values embed timestamps (e.g. 'WWW_ST', 'COOKIE_SESSION'),
# so they have presumably expired -- verify before relying on them.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV',
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

# HTTP request headers for the Baidu search call, passed to requests.get(headers=...).
# Copied from a captured browser XHR request for the stock code 600754.
headers = {
    # 'is_xhr', 'is_pbs' and 'is_referer' are non-standard headers taken verbatim
    # from the captured request.
    'is_xhr': '1',
    # 'br' is intentionally NOT advertised: requests/urllib3 can only decode
    # Brotli when an optional brotli package is installed. Without it, a
    # Brotli-compressed body would reach `.text` undecoded and the XPath
    # queries would match nothing.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    # Same value as the stock code used in the search query.
    'is_pbs': '600754',
    'Accept': '*/*',
    'Referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq=600754&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'is_referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359',
}

# Query-string parameters for https://www.baidu.com/s, passed to requests.get(params=...)
# as a tuple of (key, value) pairs. Values are copied from a captured browser request.
params = (
    # List value: presumably serialized by requests as a repeated `ie` key
    # (ie=utf-8&ie=utf-8) -- see the requests docs on list-valued params.
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    # Search keyword: the stock code being looked up. 'oq' and 'bs' below carry
    # the same value; change all three together to query a different stock
    # (presumably -- the other session-specific tokens may also need refreshing).
    ('wd', '600754'),
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)

3、发起请求,并将结果转化为text(后面分析返回数据时需要读取text内容)

# Send the search request and keep only the decoded body text for XPath parsing.
# requests applies no timeout by default, so without `timeout` a stalled
# connection would block the script forever.
response = requests.get('https://www.baidu.com/s', headers=headers, params=params, cookies=cookies, timeout=10).text

4、分析结果(解析数据用的是XPath),具体使用方法可参考博客中的另一篇文章:https://www.cnblogs.com/becks/p/11335493.html

# Parse the returned page text into an element tree that can be queried with XPath.
html = etree.HTML(response)

# Current price: text of the <span> whose class attribute is exactly this string.
a = html.xpath('//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()')
# xpath() returns an empty list when nothing matches (e.g. the page layout
# changed or the request was rejected); stop with a readable message instead
# of an IndexError on a[0].
if not a:
    raise SystemExit('未能解析到当前价格:页面中没有匹配的节点')
print('当前价格:',a[0])

# Market cap: second <span> inside the 8th <li> of the stock info list.
b = html.xpath('//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text()')
if not b:
    raise SystemExit('未能解析到当前市值:页面中没有匹配的节点')
print('当前市值:',b[0])

快速获取请求头信息 可以参考这篇:https://www.cnblogs.com/becks/p/12243014.html

 全部代码

#本脚本实现,指定股票代码百度查询出市值和当前股价

import requests   #倒入requests库
from lxml import etree  #倒入lxml 库(没有这个库,pip install lxml安装)

# Cookies sent with the Baidu search request, passed to requests.get(cookies=...).
# The values are opaque strings copied verbatim from a captured browser session.
# NOTE(review): 'BDUSS' looks like a logged-in session token -- confirm, and
# replace it with your own value rather than publishing/reusing a real credential.
# NOTE(review): several values embed timestamps (e.g. 'WWW_ST', 'COOKIE_SESSION'),
# so they have presumably expired -- verify before relying on them.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV',
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

# HTTP request headers for the Baidu search call, passed to requests.get(headers=...).
# Copied from a captured browser XHR request for the stock code 600754.
headers = {
    # 'is_xhr', 'is_pbs' and 'is_referer' are non-standard headers taken verbatim
    # from the captured request.
    'is_xhr': '1',
    # 'br' is intentionally NOT advertised: requests/urllib3 can only decode
    # Brotli when an optional brotli package is installed. Without it, a
    # Brotli-compressed body would reach `.text` undecoded and the XPath
    # queries would match nothing.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    # Same value as the stock code used in the search query.
    'is_pbs': '600754',
    'Accept': '*/*',
    'Referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq=600754&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'is_referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359',
}

# Query-string parameters for https://www.baidu.com/s, passed to requests.get(params=...)
# as a tuple of (key, value) pairs. Values are copied from a captured browser request.
params = (
    # List value: presumably serialized by requests as a repeated `ie` key
    # (ie=utf-8&ie=utf-8) -- see the requests docs on list-valued params.
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    # Search keyword: the stock code being looked up. 'oq' and 'bs' below carry
    # the same value; change all three together to query a different stock
    # (presumably -- the other session-specific tokens may also need refreshing).
    ('wd', '600754'),
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)

# Fetch the Baidu result page for the stock code configured in `params`.
# requests applies no timeout by default, so `timeout` keeps a stalled
# connection from blocking the script forever; raise_for_status() surfaces
# HTTP 4xx/5xx errors instead of silently parsing an error page.
raw_response = requests.get('https://www.baidu.com/s', headers=headers, params=params, cookies=cookies, timeout=10)
raw_response.raise_for_status()
response = raw_response.text

# Parse the returned page text into an element tree that can be queried with XPath.
html = etree.HTML(response)

# Current price: text of the <span> whose class attribute is exactly this string.
a = html.xpath('//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()')
# xpath() returns an empty list when nothing matches (e.g. the page layout
# changed or the request was rejected); stop with a readable message instead
# of an IndexError on a[0].
if not a:
    raise SystemExit('未能解析到当前价格:页面中没有匹配的节点')
print('当前价格:',a[0])

# Market cap: second <span> inside the 8th <li> of the stock info list.
b = html.xpath('//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text()')
if not b:
    raise SystemExit('未能解析到当前市值:页面中没有匹配的节点')
print('当前市值:',b[0])

 

posted @ 2020-02-01 20:09  小贝书屋  阅读(2514)  评论(0编辑  收藏  举报