python爬虫(xpath),获取某只股票的当前价格和市值
比如,我们需要在百度上搜索某一只股票,查询出该股票的当前价格和市值
我们查询“600754”这只股票的当前价格和市值
实现步骤如下:
1、导入requests库,还需要额外导入lxml库(后面会用到)
# Each import is on its own line: when both were fused onto one line, the
# second import was swallowed by the first line's trailing comment.
import requests  # HTTP client used to send the search request
from lxml import etree  # HTML parser with XPath support (install with: pip install lxml)
2、构造请求数据,百度搜索发起请求的数据比较多
# Request data replayed from a captured browser search for the stock code
# "600754" on Baidu.
#
# SECURITY NOTE(review): these cookie values look like they were copied from a
# real browser session (BDUSS appears to be a login token). Hard-coding them
# in published code leaks the session and they will expire; prefer loading
# them from a private config file or environment variable.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV',
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

headers = {
    'is_xhr': '1',
    # 'br' is intentionally not advertised: Requests only decodes Brotli when
    # an optional Brotli package is installed, so a Brotli-compressed reply
    # would otherwise come back as unreadable text.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'is_pbs': '600754',
    'Accept': '*/*',
    'Referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq=600754&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'is_referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359',
}

# Query-string parameters, kept as an ordered tuple of (name, value) pairs.
# A list value ('ie') makes Requests send that parameter once per element.
params = (
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    ('wd', '600754'),  # the search keyword: the stock code
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)
3、发起请求,并将结果转换为text(后面分析返回数据时需要读取text内容)
# Send the search request with the captured headers, query parameters and
# cookies. A timeout is passed explicitly because Requests otherwise waits
# for the server without any time limit.
reply = requests.get(
    'https://www.baidu.com/s',
    headers=headers,
    params=params,
    cookies=cookies,
    timeout=10,
)
# Fail early with requests.HTTPError on a 4xx/5xx answer instead of trying to
# parse an error page later on.
reply.raise_for_status()
# Only the decoded body text is needed by the parsing step that follows.
response = reply.text
4、分析结果(解析数据用的是XPath),具体使用方法可参考博客中的另一篇文章:https://www.cnblogs.com/becks/p/11335493.html
# Parse the returned page text into an element tree that supports XPath.
html = etree.HTML(response)

# Current price: text of the <span> carrying the price CSS classes.
a = html.xpath('//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()')
# xpath() returns a list, which is empty when nothing matched; raise an
# IndexError that says why instead of failing on a[0] with no explanation.
if not a:
    raise IndexError('current price not found in the response; '
                     'the page layout may have changed or the request was blocked')
print('当前价格:', a[0])  # current price

# Market cap: second <span> inside the 8th <li> of the stock info list.
b = html.xpath('//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text()')
if not b:
    raise IndexError('market cap not found in the response; '
                     'the page layout may have changed or the request was blocked')
print('当前市值:', b[0])  # current market cap
快速获取请求头信息 可以参考这篇:https://www.cnblogs.com/becks/p/12243014.html
全部代码
# This script looks up a given stock code on Baidu search and prints the
# stock's current price and market cap.
import requests  # HTTP client used to send the search request
from lxml import etree  # HTML parser with XPath support (install with: pip install lxml)

# Request data replayed from a captured browser search for the stock code
# "600754" on Baidu.
#
# SECURITY NOTE(review): these cookie values look like they were copied from a
# real browser session (BDUSS appears to be a login token). Hard-coding them
# in published code leaks the session and they will expire; prefer loading
# them from a private config file or environment variable.
cookies = {
    'BIDUPSID': '90EF3BD78F53BC8C96DF84CD3854CA2D',
    'PSTM': '1578233930',
    'BD_UPN': '12314753',
    'BAIDUID': '885754C8E6BD7B1A771802631815CC6D:FG=1',
    'BDORZ': 'B490B5EBF6F3CD402E515D22BCDA1598',
    'BDUSS': 'mxYdVpwOEx0eGJsT3VUYTJXbkZJYWhKSGpQWnlqaVBwMlExTWNNRkR4cWtabHRlSVFBQUFBJCQAAAAAAAAAAAEAAACRJsY-cGlwacnxu7AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKTZM16k2TNeV',
    'COOKIE_SESSION': '7_0_5_3_11_3_0_0_4_2_1_0_73199_0_169_0_1580456363_0_1580456194%7C9%23622712_32_1580376248%7C6',
    'cflag': '13%3A3',
    'BD_HOME': '1',
    'BDRCVFR[feWj1Vr5u3D]': 'I67x6TjHwwYf0',
    'delPer': '0',
    'BD_CK_SAM': '1',
    'PSINO': '3',
    'H_PS_PSSID': '1438_21104_26350',
    'H_PS_645EC': '29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8',
    'BDSVRTM': '121',
    'WWW_ST': '1580466352318',
}

headers = {
    'is_xhr': '1',
    # 'br' is intentionally not advertised: Requests only decodes Brotli when
    # an optional Brotli package is installed, so a Brotli-compressed reply
    # would otherwise come back as unreadable text.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
    'is_pbs': '600754',
    'Accept': '*/*',
    'Referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&rqlang=cn&tn=baiduhome_pg&rsv_enter=0&rsv_dl=tb&oq=600754&rsv_t=29b8ZVy4WP7OUTz6%2FjeON9IexqLhOnMXkLTzhD5NfPu4fH%2FPZmThFknleY0LwzNQZ8j8&rsv_pq=b379448d00013935',
    'X-Requested-With': 'XMLHttpRequest',
    'Connection': 'keep-alive',
    'is_referer': 'https://www.baidu.com/s?wd=600754&rsv_spt=1&rsv_iqid=0xa5a17c8700013159&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_n=2&rsv_sug3=1&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&inputT=359&rsv_sug4=359',
}

# Query-string parameters, kept as an ordered tuple of (name, value) pairs.
# A list value ('ie') makes Requests send that parameter once per element.
params = (
    ('ie', ['utf-8', 'utf-8']),
    ('newi', '1'),
    ('mod', '1'),
    ('isbd', '1'),
    ('isid', 'b379448d00013935'),
    ('wd', '600754'),  # the search keyword: the stock code
    ('rsv_spt', '1'),
    ('rsv_iqid', '0xa5a17c8700013159'),
    ('issp', '1'),
    ('f', '8'),
    ('rsv_bp', '1'),
    ('rsv_idx', '2'),
    ('rqlang', 'cn'),
    ('tn', 'baiduhome_pg'),
    ('rsv_enter', '0'),
    ('rsv_dl', 'tb'),
    ('oq', '600754'),
    ('rsv_t', '29b8ZVy4WP7OUTz6/jeON9IexqLhOnMXkLTzhD5NfPu4fH/PZmThFknleY0LwzNQZ8j8'),
    ('rsv_pq', 'b379448d00013935'),
    ('bs', '600754'),
    ('rsv_sid', '1438_21104_26350'),
    ('_ss', '1'),
    ('clist', ''),
    ('hsug', ''),
    ('f4s', '1'),
    ('csor', '6'),
    ('_cr1', '29647'),
)

SEARCH_URL = 'https://www.baidu.com/s'
# Seconds to wait for Baidu; Requests has no time limit unless one is given.
REQUEST_TIMEOUT = 10
# Text of the <span> carrying the current-price CSS classes.
PRICE_XPATH = '//span[@class = "op-stockdynamic-moretab-cur-num c-gap-right-small"]/text()'
# Text of the second <span> inside the 8th <li> of the stock info list.
MARKET_CAP_XPATH = '//ul[@class = "op-stockdynamic-moretab-info"]/li[8]/span[2]/text()'


def _fetch_search_page():
    """Request the Baidu result page and return its body as text.

    Raises:
        requests.exceptions.HTTPError: if the server answers with a 4xx/5xx
            status.
        requests.exceptions.Timeout: if no answer arrives within
            REQUEST_TIMEOUT seconds.
    """
    reply = requests.get(
        SEARCH_URL,
        headers=headers,
        params=params,
        cookies=cookies,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail early on an error status instead of parsing an error page.
    reply.raise_for_status()
    return reply.text


def _first_text(html, xpath, label):
    """Return the first result selected by *xpath* in the tree *html*.

    Args:
        html: element tree produced by ``etree.HTML``.
        xpath: XPath expression to evaluate.
        label: human-readable name of the value, used in the error message.

    Raises:
        IndexError: if the expression matched nothing.
    """
    nodes = html.xpath(xpath)
    # xpath() returns a list, which is empty when nothing matched; explain
    # the failure instead of letting nodes[0] fail without context.
    if not nodes:
        raise IndexError(
            f'{label} not found in the response; '
            'the page layout may have changed or the request was blocked'
        )
    return nodes[0]


def main():
    """Fetch the result page and print the current price and market cap."""
    html = etree.HTML(_fetch_search_page())
    print('当前价格:', _first_text(html, PRICE_XPATH, 'current price'))  # current price
    print('当前市值:', _first_text(html, MARKET_CAP_XPATH, 'market cap'))  # current market cap


if __name__ == '__main__':
    main()