英为财情数据采集

一个股票网站

# -*- coding: utf-8 -*-
# @Time    : 2019/9/28 17:12

import requests

def get_headers():
    """Request the stock-screener page and build headers carrying its cookies.

    The cookies set by the response to the screener page are combined with a
    few fixed tracking values into a single ``Cookie`` header.

    Returns:
        dict: Headers containing ``User-Agent`` and ``Cookie``.
    """
    url = "https://cn.investing.com/stock-screener/?sp=country::6|sector::a|industry::16|equityType::a|exchange::a%3Ceq_market_cap;1"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36',
    }
    # Bound the wait: without a timeout requests may block indefinitely.
    res = requests.get(url, headers=headers, timeout=10)
    received = res.cookies.get_dict()

    # Order mirrors the original hand-written cookie string.
    cookie_pairs = [
        ("PHPSESSID", received.get("PHPSESSID")),
        ("geoC", received.get("geoC")),
        ("StickySession", received.get("StickySession")),
        ("adBlockerNewUserDomains", received.get("adBlockerNewUserDomains")),
        ("billboardCounter_6", 1),
        ("nyxDorf", received.get("nyxDorf")),
        ("_ga", "GA1.2.1925136288.1569661333"),
        ("_gid", "GA1.2.1541369468.1569661333"),
        ("_gat", 1),
        ("_gat_allSitesTracker", 1),
    ]
    # Skip cookies the server did not set instead of sending "name=None".
    headers['Cookie'] = "; ".join(
        "{}={}".format(name, value)
        for name, value in cookie_pairs
        if value is not None
    )

    return headers

def get_info(page=1):
    """Query the stock-screener search endpoint and print every hit.

    Cookies are obtained first via ``get_headers()`` and sent along with the
    POST request to the ``SearchStocks`` service.

    Args:
        page: Result page number sent as the ``pn`` form field (default 1).

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
    """
    info_url = "https://cn.investing.com/stock-screener/Service/SearchStocks"
    # get_headers() returns a headers dict; only its Cookie string belongs in
    # the Cookie header (formatting the whole dict would send its repr).
    cookie = get_headers()["Cookie"]
    headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        # "br" is not advertised: requests can only decode brotli when an
        # optional brotli package is installed.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # Content-Length is left to requests, which computes it from the body.
        "Content-Type": "application/x-www-form-urlencoded",
        "Cookie": cookie,
        "Host": "cn.investing.com",
        "Origin": "https://cn.investing.com",
        "Pragma": "no-cache",
        "Referer": "https://cn.investing.com/stock-screener/?sp=country::6|sector::a|industry::16|equityType::a|exchange::a%3Ceq_market_cap;1",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
        "X-Requested-With": "XMLHttpRequest",
    }

    data = {
        "country[]": "6",
        "sector": "2,11,7,10,1,4,9,5,8,3,6,12",
        "industry": "16",
        "equityType": "ORD,DRC,Preferred,Unit,ClosedEnd,REIT,ELKS,OpenEnd,Right,ParticipationShare,CapitalSecurity,PerpetualCapitalSecurity,GuaranteeCertificate,IGC,Warrant,SeniorNote,Debenture,ETF,ADR,ETC,ETN",
        # A list value is form-encoded as one "exchange[]" field per item;
        # repeating the key in a dict literal would keep only the last value.
        "exchange[]": ["127", "108", "109", "51"],
        "pn": str(page),    # pagination, controlled by the `page` argument
        "order[col]": "eq_market_cap",
        "order[dir]": "d",
    }
    ret = requests.post(url=info_url, headers=headers, data=data, timeout=10)
    # Surface HTTP errors explicitly rather than as a JSON decoding failure.
    ret.raise_for_status()
    # print(ret.cookies.get_dict())   # inspect the response cookies
    for hit in ret.json()["hits"]:
        print(hit)
# Script entry point: run the scraper and print the returned hits.
get_info()

 

此网站想要从接口直接获取数据,必须先获取 cookie。也就是说,进入网站首页时 cookie 值实际上就已经设置好了,所以我们需要先请求首页获取 cookie,再带着 cookie 去请求数据接口,从而获取数据。

posted @ 2019-09-30 20:16  叫我大表哥  阅读(3554)  评论(2)  编辑  收藏  举报