Python 使用 selenium 和 requests.session 登录抓取

# Author:song
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from requests import session
import requests

def get_cookies():
    """Log in to Zhihu in a Chrome browser and return the login cookies.

    Opens the sign-up page, switches to the login form, types the
    placeholder credentials, submits the form, sleeps while the login
    completes, and then reads the cookies from the browser. The browser is
    always closed before returning, even when a step fails.

    Returns:
        str: A ``Cookie`` header value (``name=value`` pairs joined by
        ``'; '``) holding the cookies aliyungf_tc, _xsrf, q_c1,
        capsion_ticket, _zap, z_c0 and d_c0, in that order.

    Raises:
        KeyError: If one of the expected cookies is absent after login.
    """
    import time

    # Locator-based lookup works on both old and current Selenium releases;
    # the find_element_by_* helpers were removed in Selenium 4.3.
    from selenium.webdriver.common.by import By

    container = '#root > div > main > div > div > div > div.SignContainer-inner'
    form = container + ' > div.Login-content > form'
    cookie_names = ('aliyungf_tc', '_xsrf', 'q_c1', 'capsion_ticket',
                    '_zap', 'z_c0', 'd_c0')

    driver = webdriver.Chrome()
    try:
        driver.get('https://www.zhihu.com/signup?next=%2F')

        # The page opens on the sign-up form; this switch reveals the login form.
        driver.find_element(
            By.CSS_SELECTOR, container + ' > div.SignContainer-switch > span'
        ).click()
        time.sleep(2)  # give the login form time to render

        username = driver.find_element(
            By.CSS_SELECTOR,
            form + ' > div.SignFlow-account'
                   ' > div.SignFlowInput.SignFlow-accountInputContainer'
                   ' > div.SignFlow-accountInput.Input-wrapper > input',
        )
        username.send_keys('username')

        passwd = driver.find_element(
            By.CSS_SELECTOR,
            form + ' > div.SignFlow-password > div > div.Input-wrapper > input',
        )
        passwd.send_keys('passwd')

        driver.find_element(By.CSS_SELECTOR, form + ' > button').click()
        time.sleep(10)  # wait for the login request to finish and set cookies

        c = {item['name']: item['value'] for item in driver.get_cookies()}
    finally:
        # Always release the browser process, even if a lookup above raised.
        driver.quit()

    return '; '.join('{}={}'.format(name, c[name]) for name in cookie_names)


def crawer_page(cookies_value):
    """Request one page of Zhihu search results with the given cookies.

    Args:
        cookies_value: ``Cookie`` header string made of ``name=value``
            pairs separated by ``;`` (the format ``get_cookies`` returns).

    Returns:
        The ``requests`` response of the search API call. Its status code
        is also printed.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36',
    }

    # Split the header string into individual cookies. Storing the whole
    # string under a single 'Cookie' key would create one cookie literally
    # named "Cookie" instead of the real ones.
    cookies_dict = {}
    for pair in cookies_value.split(';'):
        name, sep, value = pair.strip().partition('=')
        if sep and name:
            cookies_dict[name] = value

    # A requests session does not keep a hand-built dict of cookies as-is;
    # the dict has to be converted to a cookiejar first.
    jar = requests.utils.cookiejar_from_dict(cookies_dict)

    # Use requests.Session directly: rebinding the imported name `session`
    # inside this function would make it a local and raise UnboundLocalError.
    with requests.Session() as http:
        http.cookies = jar
        response = http.get('https://www.zhihu.com/api/v4/search_v3?t=general&q=python&correction=1&offset=5&limit=10&search_hash_id=42cde34da2dc8ae359c5402be499b867', headers=headers)
    print(response.status_code)
    return response

 

posted @ 2018-05-28 21:51  Mr.SSC  阅读(6484)  评论(0)  编辑  收藏  举报