爬虫之 GitHub 模拟登录

# -*- coding: utf-8 -*-
# @Time    : 2019/8/21 10:51

import requests
from lxml import etree
# Create a single shared Session; both the login-page GET and the login POST
# go through it, so cookies set by the first request are sent with the second.
session_obj = requests.Session()
def get_data():
    """Fetch the GitHub login page and extract its hidden form fields.

    The request goes through the module-level ``session_obj`` so that any
    cookies set by the login page are kept for the follow-up POST.

    Returns:
        dict: the ``authenticity_token``, ``timestamp`` and
        ``timestamp_secret`` values read from the page's ``<input>`` elements.

    Raises:
        requests.HTTPError: if the login page answers with an error status.
        IndexError: if one of the expected ``<input>`` elements is missing.
    """
    url = 'https://github.com/login'
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        # 'br' is deliberately not advertised: requests can only decode
        # Brotli when an optional brotli package is installed; otherwise the
        # body would come back still compressed and the parse would fail.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Host': 'github.com',
        'Pragma': 'no-cache',
        'Referer': 'https://github.com/',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    }
    # Without a timeout requests may block indefinitely on a stalled connection.
    response = session_obj.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    page = etree.HTML(response.text)

    fields = {}
    for name in ('authenticity_token', 'timestamp', 'timestamp_secret'):
        values = page.xpath('//input[@name="%s"]/@value' % name)
        if not values:
            # Still an IndexError (as a bare ``[0]`` would raise), but with a
            # message that says which field was not found.
            raise IndexError(
                'login page has no <input name="%s"> element' % name
            )
        fields[name] = values[0]
    return fields

def login(username='用户名', password='密码', output_path='./login.html'):
    """Submit the GitHub login form and save the response body to a file.

    Calls ``get_data()`` for the hidden form fields, POSTs the form to
    ``https://github.com/session`` through the shared ``session_obj``, and
    writes the returned HTML to ``output_path`` so it can be inspected.

    Args:
        username: value sent as the ``login`` form field.
        password: value sent as the ``password`` form field.
        output_path: file that receives the response HTML (UTF-8).

    Returns:
        None. The response is written to disk whatever its status code is.
    """
    res = get_data()
    url = "https://github.com/session"
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3',
        # 'br' is not advertised: requests only decodes Brotli when an
        # optional brotli package is installed.
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        # No explicit Content-Length: requests derives it from the encoded
        # form body, and a fixed number would not match real credentials.
        'Content-Type': 'application/x-www-form-urlencoded',
        'Host': 'github.com',
        'Origin': 'https://github.com',
        'Pragma': 'no-cache',
        'Referer': 'https://github.com/login',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36',
    }
    data = {
        'commit': 'Sign in',
        'utf8': '',
        'authenticity_token': str(res["authenticity_token"]),
        'login': username,
        'password': password,
        'webauthn-support': 'supported',
        # NOTE(review): the "_16f7" suffix looks page-generated; confirm that
        # this field name is stable across login page loads.
        'required_field_16f7': '',
        'timestamp': str(res["timestamp"]),
        'timestamp_secret': str(res["timestamp_secret"]),
    }
    # Without a timeout requests may block indefinitely on a stalled connection.
    response = session_obj.post(url, headers=headers, data=data, timeout=10)

    with open(output_path, "w", encoding="utf-8") as f:
        f.write(response.text)
# Run the login flow. There is no __main__ guard, so this executes whenever the
# module is run or imported.
login()

 

posted @ 2019-08-21 17:08  叫我大表哥  阅读(548)  评论(0)  编辑  收藏  举报