模拟登录并爬取 GitHub

login.py

#!/usr/bin/env python3
# coding=utf-8
# Version:python3.6.1
# Project:pachong
# File:login.py
# Date:2021/7/6 12:24
# Author:LGSP_Harold
import time
import requests
from lxml import etree
from pyquery import PyQuery as pq


class Login:
    """Sign in to GitHub through its web form and print data from two pages.

    Creating an instance opens a ``requests`` session and downloads the login
    page immediately; ``login()`` later reads the hidden form fields
    (``authenticity_token``, ``timestamp``, ``timestamp_secret``) from that
    page and posts them back together with the credentials.
    """

    def __init__(self, timeout=10):
        """Open the HTTP session and fetch/parse the login page.

        Args:
            timeout: Seconds passed as ``timeout=`` to every HTTP request made
                by this object, so that a stalled connection raises instead
                of blocking forever.
        """
        self.headers = {
            'referer': 'https://github.com/signup?ref_cta=Sign+up&ref_loc=header+logged+out&ref_page=%2F&source=header-home',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
            'authority': 'github.com'
        }
        self.login_url = 'https://github.com/login'
        self.post_url = 'https://github.com/session'
        self.logined_url = 'https://github.com/settings/profile'
        self.feel_url = 'https://github.com/dashboard-feed'
        self.timeout = timeout
        # One session for every request so cookies set by earlier responses
        # are sent with the later ones.
        self.session = requests.Session()

        self.login_page_response = self.session.get(
            self.login_url, headers=self.headers, timeout=self.timeout)
        self.login_page_doc = etree.HTML(self.login_page_response.text)

    def _hidden_input(self, name):
        """Return the ``value`` of the first ``<input name=...>`` on the login page.

        Raises:
            IndexError: If the parsed login page has no such input.
        """
        values = self.login_page_doc.xpath(
            '//input[@name="{}"]/@value'.format(name))
        if not values:
            raise IndexError(
                'login page has no input named {!r}'.format(name))
        return values[0]

    def token(self):
        """Return the ``authenticity_token`` value from the login form."""
        return self._hidden_input('authenticity_token')

    def timestamp(self):
        """Return the ``timestamp`` value from the login form."""
        return self._hidden_input('timestamp')

    def timestamp_secret(self):
        """Return the ``timestamp_secret`` value from the login form."""
        return self._hidden_input('timestamp_secret')

    def login(self, email, password):
        """Post the credentials, then print the feed entries and profile data.

        Args:
            email: Value sent as the ``login`` form field.
            password: Value sent as the ``password`` form field.

        NOTE: the response to the sign-in POST is not inspected; the two
        follow-up pages are requested regardless and are only parsed when
        they answer with HTTP 200.
        """
        post_data = {
            # requests form-encodes this dict itself, so the value must be the
            # decoded text: a literal '+' here would be transmitted as '%2B'.
            'commit': 'Sign in',
            'authenticity_token': self.token(),
            'login': email,
            'password': password,
            'return_to': 'https://github.com/login',
            'timestamp': self.timestamp(),
            'timestamp_secret': self.timestamp_secret(),
            'webauthn-iuvpaa-support': 'unknown',
            'webauthn-support': 'supported',
        }
        self.session.post(
            self.post_url, data=post_data, headers=self.headers,
            timeout=self.timeout)

        feed_response = self.session.get(
            self.feel_url, headers=self.headers, timeout=self.timeout)
        if feed_response.status_code == 200:
            self.dynamics(feed_response.text)

        profile_response = self.session.get(
            self.logined_url, headers=self.headers, timeout=self.timeout)
        if profile_response.status_code == 200:
            self.profile(profile_response.text)

    def dynamics(self, html):
        """Print the text of each feed entry found in *html*.

        Args:
            html: Markup of the dashboard feed page.
        """
        document = pq(html)
        entries = document('div[class="d-flex flex-items-baseline"] div')
        # Drop nested <span> elements first so their text is not printed.
        entries.find('span').remove()
        for entry in entries.items():
            print(entry.text().strip())

    def profile(self, html):
        """Print the account name and the e-mail options found in *html*.

        Args:
            html: Markup of the profile settings page.

        Raises:
            IndexError: If the absolute XPath for the name matches nothing,
                which happens whenever the page layout differs from the one
                this path was written against.
        """
        tree = etree.HTML(html)
        name = tree.xpath(
            '/html/body/div[4]/main/div/div[1]/div/div/h1/a/text()')[0]
        # Every non-empty <option> of the e-mail selector, returned as a list.
        email = tree.xpath(
            '//select[@id="user_profile_email"]/option[@value!=""]/text()')
        print(name, email)

if __name__ == "__main__":
    # Script entry point: the two strings are placeholders for a real
    # account name and password and must be replaced before running.
    github = Login()
    github.login(email='Github账号', password='Github密码')

 

posted @ 2021-08-18 15:56  嘆世殘者——華帥  阅读(69)  评论(0编辑  收藏  举报