模拟登录并爬取 GitHub
login.py
#!/usr/bin/env python3
# coding=utf-8
# Version:python3.6.1
# Project:pachong
# File:login.py
# Date:2021/7/6 12:24
# Author:LGSP_Harold
import time

import requests
from lxml import etree
from pyquery import PyQuery as pq


class Login:
    """Log in to GitHub through its HTML login form and print account data.

    Constructing an instance immediately fetches the login page (network
    I/O) so that the hidden form fields it carries can be echoed back when
    the credentials are posted.  All requests share one ``requests.Session``
    so cookies set during login are sent on the follow-up page fetches.
    """

    # Seconds to wait for each HTTP request.  requests never times out on
    # its own, so a stalled connection would otherwise block forever.
    request_timeout = 10

    def __init__(self):
        """Create the session and download/parse the login page."""
        self.headers = {
            'referer': 'https://github.com/signup?ref_cta=Sign+up&ref_loc=header+logged+out&ref_page=%2F&source=header-home',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
            'authority': 'github.com'
        }
        self.login_url = 'https://github.com/login'
        self.post_url = 'https://github.com/session'
        self.logined_url = 'https://github.com/settings/profile'
        self.feel_url = 'https://github.com/dashboard-feed'
        self.session = requests.Session()
        self.login_page_response = self.session.get(
            self.login_url, headers=self.headers, timeout=self.request_timeout
        )
        self.login_page_doc = etree.HTML(self.login_page_response.text)

    def _hidden_input(self, name):
        """Return the ``value`` of the first ``<input name=...>`` on the login page.

        Raises:
            IndexError: if the login page has no input with that name
                (same exception type the bare ``[0]`` lookup produced, but
                with a message that says which field is missing).
        """
        values = self.login_page_doc.xpath(
            '//input[@name="{}"]/@value'.format(name)
        )
        if not values:
            raise IndexError(
                'input {!r} not found on the login page'.format(name)
            )
        return values[0]

    def token(self):
        """Return the ``authenticity_token`` form value from the login page."""
        return self._hidden_input('authenticity_token')

    def timestamp(self):
        """Return the ``timestamp`` form value from the login page."""
        return self._hidden_input('timestamp')

    def timestamp_secret(self):
        """Return the ``timestamp_secret`` form value from the login page."""
        return self._hidden_input('timestamp_secret')

    def login(self, email, password):
        """Post the credentials, then fetch and print the feed and profile.

        Args:
            email: value sent as the form's ``login`` field.
            password: value sent as the form's ``password`` field.

        Each follow-up page is only parsed when it answers with HTTP 200.
        """
        post_data = {
            # Unencoded value: requests form-encodes the dict itself, so
            # 'Sign+in' would reach the server as a literal plus sign.
            'commit': 'Sign in',
            'authenticity_token': self.token(),
            'login': email,
            'password': password,
            'return_to': 'https://github.com/login',
            'timestamp': self.timestamp(),
            'timestamp_secret': self.timestamp_secret(),
            'webauthn-iuvpaa-support': 'unknown',
            'webauthn-support': 'supported',
        }
        # The POST response itself is not inspected; the session keeps
        # whatever cookies it sets for the requests below.
        self.session.post(
            self.post_url,
            data=post_data,
            headers=self.headers,
            timeout=self.request_timeout,
        )

        response = self.session.get(
            self.feel_url, headers=self.headers, timeout=self.request_timeout
        )
        if response.status_code == 200:
            self.dynamics(response.text)

        response = self.session.get(
            self.logined_url, headers=self.headers, timeout=self.request_timeout
        )
        if response.status_code == 200:
            self.profile(response.text)

    def dynamics(self, html):
        """Print the text of each feed entry found in *html*.

        Args:
            html: markup of the page fetched from ``self.feel_url``.
        """
        selector = pq(html)
        dynamics = selector('div[class="d-flex flex-items-baseline"] div')
        # Drop nested <span> elements so their text is not part of the output.
        dynamics.find('span').remove()
        for item in dynamics.items():
            dynamic = item.text().strip()
            print(dynamic)

    def profile(self, html):
        """Print the profile name and the e-mail options found in *html*.

        Args:
            html: markup of the page fetched from ``self.logined_url``.

        Raises:
            IndexError: if the name element is not at the expected position.
        """
        selector = etree.HTML(html)
        names = selector.xpath(
            '/html/body/div[4]/main/div/div[1]/div/div/h1/a/text()'
        )
        if not names:
            # NOTE(review): the absolute path is tied to one page layout;
            # presumably this also triggers when the login did not succeed.
            raise IndexError(
                'profile name not found: login may have failed or the '
                'page layout changed'
            )
        name = names[0]
        email = selector.xpath(
            '//select[@id="user_profile_email"]/option[@value!=""]/text()'
        )
        print(name, email)


if __name__ == "__main__":
    login = Login()
    login.login(email='Github账号', password='Github密码')
略懂,略懂....