爬github模拟浏览器跳过登录-Day01-2
# Automated GitHub login with a POST request.
#
# Overall flow:
#   1. Fetch the login page and extract the random token string.
#        URL:     https://github.com/login
#        Method:  GET
#        Headers: User-Agent (a desktop Chrome string)
#      The token is parsed out of the HTML with a regular expression:
#        <input type="hidden" name="authenticity_token" value="(.*?)" />
#   2. Submit the login form.
#        URL:     https://github.com/session
#        Method:  POST
#        Headers: Cookie (taken from the login page response), User-Agent
#        Body (as captured from a real browser request):
#          commit: Sign in
#          utf8: ✓
#          authenticity_token: <token extracted in step 1>
#          login: <username>
#          password: <password>
#          webauthn-support: supported
import re

import requests

# Seconds to wait for each HTTP request; without a timeout, requests can
# block forever on an unresponsive server.
REQUEST_TIMEOUT = 10

login_url = 'https://github.com/login'

# Request headers for the login page. Headers must be passed as a dict:
# 'User-Agent' is the key and the browser's user-agent string is the value.
login_header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
}

login_res = requests.get(
    url=login_url,
    headers=login_header,
    timeout=REQUEST_TIMEOUT,
)

# Parse the page and extract the token value from the hidden form field.
token_match = re.search(
    '<input type="hidden" name="authenticity_token" value="(.*?)" />',
    login_res.text,
    re.S,
)
if token_match is None:
    # IndexError is kept as the failure type (the previous `findall(...)[0]`
    # raised it too), but now with a message that explains what went wrong.
    raise IndexError(
        'authenticity_token input not found in the response from {} '
        '(HTTP status {})'.format(login_url, login_res.status_code)
    )
authenticity_token = token_match.group(1)
print(authenticity_token)

# Cookies set by the login page; they are sent back with the login POST.
login_cookies = login_res.cookies.get_dict()

# URL that the login form is submitted to.
session_url = 'https://github.com/session'

# Request headers for the login POST.
session_headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'
}

# Form fields, mirroring the request body captured from the browser.
# NOTE: "login" and "password" are placeholders; substitute real
# credentials locally and never commit them.
form_data = {
    "commit": "Sign in",
    # U+2713 CHECK MARK, as in the captured browser request (the previous
    # value was U+221A SQUARE ROOT, a look-alike character).
    "utf8": "✓",
    "authenticity_token": authenticity_token,
    "login": "Vicks9911zxd",
    "password": "**********",
    "webauthn-support": "supported"
}

session_res = requests.post(
    url=session_url,
    headers=session_headers,
    cookies=login_cookies,
    data=form_data,
    timeout=REQUEST_TIMEOUT,
)

# Save the returned page so the result of the login attempt can be inspected.
with open('github2.html', 'w', encoding='utf-8') as f:
    f.write(session_res.text)
Try our best!