# 爬虫免登录进入 GitHub



import requests

import re
# Scripted GitHub login: fetch the login page, pull the hidden
# authenticity token out of its HTML, then post the credentials together with
# that token to the session endpoint and save the response body to disk.

LOGIN_PAGE_URL = 'https://github.com/login'
SESSION_URL = 'https://github.com/session'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
}

# A Session keeps the cookies set by the login page and replays them on the
# POST, and lets both requests share the same User-Agent header.
with requests.Session() as session:
    session.headers.update(headers)

    ret = session.get(LOGIN_PAGE_URL, timeout=REQUEST_TIMEOUT)
    # No point parsing an error page for a token, so fail early here.
    ret.raise_for_status()
    print(ret.headers)

    # The token is a CSRF-style value that must be echoed back in the form post.
    token_match = re.search(r'name="authenticity_token" value="(.*?)"', ret.text)
    if token_match is None:
        raise RuntimeError("authenticity_token not found in the login page")
    authenticity_token = token_match.group(1)
    print(authenticity_token)

    data = {
        "login": "账号",
        "password": "密码",
        "authenticity_token": authenticity_token,
    }

    # The response is saved whatever its status so it can be inspected afterwards.
    ret = session.post(SESSION_URL, data=data, timeout=REQUEST_TIMEOUT)

with open("github.html", "wb") as f:
    f.write(ret.content)
# posted @ 2019-01-22 20:44  Kingfan  阅读(373)  评论(0)  编辑  收藏  举报