class GithubSpider(scrapy.Spider):
    """Log in to GitHub via its HTML login form, then verify the session.

    Flow: GET /login -> parse hidden form tokens -> POST /session ->
    request a profile page and inspect its <title> to confirm login.
    """

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Parse the login page and submit the sign-in form.

        Extracts the CSRF/anti-bot hidden fields GitHub embeds in the
        login form and POSTs them together with the credentials.
        """
        username = ''  # your login name
        password = ''  # your password

        # Hidden form fields required by GitHub's login endpoint.
        # .get() is the modern Scrapy replacement for extract_first();
        # fall back to '' so FormRequest never receives None values
        # (formdata values must be strings).
        authenticity_token = response.xpath(
            '//input[@name="authenticity_token"]/@value').get(default='')
        timestamp = response.xpath(
            '//input[@name="timestamp"]/@value').get(default='')
        timestamp_secret = response.xpath(
            '//input[@name="timestamp_secret"]/@value').get(default='')

        # Build the POST payload mirroring the browser's form submission.
        # NOTE(review): 'required_field_755d' looks like a per-session
        # honeypot field name — confirm it matches the current form markup.
        post_data = {
            'commit': 'Sign in',
            'authenticity_token': authenticity_token,
            'login': username,
            'password': password,
            'trusted_device': '',
            'webauthn-support': 'supported',
            'webauthn-iuvpaa-support': 'unsupported',
            'return_to': '',
            'allow_signup': '',
            'client_id': '',
            'integration': '',
            'required_field_755d': '',
            'timestamp': timestamp,
            'timestamp_secret': timestamp_secret,
        }

        # Do NOT log post_data itself: it contains the password.
        self.logger.info('Submitting login form for user %r', username)

        # POST the form to GitHub's session endpoint.
        yield scrapy.FormRequest(
            url='https://github.com/session',
            callback=self.after_login,
            formdata=post_data,
        )

    def after_login(self, response):
        """After the login POST, fetch a profile page to verify the session."""
        # Use https directly to avoid an extra http->https redirect hop.
        yield scrapy.Request(
            'https://github.com/feijiang-cloud',
            callback=self.check_login,
        )

    def check_login(self, response):
        """Check whether login succeeded by inspecting the page title."""
        self.logger.info('check_login starting...')
        title = response.xpath('//head/title/text()').get()
        self.logger.info('Page title: %s', title)