class GithubSpider(scrapy.Spider):
    """Log in to GitHub via its HTML login form, then verify the session.

    Flow: GET /login -> parse hidden form tokens -> POST /session ->
    request a profile page and inspect its <title> to confirm login.
    """

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Parse the login page and submit the sign-in form.

        Extracts the CSRF/anti-bot hidden fields GitHub embeds in the
        login form and POSTs them together with the credentials.
        """
        username = ''  # your login name
        password = ''  # your password

        # Hidden form fields required by GitHub's login endpoint.
        # .get() is the modern Scrapy replacement for extract_first();
        # fall back to '' so FormRequest never receives None values
        # (formdata values must be strings).
        authenticity_token = response.xpath(
            '//input[@name="authenticity_token"]/@value').get(default='')
        timestamp = response.xpath(
            '//input[@name="timestamp"]/@value').get(default='')
        timestamp_secret = response.xpath(
            '//input[@name="timestamp_secret"]/@value').get(default='')

        # Build the POST payload mirroring the browser's form submission.
        # NOTE(review): 'required_field_755d' looks like a per-session
        # honeypot field name — confirm it matches the current form markup.
        post_data = {
            'commit': 'Sign in',
            'authenticity_token': authenticity_token,
            'login': username,
            'password': password,
            'trusted_device': '',
            'webauthn-support': 'supported',
            'webauthn-iuvpaa-support': 'unsupported',
            'return_to': '',
            'allow_signup': '',
            'client_id': '',
            'integration': '',
            'required_field_755d': '',
            'timestamp': timestamp,
            'timestamp_secret': timestamp_secret,
        }

        # Do NOT log post_data itself: it contains the password.
        self.logger.info('Submitting login form for user %r', username)

        # POST the form to GitHub's session endpoint.
        yield scrapy.FormRequest(
            url='https://github.com/session',
            callback=self.after_login,
            formdata=post_data,
        )

    def after_login(self, response):
        """After the login POST, fetch a profile page to verify the session."""
        # Use https directly to avoid an extra http->https redirect hop.
        yield scrapy.Request(
            'https://github.com/feijiang-cloud',
            callback=self.check_login,
        )

    def check_login(self, response):
        """Check whether login succeeded by inspecting the page title."""
        self.logger.info('check_login starting...')
        title = response.xpath('//head/title/text()').get()
        self.logger.info('Page title: %s', title)