Python 之 Scrapy 的 FormRequest 模拟 POST 表单自动登录
1、FormRequest 表单实现自动登录
# -*- coding: utf-8 -*-
import re

import scrapy


class GithubSpider(scrapy.Spider):
    """Log in to GitHub by assembling the login POST request by hand.

    The spider fetches the login page, copies the hidden form fields out of
    it, adds the account credentials and posts everything to the session
    endpoint with ``scrapy.FormRequest``.
    """

    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Read the hidden login-form fields and submit the login form.

        Args:
            response: The downloaded login page (``start_urls[0]``).

        Yields:
            A ``scrapy.FormRequest`` posting the credentials and hidden
            fields to ``https://github.com/session``; its response is
            handled by ``after_login``.
        """
        authenticity_token = response.xpath(
            "//input[@name='authenticity_token']/@value"
        ).extract_first()
        utf8 = response.xpath("//input[@name='utf8']/@value").extract_first()
        commit = response.xpath("//input[@name='commit']/@value").extract_first()

        post_data = dict(
            login="812******0@qq.com",
            password="******",
            authenticity_token=authenticity_token,
            utf8=utf8,
            commit=commit,
        )
        # extract_first() returns None when an <input> is not present on the
        # page, and a None value cannot be url-encoded into the request body,
        # so only the fields that were actually found are submitted.
        post_data = {
            field: value for field, value in post_data.items() if value is not None
        }

        # Form (POST) request
        yield scrapy.FormRequest(
            "https://github.com/session",
            formdata=post_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print every occurrence of the account id found in the page.

        A non-empty list means the account id appears in the page returned
        after the login POST.

        Args:
            response: The page returned after submitting the login form.
        """
        # Debugging tip: write response.text to a local .html file to inspect
        # the page the server returned.
        # response.text decodes the body with the encoding Scrapy detected for
        # the response instead of assuming UTF-8.
        print(re.findall("812406210", response.text))
2、FormRequest.from_response 模拟自动登录
# -*- coding: utf-8 -*-
import re

import scrapy


class Github2Spider(scrapy.Spider):
    """Log in to GitHub with ``scrapy.FormRequest.from_response``.

    Only the login name and password are passed explicitly; the request is
    built from the form found in the downloaded login page.
    """

    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Submit the login form found in the login page.

        Args:
            response: The downloaded login page (``start_urls[0]``).

        Yields:
            A form request built from the page's form; its response is
            handled by ``after_login``.
        """
        yield scrapy.FormRequest.from_response(
            response,  # the <form> is located in the response automatically
            # formdata only needs the login name and password as a dict; the
            # keys are the "name" attributes of the form's <input> tags.
            formdata={"login": "****@qq.com", "password": "***********"},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Print the regex matches found in the page returned after login.

        Args:
            response: The page returned after submitting the login form.
        """
        # "........" is the pattern from the original post (eight arbitrary
        # characters); presumably a placeholder for the account id to look
        # for - replace with your own.
        # response.text decodes the body with the encoding Scrapy detected for
        # the response instead of assuming UTF-8.
        print(re.findall("........", response.text))
3、笔记
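a) FormRequest:需要手动从登录页中提取隐藏字段(如 authenticity_token),连同用户名和密码一起组装成 formdata,并显式指定表单提交的 URL(本例为 https://github.com/session)。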
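b) FormRequest.from_response:自动从 response 中找到 form 表单并预填其中已有的字段(包括隐藏字段),只需在 formdata 中传入用户名和密码(字典的键是 input 标签的 name 属性),提交地址由表单的 action 属性决定。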
本文来自博客园,作者:小白啊小白,Fighting,转载请注明原文链接:https://www.cnblogs.com/ywjfx/p/11089248.html