# Log in to Gitee (Mayun) with a POST request.
# -*- coding: utf-8 -*-
import scrapy
import re
# Gitee (Mayun) login URL: https://gitee.com/login
# Example field in the login form: {userLogin:hanjian200}
# cookie
class MayunSpider(scrapy.Spider):
    """Log in to Gitee (Mayun) with a form POST and save the resulting page.

    The spider fetches the login page, reads the ``authenticity_token``
    hidden input from it, submits the login form, and writes the response
    body to ``mayun.html`` in the current working directory.

    Credentials default to empty strings. They may be supplied as spider
    arguments, which Scrapy copies onto the spider as attributes::

        scrapy crawl mayun -a user_login=NAME -a user_password=SECRET
    """

    name = 'mayun'
    allowed_domains = ['gitee.com']
    # Fetch the form over HTTPS, the same scheme the login POST uses.
    start_urls = ['https://gitee.com/login']

    def parse(self, response):
        """Build the login POST from the token found on the login page.

        Args:
            response: The downloaded login page.

        Yields:
            A single ``scrapy.FormRequest`` whose response is handled by
            ``home_parse``. Nothing is yielded when the token is missing.
        """
        authenticity_token = response.xpath(
            "//input[@name='authenticity_token']/@value"
        ).extract_first()
        if authenticity_token is None:
            # A None form value cannot be encoded into the request body, and
            # the login could not succeed without the token anyway.
            self.logger.error(
                "authenticity_token not found on %s; login aborted",
                response.url,
            )
            return

        # Build the POST request and hand it to the engine.
        yield scrapy.FormRequest(
            "https://gitee.com/login",
            formdata={
                "authenticity_token": authenticity_token,
                "utf8": "✓",
                "commit": "登录",
                # Optional spider arguments; empty when not provided.
                "user[login]": getattr(self, "user_login", ""),
                "user[password]": getattr(self, "user_password", ""),
                "user[remember_me]": "0",
            },
            callback=self.home_parse,
        )

    def home_parse(self, response):
        """Save the page returned after the login POST to ``mayun.html``.

        Args:
            response: The response to the login form submission.
        """
        # Write the raw bytes so the saved page does not depend on the
        # platform's default text encoding or on the body being valid UTF-8.
        with open("mayun.html", "wb") as f:
            f.write(response.body)