Python 爬虫验证码登录

# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request,FormRequest
import urllib.request

class DbSpider(scrapy.Spider):
    """Spider that logs in to Douban, prompting the operator for a captcha.

    Flow: fetch the login page, look for a captcha image, (if present)
    save it to ``yzm.png`` and ask the operator to type the solution on
    stdin, then submit the login form and print the title of the page the
    site answers with.
    """

    name = 'db'
    allowed_domains = ['douban.com']
    # No start_urls: start_requests() builds the first request itself so it
    # can attach the custom header and the cookie-jar meta key.
    header = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)'}

    def start_requests(self):
        """Return the initial request for the login page.

        The request carries ``self.header`` and ``meta={'cookiejar': 1}``;
        the response is handled by :meth:`parse`.
        """
        return [
            Request(
                'http://www.douban.com/accounts/login',
                headers=self.header,
                meta={'cookiejar': 1},
                callback=self.parse,
            )
        ]

    def parse(self, response):
        """Build and return the login form submission for *response*.

        If the page contains ``<img id="captcha_image">``, the image is
        downloaded to ``yzm.png`` in the current directory and the operator
        is prompted on stdin for its solution, which is sent as the
        ``captcha-solution`` form field. Without a captcha the form is
        submitted with the base fields only.

        Returns a one-element list holding the ``FormRequest``; its
        response is handled by :meth:`next`.
        """
        captcha_srcs = response.xpath('//img[@id="captcha_image"]/@src').extract()
        print(captcha_srcs)

        # NOTE(review): credentials are hard-coded in source; consider
        # loading them from settings or spider arguments instead.
        data = {
            "source": "index_nav",
            "form_email": "13956777437@163.com",
            "form_password": "xsy667437",
            "redir": "https://www.douban.com/people/170681969/",
        }

        if captcha_srcs:
            # Resolve against the page URL so a relative or protocol-relative
            # src still downloads; an absolute URL is returned unchanged.
            captcha_url = response.urljoin(captcha_srcs[0])
            # The image is fetched with urllib, i.e. outside Scrapy's own
            # request pipeline, and input() waits for the operator to answer.
            urllib.request.urlretrieve(captcha_url, filename='yzm.png')
            yzm = input('>>>请到当前目录下查看验证码并输入验证码...')
            data["captcha-solution"] = yzm.strip()

        print("登录中.....")
        return [
            FormRequest.from_response(
                response,
                # Reuse the cookie-jar key of the login page request.
                meta={'cookiejar': response.meta['cookiejar']},
                headers=self.header,
                formdata=data,
                callback=self.next,
            )
        ]

    def next(self, response):
        """Print the ``<title>`` text of the page received after login."""
        print(response.xpath("/html/head/title/text()").extract())
posted @ 2017-12-08 08:24  沧海一粒水  阅读(272)  评论(0)  编辑  收藏  举报