Python 爬虫验证码登录

# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request,FormRequest
import urllib.request

class DbSpider(scrapy.Spider):
    """Log in to douban.com, prompting the operator for the captcha if one is shown.

    Flow: fetch the login page, look for a captcha image, download it to
    ``yzm.png`` and read the solution from stdin when present, then submit
    the login form and print the title of the page the site answers with.
    """

    name = 'db'
    allowed_domains = ['douban.com']
    # No ``start_urls``: ``start_requests`` issues the first request itself so
    # it can attach the custom headers and the cookie jar.
    header = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE)'}

    # NOTE(security): credentials are hard-coded in source. They are kept as
    # class-level defaults so existing runs behave the same; prefer supplying
    # them at run time (e.g. ``scrapy crawl db -a login_email=...``, which
    # Scrapy stores as instance attributes) and removing them from the file.
    login_email = "13956777437@163.com"
    login_password = "xsy667437"
    login_redirect = "https://www.douban.com/people/170681969/"

    def start_requests(self):
        """Return the initial request for the login page.

        ``meta={'cookiejar': 1}`` selects a cookie jar so the session cookies
        set by the login page are reused by the form submission.
        """
        return [Request('http://www.douban.com/accounts/login',
                        headers=self.header,
                        meta={'cookiejar': 1},
                        callback=self.parse)]

    def parse(self, response):
        """Build and return the login form submission for the login page.

        If the page contains ``<img id="captcha_image">``, the image is saved
        as ``yzm.png`` in the current directory and the solution is read
        interactively from stdin (this blocks until the operator answers).

        Returns:
            A one-element list holding the ``FormRequest`` that posts the
            login form; its response is handled by ``next``.
        """
        captcha_srcs = response.xpath('//img[@id="captcha_image"]/@src').extract()
        print(captcha_srcs)

        # Fields common to both the captcha and the no-captcha case.
        data = {
            "source": "index_nav",
            "form_email": self.login_email,
            "form_password": self.login_password,
            "redir": self.login_redirect,
        }

        if captcha_srcs:
            # The src attribute may be relative or protocol-relative; resolve
            # it against the page URL so urlretrieve always gets a full URL.
            captcha_url = response.urljoin(captcha_srcs[0])
            urllib.request.urlretrieve(captcha_url, filename='yzm.png')
            yzm = input('>>>请到当前目录下查看验证码并输入验证码...')
            data["captcha-solution"] = yzm.strip()

        print("登录中.....")
        # from_response pre-fills the form's existing fields from the page and
        # overrides them with the values in ``data``.
        return [FormRequest.from_response(response,
                                          meta={'cookiejar': response.meta['cookiejar']},
                                          headers=self.header,
                                          formdata=data,
                                          callback=self.next)]

    def next(self, response):
        """Print the ``<title>`` text of the page returned after the login POST."""
        print(response.xpath("/html/head/title/text()").extract())
posted @ 2017-12-08 08:24 沧海一粒水 阅读(272) 评论(0) 编辑 收藏 举报
会员力量，点亮园子希望
刷新页面返回顶部
Python 爬虫验证码登录

公告