Scrapy 项目:renrencookie(携带 cookie 登录人人网)

# -*- coding: utf-8 -*-
import scrapy
import re

class RenrenSpider(scrapy.Spider):
    """Fetch login-only Renren pages by replaying cookies copied from a browser.

    The first request is sent with the cookies of an already logged-in
    session. The follow-up requests issued from ``parse`` pass no cookies
    explicitly; they rely on Scrapy's cookie handling (``COOKIES_ENABLED``)
    to resend the session cookies.
    """

    name = 'renren'
    allowed_domains = ['renren.com']
    # Personal Renren profile page; requesting it without being logged in fails.
    start_urls = ['http://www.renren.com/969574735/profile']

    @staticmethod
    def _parse_cookie_header(header):
        """Convert a raw ``Cookie`` header string into a ``{name: value}`` dict.

        Each fragment is split on the first ``=`` only, so values that
        themselves contain ``=`` are kept intact. Fragments that are empty or
        have no ``=`` are skipped instead of raising ``IndexError``.
        """
        cookies = {}
        for fragment in header.split(";"):
            key, separator, value = fragment.strip().partition("=")
            if key and separator:
                cookies[key] = value
        return cookies

    def start_requests(self):
        """Yield the initial profile request carrying the logged-in cookies."""
        # Cookie header copied from a logged-in browser session on the profile page.
        cookies = "anonymid=jrg1i50g-7eexp6; depovince=GW; _r01_=1; JSESSIONID=abcj8H8oNBjFffnOJDtIw; ick_login=28035622-0567-4fce-84e7-d2288596f877; ick=e87f9e28-7f1c-4695-a9d5-fc686c74327c; t=11361863ad1785a9ff8498bf8234feee5; societyguester=11361863ad1785a9ff8498bf8234feee5; id=969574735; xnsid=6c889a8c; XNESSESSIONID=7c09723b7d13; jebecookies=0ddc7c08-3ad5-4317-ac36-836ee3c863d4|||||; ver=7.0; loginfrom=null; jebe_key=bf038be3-3e55-4c02-8325-1e6a7938dc70%7Cbb223f6876cd2eb89db3a027b645f7ee%7C1548662439565%7C1%7C1548662439242; wp_fold=0"
        yield scrapy.Request(
            self.start_urls[0],
            callback=self.parse,
            cookies=self._parse_cookie_header(cookies),
        )

    def parse(self, response):
        """Print whether the profile page shows the user name, then follow up.

        Two more login-only pages are requested without explicit cookies to
        check that the session is carried over.
        """
        print("===========")
        # response.text uses the encoding Scrapy detected for the response
        # instead of assuming UTF-8 as a bare bytes.decode() does.
        print(re.findall("陈陈", response.text))
        # Request the personal-info (timeline) page as well.
        yield scrapy.Request(
            "http://www.renren.com/969574735/profile?v=info_timeline",
            callback=self.parse_detail,
        )
        # Request the friends-management page as well.
        yield scrapy.Request(
            "http://friend.renren.com/managefriends",
            callback=self.parse_detail,
        )

    def parse_detail(self, response):
        """Print the marker strings found in a follow-up page."""
        page = response.text
        print(re.findall("陈陈", page))
        print("++++++++")
        print(re.findall("公司全称", page))

settings.py 中的相关配置:

# Cookie handling must be switched on; COOKIES_DEBUG below depends on it.
COOKIES_ENABLED = True

# Show the cookies being passed along in the terminal output.
# Only takes effect while COOKIES_ENABLED is True.
COOKIES_DEBUG = True

 项目地址:https://github.com/CH-chen/renrencookie

posted @ 2019-01-29 06:28  CHVV  阅读(167)  评论(0)  编辑  收藏  举报