1. 需要先后发送 GET 和 POST 请求才能获取 response 信息;要把这两个 response 分开处理,只返回 POST 请求的 response 信息。
class DoubanSpider(scrapy.Spider):
    """Spider that requests the Douban Read category listing as JSON.

    Instead of scraping the HTML category page, ``start_requests`` sends a
    JSON payload (sort order, page, kind and a GraphQL query) to the
    ``/j/kind/`` endpoint. No callback is set on the request, so Scrapy
    routes the response to the spider's default ``parse`` callback.
    """

    name = 'douban'
    allowed_domains = ['read.douban.com']
    page_num = 0
    # NOTE(review): '?kind/100' looks like a typo for '?kind=100' (the Referer
    # header below uses '?kind=100') — confirm before relying on start_urls.
    url1 = 'https://read.douban.com/category/?kind/100&page='
    # Not requested by this spider as written: start_requests() below replaces
    # Scrapy's default behaviour of iterating start_urls.
    start_urls = (
        url1+str(page_num),
    )

    def start_requests(self):
        """Yield the single JSON POST request that fetches the works listing.

        Yields:
            scrapy.Request: a POST to ``https://read.douban.com/j/kind/`` whose
            body is the JSON-encoded payload built below.
        """
        url = 'https://read.douban.com/j/kind/'
        headers = {
            "Content-Type": "application/json",
            "Referer": "https://read.douban.com/category/?kind=100&page=0&sort=hot",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36",
        }
        # GraphQL document sent verbatim; it selects the fields returned for
        # each work (title, authors, abstract, word count, rating, prices...).
        query = "\n query getFilterWorksList($works_ids: [ID!]) {\n worksList(worksIds: $works_ids) {\n \n \n title\n cover\n url\n isBundle\n \n \n url\n title\n \n \n author {\n name\n url\n }\n origAuthor {\n name\n url\n }\n translator {\n name\n url\n }\n \n \n abstract\n editorHighlight\n \n \n isOrigin\n kinds {\n \n name @skip(if: true)\n shortName @include(if: true)\n id\n \n }\n ... on WorksBase @include(if: true) {\n wordCount\n wordCountUnit\n }\n ... on WorksBase @include(if: true) {\n \n isEssay\n \n ... on EssayWorks {\n favorCount\n }\n \n \n isNew\n \n averageRating\n ratingCount\n url\n \n \n \n }\n ... on WorksBase @include(if: false) {\n isColumn\n isEssay\n onSaleTime\n ... on ColumnWorks {\n updateTime\n }\n }\n ... on WorksBase @include(if: true) {\n isColumn\n ... on ColumnWorks {\n isFinished\n }\n }\n ... on EssayWorks {\n essayActivityData {\n \n title\n uri\n tag {\n name\n color\n background\n icon2x\n icon3x\n iconSize {\n height\n }\n iconPosition {\n x y\n }\n }\n \n }\n }\n highlightTags {\n name\n }\n \n ... on WorksBase @include(if: false) {\n \n fixedPrice\n salesPrice\n isRebate\n \n }\n ... on EbookWorks {\n \n fixedPrice\n salesPrice\n isRebate\n \n }\n ... on WorksBase @include(if: true) {\n ... on EbookWorks {\n id\n isPurchased\n isInWishlist\n }\n }\n \n id\n isOrigin\n }\n }\n "
        payload = {
            "sort": "hot",
            "page": 1,
            "kind": 100,
            "query": query,
            "variables": {},
        }
        # scrapy.Request defaults to GET; the JSON body is only meaningful as
        # a POST, so the method has to be set explicitly.
        yield scrapy.Request(
            url,
            method="POST",
            headers=headers,
            body=json.dumps(payload),
        )
2. 分开之后,在 parse 回调中处理 POST 请求返回的 response 信息。
def parse(self, response):
    """Convert the JSON listing response into one item per work.

    The response body is decoded as JSON and its ``"list"`` entry is
    iterated; each element is mapped onto a new ``DoubanspiderItem``.

    Args:
        response: the response to parse; its ``text`` must be a JSON
            document with a top-level ``"list"`` key.

    Yields:
        DoubanspiderItem: one populated item per entry in ``"list"``.

    Raises:
        KeyError: if ``"list"`` or a required per-work key is missing.
    """
    works = json.loads(response.text)["list"]
    for work in works:
        self.logger.debug("parsed work: %s", work["title"])
        # Build a fresh item for every work: reusing one instance would make
        # every yielded item the same object, overwritten on each iteration.
        item = DoubanspiderItem()
        item["book"] = work["title"]
        # NOTE(review): assumes origAuthor is a single object with a "name"
        # key — confirm against a real response (it may be a list or null).
        item["author"] = work["origAuthor"]["name"]
        # Previously filled with the title as a placeholder; the query asks
        # for salesPrice/averageRating, so use them (None when absent).
        item["price"] = work.get("salesPrice")
        item["number"] = work["wordCount"]
        item["grade"] = work.get("averageRating")
        item["info"] = work["abstract"]
        yield item