# -*- coding: utf-8 -*-
import scrapy

from movie.items import MovieItem


class MoviespiderSpider(scrapy.Spider):
    """Spider that scrapes a movie listing page and each movie's detail page.

    ``parse`` reads the name and actor text of every entry on the listing
    page, then requests the entry's detail page. ``detail_parse`` adds the
    director text to the partially filled item and yields it.
    """

    name = 'moviespider'
    # allowed_domains = ['www.movie.com']
    start_urls = ['https://www.4567tv.tv/index.php/vod/show/id/1.html']

    def detail_parse(self, response):
        """Complete the item passed through ``response.meta`` and yield it.

        The item is the one stored under the ``'item'`` meta key by
        ``parse``. Its ``director`` field is set to the list of all text
        nodes matched by the XPath below (an empty list when nothing
        matches).
        """
        item = response.meta['item']
        director = response.xpath(
            '/html/body/div[1]/div/div/div/div[2]/p[3]/a/text()'
        ).extract()
        item['director'] = director
        yield item

    def parse(self, response):
        """Yield one detail-page request per movie entry on the listing page.

        For every matching ``<li>`` a ``MovieItem`` is created with ``name``
        and ``actor`` filled in; the item travels with the request in
        ``meta`` so that ``detail_parse`` can finish it.
        """
        li_list = response.xpath('//li[@class="col-md-6 col-sm-4 col-xs-3"]')
        for li in li_list:
            title = li.xpath('./div/a/@title').extract_first()
            actor = li.xpath('./div/div/p/text()').extract_first()
            href = li.xpath('./div/a/@href').extract_first()
            if not href:
                # Without a link there is no detail page to fetch. Skip this
                # entry instead of failing on ``str + None``, which would
                # also drop every remaining entry on the page.
                self.logger.warning(
                    'Skipping entry without detail link: %r', title
                )
                continue
            # Resolve the link against the page URL rather than gluing it to
            # a hard-coded origin, so relative and absolute hrefs both work.
            detail_url = response.urljoin(href)

            item = MovieItem()
            item['name'] = title
            item['actor'] = actor
            # The first parse callback does not yield the item directly; it
            # yields a scrapy.Request for the detail URL and carries the
            # item along so the next callback can complete it.
            yield scrapy.Request(
                url=detail_url,
                callback=self.detail_parse,
                meta={'item': item},
            )