Scrapy 中使用 meta 传递参数的用法
场景:页面上有一个标题A,标题上带有一个链接B,链接B指向的页面里有内容C。如果要把标题A和内容C一一对应起来,可以在scrapy里通过Request的meta参数,把已经抓到的标题随请求一起传给下一个回调函数,这样就比较容易实现啦。
class File01Spider(scrapy.Spider):
    """Spider that carries a partially filled item from one callback to the next.

    ``parse`` reads a name from the listing page and follows the first link
    found under the ``shopLis`` block; ``pg`` reads the followed page and
    completes the same item. The item travels with the request through
    ``Request.meta``.
    """

    name = 'file01'
    start_urls = ['http://www.jkl.com.cn/cn/shopLis.aspx?id=862']
    # Not read by any method visible in this class.
    page = 2

    def parse(self, response):
        """Extract the first name and first link, then request the linked page.

        Only the first match of each XPath is used. If either one is missing,
        a warning is logged and no request is scheduled.

        Yields:
            scrapy.Request: request for the linked page, with the item that
            already holds ``name`` stored under ``meta['item']``.
        """
        # extract_first() returns None on no match instead of raising
        # IndexError the way ``extract()[0]`` does.
        title = response.xpath('//span[@class="con01"]/text()').extract_first()
        href = response.xpath('//div[@class="shopLis"]//a/@href').extract_first()
        if title is None or href is None:
            self.logger.warning(
                'Name or link not found on %s; nothing to follow', response.url
            )
            return

        item = YxqItem()
        item['name'] = title.strip()

        # Resolve the href against the page URL so that relative, root-relative
        # and absolute links all produce a valid target.
        target = response.urljoin(href)
        yield scrapy.Request(url=target, callback=self.pg, meta={'item': item})

    def pg(self, response):
        """Complete the item passed in ``meta`` with the text of the linked page.

        If the expected paragraph is missing, a warning is logged and no item
        is emitted.

        Yields:
            The item received from ``parse`` with its ``road`` field set.
        """
        item = response.meta['item']
        road = response.xpath('//div[@class="text"]/p[2]/text()').extract_first()
        if road is None:
            self.logger.warning('Expected text not found on %s', response.url)
            return

        item['road'] = road
        yield item