CrawlSpider读书网练习
1. 创建项目:scrapy startproject dushuproject
2. 跳转到 spiders 路径:cd dushuproject\dushuproject\spiders
3. 创建爬虫类:scrapy genspider read www.dushu.com
(注:下方示例代码中使用的项目名为 readPro、爬虫名为 readNet,与上述命令中的名称不同,请按实际创建的项目名和爬虫名对应替换。)
import scrapy

from readPro.items import ReadproItem


class ReadnetSpider(scrapy.Spider):
    """Spider that scrapes book cover images from dushu.com listing pages.

    Crawling starts at ``start_urls`` and then follows the numbered pages
    ``<base_url><page>.html`` one after another until ``max_page`` has been
    requested.
    """

    name = 'readNet'
    allowed_domains = ['www.dushu.com']
    start_urls = ['https://www.dushu.com/book/1179_1.html']

    # Listing pages are addressed as ``<base_url><page>.html``.
    base_url = 'https://www.dushu.com/book/1179_'
    # Number of the most recently scheduled listing page; advanced in parse().
    page = 1
    # Highest page number that will be requested (the last request built is
    # ``<base_url>101.html``).
    max_page = 101

    def parse(self, response):
        """Yield one item per book image, then schedule the next listing page.

        Args:
            response: The downloaded listing page.

        Yields:
            A ``ReadproItem`` for every ``<img>`` found under
            ``div.bookslist``, carrying the image URL (``src``) and the
            image's ``alt`` text (``name``); afterwards at most one
            ``scrapy.Request`` for the next listing page, handled by this
            same method.
        """
        print("读书网")

        for img in response.xpath('//div[@class="bookslist"]//li//img'):
            # Prefer ``data-original``; when an <img> has no such attribute,
            # fall back to the plain ``src`` so the item is not emitted with
            # ``src=None``.
            # NOTE(review): presumably ``data-original`` is the lazy-load
            # target URL -- confirm against the live page markup.
            src = img.xpath('./@data-original').extract_first()
            if src is None:
                src = img.xpath('./@src').extract_first()
            name = img.xpath('./@alt').extract_first()
            print(src, name)
            yield ReadproItem(src=src, name=name)

        # Pagination is driven once per response, after all of the page's
        # items have been yielded.
        if self.page < self.max_page:
            self.page += 1
            url = self.base_url + str(self.page) + '.html'
            yield scrapy.Request(url=url, callback=self.parse)