使用 Scrapy 直接取出文章的所有内容(标题和正文文本)
示例:
import scrapy
from firstscrapy.items import CnblogsMysqlItem


class CnblogsSpider(scrapy.Spider):
    """Example spider that prints the title and body text of one cnblogs post."""

    name = 'cnblogs'
    allowed_domains = ['www.cnblogs.com']
    # Alternative entry point (site root), kept for reference:
    # start_urls = ['http://www.cnblogs.com/']
    start_urls = ['http://www.cnblogs.com/lifei01/p/13440458.html']

    def parse(self, response):
        """Print the post title, then every text node of the post body.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Nothing is yielded; the method only writes to stdout.
        """
        # The title is looked up inside the '#main' container.
        main_section = response.css('#main')
        title = main_section.css('#cb_post_title_url span::text').extract_first()
        print(title)

        # '//text()' selects all descendant text nodes of the body div, so
        # nested markup is flattened into a list of plain strings.
        text_nodes = response.xpath(
            './/div[@id="cnblogs_post_body"]//text()'
        ).extract()
        for text_node in text_nodes:
            # Strip surrounding whitespace; whitespace-only nodes print as
            # empty lines.
            print(text_node.strip())