用CrawlSpider爬取boss直聘
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule

from boss.items import BossItem


class ZhiPinSpider(CrawlSpider):
    """Crawl zhipin.com search results for "python" and scrape job postings.

    The crawl starts from the first search-result page, follows pagination
    links, and hands every job detail page to :meth:`parse_job`.
    """

    name = 'Zhipin'
    allowed_domains = ['zhipin.com']
    start_urls = ['https://www.zhipin.com/c100010000/?query=python&page=1']

    # Rules are an ordered tuple: when several rules match the same link,
    # CrawlSpider uses the first one, so order is significant.
    rules = (
        # Job list (pagination) pages: follow them, nothing to parse.
        Rule(LinkExtractor(allow=r'.+\?query=python&page=\d'), follow=True),
        # Job detail pages: parse them, do not follow further links.
        # NOTE(review): the original repeated the list-page pattern here,
        # which meant this rule could never fire. The pattern below assumes
        # detail URLs look like ".../job_detail/<id>.html" -- confirm
        # against the site's current URL scheme.
        Rule(
            LinkExtractor(allow=r'.+job_detail/.+\.html'),
            callback="parse_job",
            follow=False,
        ),
    )

    def parse_job(self, response):
        """Extract the job title and company name from a job detail page.

        Args:
            response: The downloaded job detail page.

        Yields:
            BossItem: One item with ``title`` and ``company``. Either field
            is ``None`` when the corresponding node is not found on the page.
        """
        raw_title = response.xpath('//h1[@class="name"]/text()').get()
        # .get() returns None when nothing matches; guard before stripping so
        # a layout change does not crash the callback with AttributeError.
        title = raw_title.strip() if raw_title is not None else None
        company = response.xpath(
            '//div[@class="info-company"]//a/text()'
        ).get()
        yield BossItem(title=title, company=company)