scrapy设置ip池问题
middlewares.py
# NOTE(review): base64 is not used by this block; the import is kept in case
# other code in this module relies on it.
import base64
import random


class ProxyMiddleware(object):
    """Downloader middleware that sends each request through a random proxy.

    For every outgoing request one entry of ``proxyList`` is picked at random
    and stored in ``request.meta['proxy']``.
    """

    # Pool of candidate proxies. Entries without a scheme get "http://"
    # prepended; entries that already contain "://" are used unchanged.
    # NOTE(review): these entries carry no port -- proxies are usually given
    # as "host:port"; confirm the addresses are reachable as written.
    proxyList = ['61.129.70.131', '120.204.85.29']

    def process_request(self, request, spider):
        """Attach a randomly chosen proxy to ``request``.

        Args:
            request: The outgoing request; its ``meta`` mapping is updated
                in place with a ``'proxy'`` key.
            spider: The spider that issued the request (unused).

        Returns:
            None in every case. When the pool is empty the request is left
            untouched instead of raising ``IndexError``.
        """
        # random.choice() raises IndexError on an empty sequence, so bail out
        # early and leave the request without a proxy.
        if not self.proxyList:
            return None

        pro_adr = random.choice(self.proxyList)
        print("USE PROXY -> " + pro_adr)

        # Only add the default scheme when the entry does not bring its own,
        # otherwise "http://http://host" would be produced.
        if "://" not in pro_adr:
            pro_adr = "http://" + pro_adr
        request.meta['proxy'] = pro_adr
        return None
settings.py
# Downloader middlewares enabled for this project, mapped to their order
# value. The custom ProxyMiddleware (100) is ordered before the built-in
# HttpProxyMiddleware (110).
DOWNLOADER_MIDDLEWARES = {
    # The project's CeshiscDownloaderMiddleware is intentionally left disabled:
    # 'ceshisc.middlewares.CeshiscDownloaderMiddleware': 543,
    'ceshisc.middlewares.ProxyMiddleware': 100,
    # The old 'scrapy.contrib.downloadermiddleware.*' path is the pre-1.0
    # module layout; the built-in middleware lives under
    # 'scrapy.downloadermiddlewares'.
    'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 110,
}

# Item pipelines enabled for this project, mapped to their order value.
ITEM_PIPELINES = {
    'ceshisc.pipelines.CeshiscPipeline': 300,
}
小蜘蛛代码
import scrapy


class DmozSpider(scrapy.Spider):
    """Minimal spider named "demo" that requests the Baidu home page."""

    name = "demo"
    allowed_domains = ["baidu.com"]
    start_urls = ['http://www.baidu.com/']

    def parse(self, response):
        """Print a marker line for each response handed to this callback.

        Args:
            response: The downloaded response (not inspected here).
        """
        print("进来了...........数据")