Scrapy 通过 SOCKS5 代理发送请求的示例代码(基于 Python 3)
1.下载该项目的代码
https://github.com/unk2k/txsocksx
然后将其打包并安装到 Python 的第三方库目录(site-packages)中,之后即可直接导入该包。
2.自定义的下载处理器(download handler,而非下载中间件)代码如下:
from scrapy.core.downloader.handlers.http11 import HTTP11DownloadHandler, ScrapyAgent
from twisted.internet import reactor
from twisted.internet.endpoints import TCP4ClientEndpoint
from txsocksx.http import SOCKS5Agent
class Socks5DownloadHandler(HTTP11DownloadHandler):
    """HTTP/1.1 download handler that performs downloads via ScrapySocks5Agent.

    Only the agent class differs from the parent handler; the context
    factory, connection pool and crawler held by the parent are reused.
    """

    def download_request(self, request, spider):
        """Return a deferred for the HTTP download.

        Args:
            request: The Scrapy request to download.
            spider: The spider issuing the request; its optional
                ``download_maxsize`` / ``download_warnsize`` attributes
                override the handler-wide limits.

        Returns:
            Whatever ``ScrapySocks5Agent.download_request`` returns for
            ``request`` (a deferred).
        """
        # Forward the response-size limits and data-loss policy so they are
        # not silently dropped when the SOCKS5 agent replaces the stock one.
        # The getattr fallbacks (0 / 0 / True) equal the values the agent
        # ended up with before these arguments were passed, so a parent
        # handler lacking these private attributes keeps the old behaviour.
        default_maxsize = getattr(self, "_default_maxsize", 0)
        default_warnsize = getattr(self, "_default_warnsize", 0)
        agent = ScrapySocks5Agent(
            contextFactory=self._contextFactory,
            pool=self._pool,
            maxsize=getattr(spider, "download_maxsize", default_maxsize),
            warnsize=getattr(spider, "download_warnsize", default_warnsize),
            fail_on_dataloss=getattr(self, "_fail_on_dataloss", True),
            crawler=self._crawler,
        )
        return agent.download_request(request)
class ScrapySocks5Agent(ScrapyAgent):
    """ScrapyAgent variant whose underlying agent is a txsocksx SOCKS5Agent.

    The proxy address defaults to ``127.0.0.1:1080``; subclass and override
    ``proxy_host`` / ``proxy_port`` to target a different SOCKS5 proxy.
    """

    # Address of the SOCKS5 proxy that every request is sent through.
    proxy_host = "127.0.0.1"
    proxy_port = 1080

    def _get_agent(self, request, timeout):
        """Build the agent used to download ``request``.

        Args:
            request: The request about to be downloaded (not inspected here).
            timeout: Connection timeout in seconds, applied to the TCP
                connection made to the proxy.

        Returns:
            A ``SOCKS5Agent`` bound to the configured proxy endpoint.
        """
        # Pass the timeout through so the proxy connection honours the
        # request's download timeout rather than the endpoint's own default.
        proxy_endpoint = TCP4ClientEndpoint(
            reactor, self.proxy_host, self.proxy_port, timeout=timeout
        )
        # NOTE(review): the agent is created without the connection pool or
        # TLS context factory supplied to this class's constructor -- confirm
        # whether SOCKS5Agent should receive them.
        return SOCKS5Agent(reactor, proxyEndpoint=proxy_endpoint)