Custom proxy IPs

Write a custom downloader middleware in proxy.py:
    import base64
    import random
    from urllib.parse import unquote, urlunparse
    from urllib.request import _parse_proxy

    from scrapy.utils.python import to_bytes


    class XdbProxyMiddleware:
        # Proxy pool; entries that carry credentials use the form
        # 'http://user:pass@host:port/'
        PROXIES = [
            'http://125.108.106.165:9000/',
        ]

        def _basic_auth_header(self, username, password):
            # Build the value for HTTP Basic auth: base64('user:password')
            user_pass = to_bytes(
                '%s:%s' % (unquote(username), unquote(password)),
                encoding='latin-1')
            return base64.b64encode(user_pass)

        def process_request(self, request, spider):
            # Pick a random proxy from the pool
            url = random.choice(self.PROXIES)
            # Split the proxy URL into scheme, credentials, and host:port
            proxy_type, user, password, hostport = _parse_proxy(url)
            proxy_url = urlunparse((proxy_type or '', hostport, '', '', '', ''))

            if user:
                creds = self._basic_auth_header(user, password)
            else:
                creds = None
            # Swap the randomly chosen proxy IP into the request
            request.meta['proxy'] = proxy_url
            spider.logger.debug('Using proxy: %s', request.meta['proxy'])
            if creds:
                # Add proxy user authentication
                request.headers['Proxy-Authorization'] = b'Basic ' + creds
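
To see what the authentication branch produces, here is a minimal sketch (the credentialed proxy URL below is a made-up example, not part of the pool above) of how _parse_proxy splits a proxy URL and what header value results:

    import base64
    from urllib.request import _parse_proxy

    # 'http://user:pass@127.0.0.1:8888/' is a hypothetical authenticated proxy
    proxy_type, user, password, hostport = _parse_proxy('http://user:pass@127.0.0.1:8888/')
    print(proxy_type, user, password, hostport)
    # -> http user pass 127.0.0.1:8888

    # The middleware then sends base64('user:pass') as Basic credentials:
    print(b'Basic ' + base64.b64encode(b'user:pass'))
    # -> b'Basic dXNlcjpwYXNz'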
Enable the middleware in the configuration file
    settings.py
        DOWNLOADER_MIDDLEWARES = {
           # 'xdb.middlewares.XdbDownloaderMiddleware': 543,
           # 751 runs just after the built-in HttpProxyMiddleware (default 750)
           'xdb.proxy.XdbProxyMiddleware': 751,
           # 'scrapy.downloadermiddlewares.useragent.UserAgentMiddleware': None,
        }
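
To confirm the proxy is actually being used, a quick check is to request an IP-echo service and compare the reported address with the proxy's. A minimal sketch (the spider name check_ip and the httpbin.org endpoint are assumptions, not part of the original project):

    import scrapy

    class CheckIpSpider(scrapy.Spider):
        # Hypothetical spider just for verifying the middleware
        name = 'check_ip'
        start_urls = ['http://httpbin.org/ip']

        def parse(self, response):
            # httpbin echoes the origin IP; it should match the proxy,
            # not your local address
            self.logger.info(response.text)

Run it with scrapy crawl check_ip; if the middleware is wired up, the logged origin should be the proxy's IP rather than your own.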
