scrapy框架使用ip代理(ip池) #request.meta['proxy'] = "http://122.7.199.137:4558"

在中间件文件 middlewares.py 中写入一个类,然后在 settings.py 的 DOWNLOADER_MIDDLEWARES = {} 中将其开启

具体代码是

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import random

import requests
from scrapy.http import HtmlResponse

ip_pool = []
pro_addr = ''
class proxyMiddleware(object):
    """Scrapy downloader middleware that attaches a verified proxy to requests.

    Proxy addresses ("host:port" strings) are kept in the module-level
    ``ip_pool`` list and the proxy currently in use is kept in the
    module-level ``pro_addr`` string, so the state is shared by every
    request handled in this process.

    Only spiders whose name contains "jdzgb" are affected; requests from
    any other spider pass through untouched.

    NOTE: the probe and the pool refill use blocking ``requests`` calls, and
    the method keeps trying proxies until one answers, so a request can be
    delayed for as long as the vendor keeps handing out dead proxies.
    """

    # Placeholder for the proxy vendor's "get IP" endpoint; replace with the
    # real URL. The response is expected to be one "host:port" per line.
    get_ip_url = "http://d.jghttp.golangapi.com/getipxxxxxx"
    # Page fetched through the proxy to check that the proxy is alive.
    check_url = 'https://www.baidu.com'
    # The pool is topped up whenever it holds fewer addresses than this.
    min_pool_size = 3
    # Timeout in seconds for the probe and for the refill request.
    timeout = 4

    def process_request(self, request, spider):
        """Set ``request.meta['proxy']`` to a proxy that passed a live probe.

        Args:
            request: the outgoing request; its ``meta`` dict is updated.
            spider: the spider issuing the request; only its ``name`` is read.

        Returns:
            None, so the request continues through the middleware chain.

        Raises:
            IndexError: if the pool is empty and could not be refilled.
        """
        global pro_addr
        if "jdzgb" not in spider.name:
            return None

        self._refill_pool()
        if not pro_addr:
            pro_addr = self._choose_proxy()

        while True:
            code = self._probe(pro_addr)
            print(code, pro_addr)
            if code in (200, 304):
                request.meta['proxy'] = "http://" + pro_addr
                # To use a different IP for every request, re-pick here:
                #     pro_addr = random.choice(ip_pool)
                # Left disabled, the same IP is reused until it stops working.
                return None
            # The proxy failed the probe: drop it and try another one.
            if pro_addr in ip_pool:
                ip_pool.remove(pro_addr)
            self._refill_pool()
            pro_addr = self._choose_proxy()

    def _refill_pool(self):
        """Append fresh addresses from the vendor API when the pool runs low."""
        if len(ip_pool) >= self.min_pool_size:
            return
        try:
            text = requests.get(self.get_ip_url, timeout=self.timeout).text
        except requests.RequestException as e:
            # Best effort: keep working with whatever is still in the pool.
            print(e)
            return
        for line in text.splitlines():
            addr = line.strip()
            # Skip blank lines and addresses that are already pooled.
            if addr and addr not in ip_pool:
                ip_pool.append(addr)

    def _choose_proxy(self):
        """Return a random address from the pool, failing clearly when empty."""
        if not ip_pool:
            raise IndexError(
                "proxy pool is empty; no addresses could be fetched from "
                + self.get_ip_url
            )
        return random.choice(ip_pool)

    def _probe(self, addr):
        """Fetch ``check_url`` through *addr* and return the HTTP status code.

        Returns 0 when the request fails for any reason.
        """
        proxy_url = "http://" + addr
        # Both schemes must be mapped: check_url is https, and requests only
        # applies a proxy whose key matches the scheme of the target URL.
        proxies = {"http": proxy_url, "https": proxy_url}
        try:
            # The context manager closes the session's connections afterwards.
            with requests.Session() as session:
                response = session.get(
                    url=self.check_url,
                    proxies=proxies,
                    timeout=self.timeout,
                    verify=False,
                )
            return response.status_code
        except Exception as e:
            # Any failure (timeout, refused connection, malformed address)
            # simply means this proxy is unusable.
            print(e)
            return 0

  

posted @   乔儿  阅读(1813)  评论(1编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 一文读懂知识蒸馏
· 终于写完轮子一部分:tcp代理 了,记录一下
点击右上角即可分享
微信分享提示