使用代理中转服务,解决selenium 使用短时效代理避免重启
背景
selenium 的代理只能在启动浏览器时配置;一旦代理失效或者不可用,切换代理就必须重启浏览器,浪费时间。
解决思路
可以搭建一个代理中转服务:让selenium始终连接这个固定的中转服务,由中转服务负责选择可用的代理。
代码实现
代码实现就简单了,中转服务将收到的网络请求,直接转发给可用的代理就行,如果代理连接失败,则切换到新的代理。
以下是python实现
import socket
import _thread
from threading import Thread
from redis.client import Redis
from adsl_proxy_pool import ADSLProxyPool
# Redis connection handed to the proxy pool below.
# NOTE(review): host, port and password are hard-coded literals — presumably
# placeholders; confirm and move real credentials out of the source.
redis_client = Redis(
    host="127.0.0.12",
    port=6383,
    password="password",
    db=5,
)
# Pool object that get_proxy() asks for an upstream proxy address.
adsl_proxy_pool = ADSLProxyPool(redis_client=redis_client)
# Proxy string currently in use. None until get_proxy() stores one, and reset
# to None by handle() whenever connecting to the upstream proxy fails.
cache_proxy = None
def communicate(sock1, sock2):
    """
    Relay bytes in one direction, from ``sock1`` to ``sock2``.

    Chunks of up to 1024 bytes are read from ``sock1`` and written in full to
    ``sock2`` until ``sock1`` reports end-of-stream or a socket error occurs.
    On a clean end-of-stream the write side of ``sock2`` is shut down so the
    peer also sees EOF instead of waiting for its own timeout.

    :param sock1: connected socket to read from
    :param sock2: connected socket to write to
    :return: None
    """
    try:
        while True:
            data = sock1.recv(1024)
            if not data:
                break
            sock2.sendall(data)
    except OSError:
        # Timeouts, resets and already-closed sockets simply end this
        # direction. Only OSError is swallowed so that programming errors
        # and interpreter-exit signals are no longer hidden.
        return
    # Clean EOF only: an idle timeout must not cut off the opposite
    # direction, which may still be streaming a response.
    try:
        sock2.shutdown(socket.SHUT_WR)
    except OSError:
        # The destination is already closed; nothing left to signal.
        pass
def handle(client):
    """
    Serve one accepted client by tunnelling it through an upstream proxy.

    Keeps asking get_proxy() for an address and connecting to it until a
    connection succeeds, clearing the cached proxy after every failure. Then
    relays client -> proxy in a helper thread and proxy -> client in the
    calling thread. Both sockets are closed once the proxy -> client
    direction ends.

    :param client: accepted client socket
    :return: None
    """
    global cache_proxy
    timeout = 60
    client.settimeout(timeout)
    while True:
        try:
            # Fetch the upstream proxy address.
            host_info = get_proxy()
            # A socket whose connect() failed cannot be reused reliably, so
            # every attempt opens a fresh one. create_connection also applies
            # the timeout to the connect itself and closes the socket if the
            # attempt fails.
            proxy_server = socket.create_connection(host_info, timeout=timeout)
            break
        except Exception:
            # Pool lookup, address parsing or connect failed: drop the cached
            # proxy so the next attempt asks the pool again.
            cache_proxy = None
    try:
        # Forward client traffic to proxy_server.
        _thread.start_new_thread(communicate, (client, proxy_server))
        # Send proxy_server's responses back to the client.
        communicate(proxy_server, client)
    finally:
        # shutdown() wakes the helper thread if it is blocked in recv();
        # close() then releases the descriptors instead of leaving that to
        # garbage collection.
        for sock in (proxy_server, client):
            try:
                sock.shutdown(socket.SHUT_RDWR)
            except OSError:
                pass
            sock.close()
def get_proxy():
    """
    Return the upstream proxy to use as a ``(host, port)`` tuple.

    The cached proxy is reused when one is set; otherwise the pool is polled
    until it yields a non-empty value. The value is parsed before it is
    cached, so a malformed entry is never stored. An optional ``scheme://``
    prefix and a trailing ``/`` are ignored.

    :return: tuple ``(host, port)`` with ``port`` as an int
    :raises ValueError: if the proxy string has no numeric port
    """
    global cache_proxy
    proxy = cache_proxy
    # Poll in a loop rather than recursing: with an empty pool the recursive
    # form exhausts the call stack and raises RecursionError.
    while not proxy:
        proxy = adsl_proxy_pool.get_proxy()
    # Drop any scheme ("http://", "https://", ...) and a trailing slash.
    address = proxy.partition("://")[2] if "://" in proxy else proxy
    host, _, port = address.rstrip("/").rpartition(":")
    endpoint = (host, int(port))
    # Cache only after the value parsed successfully.
    cache_proxy = proxy
    return endpoint
def serve(ip, port):
    """
    Run the relay's accept loop on ``ip:port``.

    Binds a listening TCP socket and starts one handle() thread per accepted
    connection. The call blocks and only ends when binding, listening or
    accepting raises.

    :param ip: local address to bind
    :param port: local TCP port to bind
    :return: does not return normally
    """
    # The context manager closes the listening socket when bind/listen/accept
    # raises, so the port is released instead of leaking.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        s.bind((ip, port))
        s.listen(10)
        print('\n local proxy server started...\n')
        while True:
            conn, _ = s.accept()
            try:
                _thread.start_new_thread(handle, (conn,))
            except RuntimeError:
                # No thread could be started: drop this one connection
                # rather than leaking it and stopping the whole server.
                conn.close()
def start_local_proxy_server(server="127.0.0.1", port=8081):
    """
    Launch serve() in a background daemon thread and return immediately.

    :param server: local address passed to serve()
    :param port: local TCP port passed to serve()
    :return: None
    """
    # Daemon thread: it does not keep the interpreter alive at exit.
    Thread(
        target=serve,
        name='APScheduler',
        args=(server, port),
        daemon=True,
    ).start()
if __name__ == '__main__':
    # Script entry point: run the relay in the foreground on loopback.
    IP, PORT = "127.0.0.1", 8080
    serve(IP, PORT)
selenium 直接配置本地代理
# Start the relay in the background, then point the browser at its fixed
# local address.
start_local_proxy_server("127.0.0.1", 8081)
# NOTE(review): Agent and get_chrome_opt_list are not defined in this
# snippet — presumably project helpers; confirm where they come from.
agent = Agent(chrome_args=get_chrome_opt_list(),
              proxy="http://127.0.0.1:8081",
              verbose=True,
              chrome_driver_path='/usr/local/bin/chromedriver')
-----------
# Excerpt from a method whose header is not shown here: when a proxy is
# configured, log it and pass it to Chrome through the --proxy-server switch.
# NOTE(review): `uc` is presumably undetected_chromedriver — confirm.
options = uc.ChromeOptions()
if self.proxy:
    self.logger.info("use proxy:{}".format(self.proxy))
    options.add_argument(f'--proxy-server={self.proxy}')
总结
一开始考虑,使用nginx来中转代理服务,但想到nginx本质就是一个中转,从方便性和可控性考虑,直接用代码实现更好。