Python 网络爬虫代理设置

requests

http/https代理设置

复制
import requests

# Proxy address as "host:port".
proxy = '120.78.225.5:3128'

# The dictionary key selects which *target* URL scheme the entry applies to;
# the URL value says how to reach the proxy itself. Assuming this is a plain
# HTTP proxy, it is reached over http:// for both http and https targets
# (an https:// proxy URL would ask for a TLS connection to the proxy).
proxies = {
    'http': 'http://' + proxy,
    'https': 'http://' + proxy,
}

try:
    # Without a timeout the call can block indefinitely on a dead proxy.
    response = requests.get(
        'http://httpbin.org/get',
        proxies=proxies,
        timeout=10,
    )
    print(response.text)
except (requests.exceptions.ConnectionError,
        requests.exceptions.Timeout) as e:
    # Timeout is listed explicitly because a read timeout is not a
    # ConnectionError.
    print('Error', e.args)
复制
# 输出:
{
"args": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Cache-Control": "max-age=259200",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.21.0"
},
"origin": "120.231.147.81, 120.78.225.5, 120.231.147.81",
"url": "https://httpbin.org/get"
}

SOCKS5代理

复制
如果代理需要认证,只需在代理地址前加上用户名和密码:proxy = 'username:password@127.0.0.1:9743',其中 127.0.0.1:9743 请替换为实际的代理 IP 和端口。
如果要使用 SOCKS5 代理(需先安装 SOCKS 支持:pip install "requests[socks]"),将 proxies 改为:
# SOCKS5 variant: http and https targets both go through the same
# socks5:// proxy URL built from `proxy`.
proxies = {scheme: 'socks5://' + proxy for scheme in ('http', 'https')}
即可。

全局设置SOCKS5代理

复制
import socket

import requests
import socks

# Install a process-wide default SOCKS5 proxy and replace the socket class,
# so every socket created afterwards (including the ones requests opens)
# is a socks.socksocket using that default proxy.
socks.set_default_proxy(socks.SOCKS5, '218.91.112.56', 9999)
socket.socket = socks.socksocket

try:
    # Without a timeout the call can block indefinitely on a dead proxy.
    response = requests.get('http://httpbin.org/get', timeout=10)
    print(response.text)
except (requests.exceptions.ConnectionError,
        requests.exceptions.Timeout) as e:
    # Timeout is listed explicitly because a read timeout is not a
    # ConnectionError.
    print('Error', e.args)

Selenium

代理IP(无需认证)

复制
import time

from selenium import webdriver

# Proxy address as "host:port".
proxy = '120.78.225.5:3128'

# The proxy is configured through a ChromeOptions object.
chrome_options = webdriver.ChromeOptions()
# add_argument() appends a command-line switch for the browser; this one
# tells Chrome which proxy server to use.
chrome_options.add_argument('--proxy-server=http://' + proxy)

# `options=` is the supported keyword; `chrome_options=` is deprecated and
# no longer accepted by recent Selenium releases.
chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get('http://httpbin.org/get')
    # Keep the window open briefly so the response can be inspected.
    time.sleep(5)
finally:
    # quit() ends the whole session (browser and driver process), even if
    # loading the page raised; close() would only close the current window.
    chrome.quit()
复制
# 输出:
{
"args": {},
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "max-age=259200",
"Host": "httpbin.org",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
},
"origin": "120.231.147.81, 120.78.225.5, 120.231.147.81",
"url": "https://httpbin.org/get"
}

代理IP(需要认证)

复制
import json
import zipfile

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Proxy endpoint and the credentials it requires.
ip = '218.91.112.56'
port = 9999
username = 'foo'
password = 'bar'

# Manifest of a small Chrome extension that sets the proxy and answers its
# authentication challenge.
# NOTE(review): this is a Manifest V2 extension; Chrome is phasing MV2 out,
# so confirm it still loads in the Chrome version you target.
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
}
}
"""

# Background script of the extension. The placeholders are filled with
# json.dumps() output, i.e. already-quoted and escaped literals, so a quote
# or backslash in the credentials cannot break the generated JavaScript.
background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: %(ip)s,
port: %(port)s
}
}
}
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: %(username)s,
password: %(password)s
}
}
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
)
""" % {
    'ip': json.dumps(ip),
    'port': json.dumps(int(port)),
    'username': json.dumps(username),
    'password': json.dumps(password),
}

# Pack the two files into a zip archive that Chrome can load as an extension.
plugin_file = 'proxy_auth_plugin.zip'
with zipfile.ZipFile(plugin_file, 'w') as zp:
    zp.writestr("manifest.json", manifest_json)
    zp.writestr("background.js", background_js)

chrome_options = Options()
# Start the browser window maximized.
chrome_options.add_argument("--start-maximized")
# Register the generated archive as an extension to load at startup.
chrome_options.add_extension(plugin_file)

# `options=` is the supported keyword; `chrome_options=` is deprecated and
# no longer accepted by recent Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
try:
    browser.get('http://httpbin.org/get')
    # Print the response so the result is still visible after the browser
    # is shut down.
    print(browser.page_source)
finally:
    # Always end the session so no browser/driver process is left behind.
    browser.quit()
posted @   LeeHua  阅读(427)  评论(0)  编辑  收藏  举报
编辑推荐:
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· winform 绘制太阳,地球,月球 运作规律
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾(3.3-3.9)
点击右上角即可分享
微信分享提示

目录导航