Python 网络爬虫代理设置
requests
http/https代理设置
import requests
proxy = '120.78.225.5:3128'
# The dictionary key selects which kind of *target* URL an entry applies to;
# the scheme inside the value describes how to talk to the *proxy* itself.
# Both entries therefore use 'http://': an 'https://' scheme here would make
# the client attempt a TLS handshake with the proxy, which a plain HTTP proxy
# endpoint like this one is not expected to accept.
proxies = {
    'http': 'http://' + proxy,
    'https': 'http://' + proxy,
}
try:
    # Without a timeout, requests waits indefinitely on an unresponsive proxy.
    response = requests.get(
        'http://httpbin.org/get',
        proxies=proxies,
        timeout=10,
    )
    print(response.text)
except (requests.exceptions.ConnectionError,
        requests.exceptions.Timeout) as e:
    # A read timeout is not a ConnectionError, so it is listed explicitly.
    print('Error', e.args)
# 输出:
{
"args": {},
"headers": {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Cache-Control": "max-age=259200",
"Host": "httpbin.org",
"User-Agent": "python-requests/2.21.0"
},
"origin": "120.231.147.81, 120.78.225.5, 120.231.147.81",
"url": "https://httpbin.org/get"
}
SOCKS5代理
如果代理需要认证,只需在代理地址前加上用户名和密码:proxy = 'username:password@127.0.0.1:9743',其中 127.0.0.1:9743 请替换为实际代理的 IP 和端口。
如果要使用SOCKS5代理:
# Send both plain-HTTP and HTTPS traffic through the same SOCKS5 proxy.
# NOTE: requests handles 'socks5://' URLs only when its optional SOCKS
# support (PySocks, e.g. `pip install requests[socks]`) is installed.
proxies = dict.fromkeys(('http', 'https'), 'socks5://' + proxy)
即可。
全局设置SOCKS5代理
import requests
import socks
import socket
# Register a default SOCKS5 proxy with PySocks, then swap the standard
# library's socket class for the SOCKS-aware one. This is a process-wide
# change: every socket created after this point goes through the proxy,
# so no per-request `proxies=` argument is needed.
socks.set_default_proxy(socks.SOCKS5, '218.91.112.56', 9999)
socket.socket = socks.socksocket
try:
    # Without a timeout, requests waits indefinitely on an unresponsive proxy.
    response = requests.get('http://httpbin.org/get', timeout=10)
    print(response.text)
except (requests.exceptions.ConnectionError,
        requests.exceptions.Timeout) as e:
    # A read timeout is not a ConnectionError, so it is listed explicitly.
    print('Error', e.args)
Selenium
代理IP(无需认证)
from selenium import webdriver
import time
proxy = '120.78.225.5:3128'
# Create the Chrome options object that will carry the proxy setting.
chrome_options = webdriver.ChromeOptions()
# add_argument() appends a command-line switch to the Chrome launch;
# --proxy-server tells Chrome which proxy to route traffic through.
chrome_options.add_argument('--proxy-server=http://' + proxy)
# `options=` is the supported keyword; `chrome_options=` is deprecated and
# rejected by recent Selenium releases.
chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get('http://httpbin.org/get')
    # Keep the window open briefly so the result can be seen.
    time.sleep(5)
finally:
    # quit() ends the whole WebDriver session (browser and driver process),
    # whereas close() only closes the current window. Running it in
    # `finally` releases the session even if the page load fails.
    chrome.quit()
# 输出:
{
"args": {},
"headers": {
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9",
"Cache-Control": "max-age=259200",
"Host": "httpbin.org",
"Upgrade-Insecure-Requests": "1",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
},
"origin": "120.231.147.81, 120.78.225.5, 120.231.147.81",
"url": "https://httpbin.org/get"
}
代理IP(需要认证)
import json
import time
import zipfile

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Proxy endpoint and credentials; replace with real values.
ip = '218.91.112.56'
port = 9999
username = 'foo'
password = 'bar'

# Chrome has no command-line switch for proxy credentials, so this script
# generates a tiny extension that sets the proxy and answers the proxy's
# authentication challenge.
# NOTE(review): this is a Manifest V2 extension; newer Chrome versions may
# refuse to load MV2 extensions - verify against the Chrome version in use.
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
}
}
"""

# The string values are inserted with json.dumps(), which emits a quoted and
# escaped literal. Interpolating the raw text between hand-written quotes
# would break the script (or inject code) as soon as a credential contained
# a quote or a backslash.
background_js = """
var config = {
    mode: "fixed_servers",
    rules: {
        singleProxy: {
            scheme: "http",
            host: %(ip)s,
            port: %(port)s
        }
    }
}
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
    return {
        authCredentials: {
            username: %(username)s,
            password: %(password)s
        }
    }
}
chrome.webRequest.onAuthRequired.addListener(
    callbackFn,
    {urls: ["<all_urls>"]},
    ['blocking']
)
""" % {
    'ip': json.dumps(ip),
    'port': port,
    'username': json.dumps(username),
    'password': json.dumps(password),
}

# Package the two files as a zip archive that Chrome can load as an extension.
plugin_file = 'proxy_auth_plugin.zip'
with zipfile.ZipFile(plugin_file, 'w') as zp:
    zp.writestr("manifest.json", manifest_json)
    zp.writestr("background.js", background_js)

chrome_options = Options()
# Start the browser window maximized.
chrome_options.add_argument("--start-maximized")
# Register the packaged extension so it is installed when Chrome launches.
chrome_options.add_extension(plugin_file)
# `options=` is the supported keyword; `chrome_options=` is deprecated and
# rejected by recent Selenium releases.
browser = webdriver.Chrome(options=chrome_options)
try:
    browser.get('http://httpbin.org/get')
    # Keep the window open briefly so the result can be seen.
    time.sleep(5)
finally:
    # Always end the WebDriver session so no browser or driver process is
    # left running after the script exits.
    browser.quit()
本文来自博客园,作者:LeeHua,转载请注明原文链接:https://www.cnblogs.com/liyihua/p/11265618.html