Python 网络爬虫代理设置

requests

http/https代理设置

import requests

# Address of a plain-HTTP forward proxy, as host:port.
proxy = '120.78.225.5:3128'
# The dictionary KEY selects which target URLs are sent through the proxy;
# the URL SCHEME in the value describes how to talk to the proxy itself.
# This proxy speaks plain HTTP, so both entries use ``http://``. An
# ``https://`` proxy URL would make the client attempt a TLS handshake with
# the proxy, which fails against an HTTP-only proxy.
proxies = {
    'http': 'http://' + proxy,
    'https': 'http://' + proxy,
}
try:
    # Without a timeout a dead or overloaded proxy blocks the call forever.
    response = requests.get('http://httpbin.org/get', proxies=proxies, timeout=10)
    print(response.text)
except requests.exceptions.RequestException as e:
    # RequestException is the base class of ConnectionError, ProxyError and
    # Timeout, so every proxy-related failure is reported here.
    print('Error', e.args)
# 输出:
{
  "args": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Cache-Control": "max-age=259200", 
    "Host": "httpbin.org", 
    "User-Agent": "python-requests/2.21.0"
  }, 
  "origin": "120.231.147.81, 120.78.225.5, 120.231.147.81", 
  "url": "https://httpbin.org/get"
}

SOCKS5代理

如果代理需要认证,只需在代理地址前加上用户名和密码,例如:proxy = 'username:password@127.0.0.1:9743',其中 127.0.0.1:9743 请替换为实际的代理 IP 和端口。

如果要使用SOCKS5代理(需要先安装 SOCKS 支持:pip install "requests[socks]"),将 proxies 改为:
# Send both plain-HTTP and HTTPS requests through the same SOCKS5 proxy.
proxies = {scheme: 'socks5://' + proxy for scheme in ('http', 'https')}
即可。

全局设置SOCKS5代理

import requests
import socks
import socket

# Register the SOCKS5 proxy that PySocks sockets use by default.
socks.set_default_proxy(socks.SOCKS5, '218.91.112.56', 9999)
# Replace the standard socket class with the PySocks one. This is a
# process-wide change: every socket created through ``socket.socket`` after
# this line goes through the proxy, not only the ones opened by requests.
socket.socket = socks.socksocket
try:
    # Without a timeout an unreachable proxy blocks the call forever.
    response = requests.get('http://httpbin.org/get', timeout=10)
    print(response.text)
except requests.exceptions.RequestException as e:
    # RequestException also covers timeouts, not just connection errors.
    print('Error', e.args)

Selenium

代理IP(无需认证)

from selenium import webdriver
import time

# Address of the HTTP proxy (no authentication), as host:port.
proxy = '120.78.225.5:3128'

# Chrome is configured through a ChromeOptions object.
chrome_options = webdriver.ChromeOptions()
# --proxy-server makes Chrome send its traffic through the given proxy.
chrome_options.add_argument('--proxy-server=http://' + proxy)

# ``options=`` replaces the deprecated ``chrome_options=`` keyword, which
# newer Selenium releases no longer accept.
chrome = webdriver.Chrome(options=chrome_options)
try:
    chrome.get('http://httpbin.org/get')
    # Keep the window open briefly so the response can be read.
    time.sleep(5)
finally:
    # quit() closes all windows AND stops the chromedriver process; close()
    # only closes the current window and would leave the driver running.
    chrome.quit()
# 输出:
{
  "args": {}, 
  "headers": {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3", 
    "Accept-Encoding": "gzip, deflate", 
    "Accept-Language": "zh-CN,zh;q=0.9", 
    "Cache-Control": "max-age=259200", 
    "Host": "httpbin.org", 
    "Upgrade-Insecure-Requests": "1", 
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36"
  }, 
  "origin": "120.231.147.81, 120.78.225.5, 120.231.147.81", 
  "url": "https://httpbin.org/get"
}

代理IP(需要认证)

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import zipfile

import json

# Proxy endpoint and the credentials it requires.
ip = '218.91.112.56'
port = 9999
username = 'foo'
password = 'bar'

# Manifest of a small Chrome extension that is generated on the fly. It asks
# for the permissions needed to set the proxy and to answer auth challenges.
# NOTE(review): this is a Manifest V2 extension; recent Chrome versions may
# refuse to load MV2 extensions - confirm against the Chrome version in use.
manifest_json = """
{
    "version": "1.0.0",
    "manifest_version": 2,
    "name": "Chrome Proxy",
    "permissions": [
        "proxy",
        "tabs",
        "unlimitedStorage",
        "storage",
        "<all_urls>",
        "webRequest",
        "webRequestBlocking"
    ],
    "background": {
        "scripts": ["background.js"]
    }
}
"""

# Background script of the extension: it points Chrome at the proxy and
# supplies the username/password whenever an auth challenge is raised.
# String values are inserted with json.dumps(), which emits a correctly quoted
# and escaped literal, so credentials containing quotes or backslashes cannot
# break the generated JavaScript.
background_js = """
var config = {
        mode: "fixed_servers",
        rules: {
          singleProxy: {
            scheme: "http",
            host: %(ip)s,
            port: %(port)d
          }
        }
      }
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
    return {
        authCredentials: {
            username: %(username)s,
            password: %(password)s
        }
    }
}
chrome.webRequest.onAuthRequired.addListener(
            callbackFn,
            {urls: ["<all_urls>"]},
            ['blocking']
)
""" % {
    'ip': json.dumps(ip),
    'port': int(port),
    'username': json.dumps(username),
    'password': json.dumps(password),
}

# The extension is packed as a zip archive in the current working directory.
plugin_file = 'proxy_auth_plugin.zip'

with zipfile.ZipFile(plugin_file, 'w') as zp:
    zp.writestr("manifest.json", manifest_json)
    zp.writestr("background.js", background_js)

chrome_options = Options()
# Start the browser window maximized.
chrome_options.add_argument("--start-maximized")
# Register the packed extension so Chrome loads it at startup.
chrome_options.add_extension(plugin_file)

# ``options=`` replaces the deprecated ``chrome_options=`` keyword, which
# newer Selenium releases no longer accept.
browser = webdriver.Chrome(options=chrome_options)
browser.get('http://httpbin.org/get')
# The browser is left open so the result can be inspected; call
# browser.quit() when finished to stop the chromedriver process.
posted @ 2019-07-29 18:17  LeeHua  阅读(412)  评论(0)  编辑  收藏  举报