代理ip

代理(proxies参数)

如果需要使用代理,你可以通过为任意请求方法提供 proxies 参数来配置单个请求:

import requests

# Map each URL scheme to the proxy that should carry requests of that scheme.
proxies = {
    "http": "http://12.34.56.79:9527",
    "https": "http://12.34.56.79:9527",
}

response = requests.get("http://www.baidu.com", proxies=proxies)
# Call form of print: valid on both Python 2 and Python 3, and consistent
# with the print(...) calls used by the later snippets in this file.
print(response.text)

也可以通过本地环境变量 HTTP_PROXY 和 HTTPS_PROXY 来配置代理:

# HTTPS_PROXY selects the proxy used for https:// requests; the proxy itself is
# still reached over plain HTTP, the same as the "https" entry of the proxies dict.
export HTTP_PROXY="http://12.34.56.79:9527"
export HTTPS_PROXY="http://12.34.56.79:9527"

若你的代理需要使用HTTP Basic Auth,可以使用 http://user:password@host/ 语法:

# Proxy credentials are carried in the URL itself: http://user:password@host/
proxies = dict(http="http://user:pass@10.10.1.10:3128/")

web客户端验证

如果是Web客户端验证,需要添加 auth = (账户名, 密码)

import requests

# (username, password) pair handed to requests through the ``auth`` argument.
auth = ('test', '123456')

response = requests.get('http://192.168.199.107', auth=auth)

# Call form of print: valid on both Python 2 and Python 3.
print(response.text)
#下面是获取代理ip及验证ip的方法
#coding=utf8
import requests
from bs4 import BeautifulSoup
import re
import os.path

# User-Agent string sent with the proxy-list request.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
# Request headers passed to session.get() inside getListProxies().
headers = dict([('User-Agent', user_agent)])


def getListProxies():
    """Scrape proxy candidates from xicidaili and return those that respond.

    The listing page is fetched with the module-level ``headers``. Each table
    row is turned into a ``{'http': 'ip:port', 'https': 'ip:port'}`` mapping,
    which is kept only if a probe request through it completes within 5
    seconds. Scanning stops once 10 working proxies have been collected.

    Returns:
        list: at most 10 proxy mappings, in page order. Empty when no
        candidate answered the probe.

    Raises:
        requests.RequestException: if the listing page itself cannot be
            fetched (only the per-proxy probes are best-effort).
    """
    # URL used to probe each proxy; http://httpbin.org/ip is an alternative.
    test_url = "http://ip.chinaz.com/getip.aspx"
    max_proxies = 10

    session = requests.session()
    try:
        # A timeout keeps an unresponsive listing site from hanging the scan.
        page = session.get("http://www.xicidaili.com/nn", headers=headers,
                           timeout=10)
        soup = BeautifulSoup(page.text, 'lxml')

        proxyList = []
        # The empty alternative in the pattern matches any class value, so
        # rows other than "odd" ones are selected too.
        taglist = soup.find_all('tr', attrs={'class': re.compile("(odd)|()")})
        for trtag in taglist:
            tdlist = trtag.find_all('td')
            # Rows without an address cell and a port cell (e.g. a header
            # row) cannot describe a proxy; skip them instead of raising.
            if len(tdlist) < 3:
                continue
            host = tdlist[1].string
            port = tdlist[2].string
            # .string is None when a cell does not hold a single text node.
            if not host or not port:
                continue

            address = host + ':' + port
            proxy = {'http': address, 'https': address}
            try:
                session.get(test_url, proxies=proxy, timeout=5)
            except Exception:
                # Deliberately best-effort: any failure while probing just
                # means this proxy is unusable, so move on to the next one.
                continue
            proxyList.append(proxy)
            if len(proxyList) == max_proxies:
                break

        return proxyList
    finally:
        # Release pooled connections whether or not the scan succeeded.
        session.close()
# Run the scan and show the proxies that answered the probe.
res = getListProxies()
# Call form of print: valid on both Python 2 and Python 3.
print(res)
'''
[{'http': u'110.73.32.21:8123', 'https': u'110.73.32.21:8123'}, {'http': u'101.17.26.249:8118',
 'https': u'101.17.26.249:8118'}, {'http': u'121.31.176.166:8123', 'https': u'121.31.176.166:8123'},
  {'http': u'110.72.37.19:8123', 'https': u'110.72.37.19:8123'}, {'http': u'61.135.217.7:80', 
  'https': u'61.135.217.7:80'}, {'http': u'122.114.31.177:808', 'https': u'122.114.31.177:808'},
   {'http': u'122.225.17.123:8080', 'https': u'122.225.17.123:8080'}, {'http': u'182.38.253.140:8118',
    'https': u'182.38.253.140:8118'}, {'http': u'139.199.87.42:80', 'https': u'139.199.87.42:80'}, 
    {'http': u'121.31.196.221:8123', 'https': u'121.31.196.221:8123'}]
'''

在selenium中使用代理ip:

火狐浏览器:
ip 是与“182.90.80.137:8123”格式相同的字符串:
# ``ip`` is an "address:port" string; split it once and reuse both parts.
ip_parts = ip.split(":")
ip_ip = ip_parts[0]
ip_port = int(ip_parts[1])  # the port preferences below take an integer
print(ip_ip)
print(ip_port)
# NOTE(review): HEADERS is defined elsewhere; presumably a list of User-Agent
# strings - confirm.
random_header = random.choice(HEADERS)
profile = webdriver.FirefoxProfile()
# Firefox reads its User-Agent override from this profile preference; the
# PhantomJS-style "<browser>.page.settings.userAgent" capability key is not
# used by Firefox, and setting it mutated the shared DesiredCapabilities dict.
profile.set_preference('general.useragent.override', random_header)
profile.set_preference('network.proxy.type', 1)  # 0 (default) = direct connection; 1 = manual proxy configuration
profile.set_preference('network.proxy.http', ip_ip)
profile.set_preference('network.proxy.http_port', ip_port)
profile.set_preference('network.proxy.ssl', ip_ip)
profile.set_preference('network.proxy.ssl_port', ip_port)
profile.update_preferences()
# Keyword form, matching the single-proxy Firefox example in this file.
driver = webdriver.Firefox(firefox_profile=profile)
或者,如果只使用一个固定的ip,可以直接这样写:
from selenium import webdriver

# Manual HTTP proxy configuration, expressed as (preference, value) pairs.
proxy_prefs = (
    ('network.proxy.type', 1),
    ('network.proxy.http', '127.0.0.1'),
    ('network.proxy.http_port', 17890),  # int
)

profile = webdriver.FirefoxProfile()
for pref_name, pref_value in proxy_prefs:
    profile.set_preference(pref_name, pref_value)
profile.update_preferences()

# Launch Firefox with the prepared profile and load the IP-echo page.
driver = webdriver.Firefox(firefox_profile=profile)
driver.get('http://httpbin.org/ip')
谷歌浏览器:
from selenium import webdriver
chromeOptions = webdriver.ChromeOptions()

# Configure the proxy.
# Note: there must be no spaces around "=", i.e. NOT
# "--proxy-server = http://202.20.16.82:10152".
chromeOptions.add_argument("--proxy-server=http://202.20.16.82:10152")
browser = webdriver.Chrome(chrome_options=chromeOptions)

try:
    # Load a page that echoes the caller's IP to check the proxy is in effect.
    browser.get("http://httpbin.org/ip")
    print(browser.page_source)
finally:
    # Quit (which also clears the browser cache) even if the page load fails,
    # so no browser process is left running.
    browser.quit()

从Chrome 59版本开始,已经开始支持Headless模式,也就是无界面模式,这样爬取的时候就不会弹出浏览器了。如果要使用此模式,请把Chrome升级到59版本及以上。启用Headless模式的方式如下:

首先,创建ChromeOptions对象,接着添加headless参数(例如 chromeOptions.add_argument('--headless')),然后在初始化Chrome对象的时候通过chrome_options传递这个ChromeOptions对象,这样我们就可以成功启用Chrome的Headless模式了。


使用phantomJS访问

#coding=utf8
from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy,ProxyType

# Open the IP-echo page without a proxy.
browser = webdriver.PhantomJS()
browser.get('http://ip.chinaz.com/getip.aspx')
print("1:",browser.session_id)
print("2:",browser.page_source)
print("3:",browser.get_cookies())


# Start a new session on the same driver with proxy settings in the desired
# capabilities. This appears to behave like clearing the browser state and
# visiting the URL again through the proxy.
proxy = Proxy()
proxy.proxy_type = ProxyType.MANUAL
proxy.http_proxy = '49.81.123.140:8118'
# Add the proxy settings to a copy of the PhantomJS capabilities, so the
# shared webdriver.DesiredCapabilities.PHANTOMJS dict is left unmodified for
# any driver created later.
capabilities = webdriver.DesiredCapabilities.PHANTOMJS.copy()
proxy.add_to_capabilities(capabilities)
browser.start_session(capabilities)
browser.get('http://ip.chinaz.com/getip.aspx')
print('1: ',browser.session_id)
print('2: ',browser.page_source)
print('3: ',browser.get_cookies())
# Shut the driver down so the PhantomJS process does not linger.
browser.quit()
通过两次输出的内容,可以看到使用代理前后显示的ip不同
            
posted @ 2018-02-23 17:10  m*x*h  阅读(252)  评论(0编辑  收藏  举报