Web-crawler IP proxy pool — scrapes free HTTP proxies from kuaidaili.com and probes each one.
import random

import requests
from bs4 import BeautifulSoup


def get_ip_list(url, headers):
    """Scrape a free-proxy listing page and return its proxies.

    Assumes the page lists one proxy per ``<tr>`` with the host in the
    first ``<td>`` and the port in the second (kuaidaili.com layout) —
    TODO confirm if pointed at a different listing site.

    Args:
        url: Proxy listing page URL.
        headers: HTTP request headers (User-Agent etc.) for the fetch.

    Returns:
        list[str]: proxies formatted as ``"host:port"``.
    """
    # timeout keeps the script from hanging forever on a slow/dead site.
    web_data = requests.get(url, headers=headers, timeout=10)
    print(web_data)
    soup = BeautifulSoup(web_data.text, 'lxml')
    rows = soup.find_all('tr')
    ip_list = []
    # Row 0 is the table header, so skip it.
    for row in rows[1:]:
        tds = row.find_all('td')
        # Guard against separator/header rows that lack the expected cells
        # (the original indexed tds[0]/tds[1] unconditionally -> IndexError).
        if len(tds) >= 2:
            ip_list.append(tds[0].text + ':' + tds[1].text)
    return ip_list


def get_random_ip(ip_list):
    """Pick one proxy at random and wrap it in a requests-style dict.

    Args:
        ip_list: proxies as ``"host:port"`` strings (from get_ip_list).

    Returns:
        dict: ``{'http': 'http://host:port'}`` suitable for
        ``requests.get(..., proxies=...)``.
    """
    proxy_list = ['http://' + ip for ip in ip_list]
    proxy_ip = random.choice(proxy_list)
    proxies = {'http': proxy_ip}
    return proxies


if __name__ == '__main__':
    url = 'https://www.kuaidaili.com/free/inha'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.60 Safari/537.17'
    }
    ip_list = get_ip_list(url, headers=headers)
    print(ip_list)

    # Probe each scraped proxy against a reachable URL; print the response
    # on success, or the failing proxy plus the error on failure.
    url = "http://www.baidu.com"
    for ip in ip_list:
        # BUG FIX: the original built {"http": ip} with a bare "host:port".
        # requests needs a scheme ("http://host:port") for the proxy to be
        # applied correctly — matching what get_random_ip already does.
        proxy = {"http": "http://" + ip}
        try:
            # Short timeout: dead proxies should fail fast, not hang.
            res = requests.get(url, proxies=proxy, timeout=5)
            print(res)
        except Exception as e:
            # Best-effort probing: report the bad proxy and keep going.
            print(proxy)
            print(str(e))
    # proxies = get_random_ip(ip_list)
    # print(proxies)
I can feel you forgetting me... There is a kind of tacit understanding: if I don't reach out to you, you won't reach out to me.