豆瓣top250.py
# Worked for one run, then got blocked. Now preparing to crawl the title and grade of every movie on Douban.
import random

import requests
from bs4 import BeautifulSoup

# The header name must be spelled 'User-Agent'. A key such as 'user_agent' is
# sent as an unrelated header, and requests keeps its default
# 'python-requests/...' identification.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.59 Safari/537.36',
}

proxy_list = [
    'http://117.177.250.151:8081',
    'http://111.85.219.250:3129',
    'http://122.70.183.138:8118',
]
proxy_ip = random.choice(proxy_list)  # pick one proxy at random for this run
# NOTE(review): only the 'http' scheme is mapped here, while the list URL
# below is 'https', so requests presumably does not route these calls through
# the proxy. Add an 'https' entry if proxying is really intended - confirm
# the proxies are reachable first.
proxies = {'http': proxy_ip}

_LIST_URL = 'https://movie.douban.com/top250?start={}&filter='
_REQUEST_TIMEOUT = 10  # seconds; without a timeout requests.get can block forever


def _parse_items(html):
    """Return a ``{'title', 'grade'}`` dict for every movie entry in *html*.

    Entries are located through the same selectors the page was originally
    scraped with: one ``div.hd`` per movie (its first ``span.title`` is the
    title) paired in document order with the ``span.rating_num`` values.
    Raises ``IndexError`` if a ``div.hd`` holds no title span.
    """
    soup = BeautifulSoup(html, 'lxml')
    headings = soup.select('div.hd')
    grades = soup.select('div.star > span.rating_num')
    return [
        {
            'title': heading.select('a > span.title')[0].text,
            'grade': grade.text,
        }
        for heading, grade in zip(headings, grades)
    ]


def get_items_from(page):
    """Print the title and grade of the first *page* movies of the chart.

    *page* is the number of movies to print (``get_items_from(250)`` prints
    the whole chart). Each response is parsed for all entries it contains and
    the ``start`` offset is advanced by the number of entries found, so a run
    needs one request per list page instead of one request per movie.

    Raises:
        IndexError: if a response contains no movie entries (for example
            because the crawler was blocked or *page* exceeds the chart).
    """
    start = 0
    while start < page:
        url = _LIST_URL.format(start)
        response = requests.get(
            url,
            headers=headers,
            proxies=proxies,
            timeout=_REQUEST_TIMEOUT,
        )
        items = _parse_items(response.text)
        if not items:
            # Same exception type the bare `[0]` lookup used to raise, but
            # with enough context to tell what happened.
            raise IndexError(
                'no movie entries found at {} (HTTP {})'.format(
                    url, response.status_code
                )
            )
        # Never print more than the caller asked for.
        for data in items[:page - start]:
            print(data)
        start += len(items)


if __name__ == '__main__':
    get_items_from(250)