爬取豆瓣高分电影榜实例
一、环境
Windows 7 + Python 3.6
二、代码
"""Scrape a Douban movie list and append rank, title, score and URL to a file."""
import requests
from bs4 import BeautifulSoup

# List page URL; %s is the zero-based offset of the first entry on the page.
LIST_URL = 'https://www.douban.com/doulist/240962/?start=%s&sort=seq&playable=0&sub_type='
PAGE_SIZE = 25        # step between consecutive page offsets
LAST_OFFSET = 300     # offset of the last page that is requested
OUTPUT_PATH = '豆瓣高分电影'
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever


def fetch_items(start):
    """Download the list page starting at offset *start* and return its entries.

    Returns the ``div.doulist-item`` elements found on the page.

    Raises:
        requests.RequestException: on network failure, timeout, or a non-2xx
            HTTP status.
    """
    # NOTE(review): Douban may reject the default python-requests User-Agent;
    # if raise_for_status() fails here, try passing a browser-like `headers=`.
    response = requests.get(LIST_URL % start, timeout=REQUEST_TIMEOUT)
    # An error page contains no entries, so without this check a failed page
    # would be skipped silently and the output would be incomplete.
    response.raise_for_status()
    # To inspect the encoding requests guessed, print response.apparent_encoding.
    soup = BeautifulSoup(response.text, features='html.parser')
    return soup.find_all('div', class_='doulist-item')


def format_item(seq, item):
    """Build the output line for one list entry.

    Args:
        seq: 1-based rank to print for this entry.
        item: a ``div.doulist-item`` element.

    Returns:
        The line ``排名:<seq> <title> 评分:<score> <url>`` terminated by a newline.

    Raises:
        AttributeError: if the title block, its link, or the score span is
            missing (``find`` returned ``None``).
        KeyError: if the title link has no ``href`` attribute.
    """
    title = item.find('div', class_='title')
    url = title.find('a').attrs['href']
    movie = title.text.strip('"').strip()
    score = item.find('span', class_='rating_nums').text
    return f'排名:{seq} {movie} 评分:{score} {url}\n'


def main():
    """Walk every page of the list, printing each entry and appending it to OUTPUT_PATH."""
    seq = 0
    # Open the output once for the whole run instead of once per page.
    with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
        for start in range(0, LAST_OFFSET + 1, PAGE_SIZE):
            for item in fetch_items(start):
                # The rank advances even for entries that cannot be parsed, so
                # the numbering of the remaining entries is unaffected.
                seq += 1
                try:
                    info = format_item(seq, item)
                except (AttributeError, KeyError):
                    # The entry lacks a title, link, or score.
                    print(seq, ' 不存在')
                    continue
                print(info, end='')
                f.write(info)


if __name__ == '__main__':
    main()
三、部分结果截图