06、豆瓣爬虫
把豆瓣 TOP250 中每部电影的 序号/电影名/评分/推荐语/链接 都爬取下来,并将结果全部打印出来。
# 6. Douban crawler
# Scrape rank / title / rating / one-line quote / link for every film in the
# Douban Top 250 list and print them all, one tab-separated row per film.
# URL https://movie.douban.com/top250?start=

import requests
from bs4 import BeautifulSoup

TOP250_URL = 'https://movie.douban.com/top250'
PAGE_SIZE = 25        # films per result page
TOTAL_MOVIES = 250    # size of the whole list
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever

# NOTE(review): Douban is widely reported to reject the default
# python-requests User-Agent (HTTP 418), which would leave every page without
# any <div class="item"> and make the script print nothing. Send a
# browser-like User-Agent instead -- confirm against the live site.
REQUEST_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/120.0 Safari/537.36'
    ),
}

for start in range(0, TOTAL_MOVIES, PAGE_SIZE):
    res = requests.get(
        TOP250_URL,
        params={'start': start},
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly on an error response instead of parsing an error page and
    # silently printing nothing.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    # Each <div class="item"> holds one film.
    for item in soup.find_all('div', class_='item'):
        # Some films have no one-line quote; print an empty column for them.
        quote = item.find('span', class_='inq')
        print(
            item.find('em').text,                        # rank
            item.find('span', class_='title').text,      # primary title
            item.find('span', class_='rating_num').text, # rating
            quote.text if quote is not None else '',     # quote (may be absent)
            item.find('a')['href'],                      # detail-page link
            sep='\t',
        )

# Reference solution from the course instructor, kept below as an inert
# string literal; it is never executed.
'''
老师的代码

import requests
# 引用requests模块
from bs4 import BeautifulSoup
for x in range(10):
    url = 'https://movie.douban.com/top250?start=' + str(x*25) + '&filter='
    res = requests.get(url)
    bs = BeautifulSoup(res.text, 'html.parser')
    tag_num = bs.find_all('div', class_="item")
    # 查找包含序号,电影名,链接的<div>标签
    tag_comment = bs.find_all('div', class_='star')
    # 查找包含评分的<div>标签
    tag_word = bs.find_all('span', class_='inq')
    # 查找推荐语


    list_all = []
    for x in range(len(tag_num)):
        if tag_num[x].text[2:5] == '223' or tag_num[x].text[2:5] =='244':
            list_movie = [tag_num[x].text[2:5], tag_num[x].find('img')['alt'], tag_comment[x].text[2:5], tag_num[x].find('a')['href'] ]
        else:
            list_movie = [tag_num[x].text[2:5], tag_num[x].find('img')['alt'], tag_comment[x].text[2:5], tag_word[x].text, tag_num[x].find('a')['href']]
        list_all.append(list_movie)
    print(list_all)
'''
items中每个Tag的内容如下
<div class="item">
    <div class="pic">
        <em class="">151</em>
        <a href="https://movie.douban.com/subject/24750126/">
            <img width="100" alt="荒蛮故事"
                 src="https://img3.doubanio.com/view/photo/s_ratio_poster/public/p2192834364.webp" class="">
        </a>
    </div>
    <div class="info">
        <div class="hd">
            <a href="https://movie.douban.com/subject/24750126/" class="">
                <span class="title">荒蛮故事</span>
                <span class="title"> / Relatos salvajes</span>
                <span class="other"> / 蛮荒故事 / 生命中最抓狂的小事(台)</span>
            </a>
            <span class="playable">[可播放]</span>
        </div>
        <div class="bd">
            <p class="">
                导演: 达米安·斯兹弗隆 Damián Szifron 主演: 达里奥·葛兰帝内提 Darío...

                2014 / 阿根廷 西班牙 / 剧情 喜剧 犯罪
            </p>
            <div class="star">
                <span class="rating45-t"></span>
                <span class="rating_num" property="v:average">8.8</span>
                <span property="v:best" content="10.0"></span>
                <span>203246人评价</span>
            </div>
            <p class="quote">
                <span class="inq">始于荒诞,止于更荒诞。</span>
            </p>
        </div>
    </div>
</div>