14. Storing the Douban Top 250 movie information with csv and Excel
Remember that back in Level 3 we scraped the titles, ratings, one-line quotes and links of the Douban TOP 250 movies? Now it's time to store that data. Use the csv and Excel (openpyxl) techniques from today's lesson and save it once in each format.
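As a quick refresher of the two write patterns from the lesson, here is a minimal sketch; the file names and the sample row are only illustrative, not part of the exercise:

import csv
import openpyxl

# csv: open a file and write each record as a list
with open('demo.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['Title', 'Rating'])                 # header row
    writer.writerow(['The Shawshank Redemption', '9.7'])  # one data row

# openpyxl: create a workbook, append rows to the active sheet, then save
wb = openpyxl.Workbook()
sheet = wb.active
sheet.append(['Title', 'Rating'])
sheet.append(['The Shawshank Redemption', '9.7'])
wb.save('demo.xlsx')

The full solution for the exercise follows.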
import csv
import openpyxl
import requests
from bs4 import BeautifulSoup

# Douban usually rejects the default requests User-Agent, so send a browser-like one.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}

# Save as a CSV file

with open('02.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['No.', 'Title', 'Rating', 'Quote', 'Link'])

    for page in range(0, 250, 25):
        res = requests.get('https://movie.douban.com/top250?start={}&filter='.format(page),
                           headers=headers)
        soup = BeautifulSoup(res.text, 'html.parser')
        items = soup.find(class_='grid_view').find_all('li')
        for item in items:
            num = item.find('em').text                     # ranking number
            name = item.find('span', class_='title').text  # movie title
            rate = item.find(class_='rating_num').text     # rating
            inq_tag = item.find(class_='inq')              # some movies have no quote
            inq = inq_tag.text if inq_tag else ''
            url = item.find('a')['href']                   # link to the detail page

            writer.writerow([num, name, rate, inq, url])

# Save as an Excel workbook

wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'TOP250'
sheet['A1'] = 'No.'
sheet['B1'] = 'Title'
sheet['C1'] = 'Rating'
sheet['D1'] = 'Quote'
sheet['E1'] = 'Link'

for page in range(0, 250, 25):
    res = requests.get('https://movie.douban.com/top250?start={}&filter='.format(page),
                       headers=headers)
    soup = BeautifulSoup(res.text, 'html.parser')
    items = soup.find(class_='grid_view').find_all('li')
    for item in items:
        num = item.find('em').text
        name = item.find('span', class_='title').text
        rate = item.find(class_='rating_num').text
        inq_tag = item.find(class_='inq')
        inq = inq_tag.text if inq_tag else ''
        url = item.find('a')['href']

        sheet.append([num, name, rate, inq, url])

wb.save('02.xlsx')
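The script above downloads every page twice, once for the CSV pass and once for the Excel pass. A variant that scrapes each page only once and writes the same rows to both files is a bit gentler on the site. This is just a sketch reusing the selectors above; the scrape_top250 helper and the HEADERS constant are my own additions, not part of the original exercise:

import csv
import openpyxl
import requests
from bs4 import BeautifulSoup

# Assumed browser-like User-Agent; Douban tends to block the default one.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}


def scrape_top250():
    """Yield [No., Title, Rating, Quote, Link] for every movie in the TOP 250."""
    for page in range(0, 250, 25):
        res = requests.get(
            'https://movie.douban.com/top250?start={}&filter='.format(page),
            headers=HEADERS)
        soup = BeautifulSoup(res.text, 'html.parser')
        for item in soup.find(class_='grid_view').find_all('li'):
            inq_tag = item.find(class_='inq')
            yield [
                item.find('em').text,
                item.find('span', class_='title').text,
                item.find(class_='rating_num').text,
                inq_tag.text if inq_tag else '',
                item.find('a')['href'],
            ]


rows = list(scrape_top250())   # 10 requests in total instead of 20
header = ['No.', 'Title', 'Rating', 'Quote', 'Link']

# Write the same rows to the CSV file ...
with open('02.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(header)
    writer.writerows(rows)

# ... and to the Excel workbook
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'TOP250'
sheet.append(header)
for row in rows:
    sheet.append(row)
wb.save('02.xlsx')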