14. Storing the Douban Top250 movie information with csv and Excel

    Remember that back in Lesson 3 we scraped the Douban TOP250 movie names, ratings, one-line blurbs, and links. Now we are going to store that data using the csv and Excel (openpyxl) modules covered in today's lesson, saving it in both formats.
 
 
import csv
import openpyxl
import requests
from bs4 import BeautifulSoup

# Douban rejects the default requests User-Agent, so send a browser-like one.
headers = {'User-Agent': 'Mozilla/5.0'}

# Save as a CSV file

with open('02.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(['编号', '电影名', '评分', '推荐语', '链接'])

    # The Top250 list is paginated, 25 movies per page.
    for page in range(0, 250, 25):
        res = requests.get('https://movie.douban.com/top250?start={}&filter='.format(page), headers=headers)
        soup = BeautifulSoup(res.text, 'html.parser')
        items = soup.find(class_='grid_view').find_all('li')
        for item in items:
            num = item.find('em').text                  # ranking number
            name = item.find('span').text               # the first <span> holds the title
            rate = item.find(class_='rating_num').text  # rating
            try:
                inq = item.find(class_='inq').text      # one-line blurb
            except AttributeError:
                inq = ''                                # a few movies have no blurb
            url = item.find('a')['href']                # detail-page link

            writer.writerow([num, name, rate, inq, url])

# Save as an Excel workbook

wb = openpyxl.Workbook()
sheet = wb.active
sheet.title = 'TOP250'
sheet['A1'] = '编号'
sheet['B1'] = '电影名'
sheet['C1'] = '评分'
sheet['D1'] = '推荐语'
sheet['E1'] = '链接'

for page in range(0, 250, 25):
    res = requests.get('https://movie.douban.com/top250?start={}&filter='.format(page), headers=headers)
    soup = BeautifulSoup(res.text, 'html.parser')
    items = soup.find(class_='grid_view').find_all('li')
    for item in items:
        num = item.find('em').text
        name = item.find('span').text
        rate = item.find(class_='rating_num').text
        try:
            inq = item.find(class_='inq').text
        except AttributeError:
            inq = ''
        url = item.find('a')['href']

        sheet.append([num, name, rate, inq, url])

wb.save('02.xlsx')
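
To double-check the results, you can read both files straight back. The snippet below is a minimal sketch, assuming the 02.csv and 02.xlsx files written above sit in the current directory.

# Quick sanity check: read both files back and count the rows.
import csv
import openpyxl

with open('02.csv', newline='', encoding='utf-8') as f:
    rows = list(csv.reader(f))
print('CSV rows (including header):', len(rows))
print('First data row:', rows[1])

wb = openpyxl.load_workbook('02.xlsx')
sheet = wb['TOP250']
print('Excel rows (including header):', sheet.max_row)
print('First data row:', [cell.value for cell in sheet[2]])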

 

 
 
 
posted @ 2019-04-16 22:30  三角形