(二)python爬虫实例:猫眼电影TOP100榜并将电影信息写入到Excel(Excel列宽自适应)
# -*- coding:utf-8 -*-
"""Scrape the Maoyan TOP100 movie board and save it to an Excel workbook.

The ten board pages listed in ``urls`` are downloaded, the name, cast,
release date, country and score of every film are extracted, and the rows
are written to ``maoyan.xls`` with column widths fitted to their content.
"""
import re
from itertools import chain

import requests
from bs4 import BeautifulSoup
import xlrd  # not used by the code below; kept from the original import list
import xlwt

# One URL per board page: offsets 0, 10, ..., 90.
urls = [
    "https://maoyan.com/board/4?offset={}".format(i)
    for i in range(0, 100, 10)
]

header = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ("
                  "KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36"
}

# Column index -> widest display width seen so far; filled in by WriteExcel
# and printed when the module is run as a script.
length = {}

_REQUEST_TIMEOUT = 10      # seconds; avoids hanging forever on a stalled page
_NO_COUNTRY = "(暂无)"      # placeholder written when no country is listed
_COL_PADDING = 2           # extra characters so text does not touch the border
_MAX_COL_WIDTH = 65535     # presumably the largest width xlwt accepts (16-bit)

# Field labels may end in an ASCII colon or a full-width one (U+FF1A).
_LABEL_SEP = re.compile(r"[:\uff1a]")
# "<date>(<country>)" where the parentheses may be ASCII or full-width.
_RELEASE = re.compile(r"^([^(\uff08]*)(?:[(\uff08]([^)\uff09]*))?")


def len_byte(value):
    """Return the display width of *value* in half-width character cells.

    Every byte a character needs beyond the first in UTF-8 adds half a cell,
    so an ASCII character counts as 1 and a 3-byte CJK character as 2.
    """
    char_count = len(value)
    extra_bytes = len(value.encode('utf-8')) - char_count
    return extra_bytes // 2 + char_count


def _text_of(node, selector):
    """Return the stripped text of the first match of *selector*, or ""."""
    found = node.select_one(selector)
    return found.text.strip() if found is not None else ""


def _after_colon(text):
    """Return what follows the first colon in *text* (all of it if none)."""
    return _LABEL_SEP.split(text.strip(), maxsplit=1)[-1].strip()


def _split_release(text):
    """Split a release-time field into ``(date, country)``.

    The date is everything before the opening parenthesis, so year-only and
    year-month dates are handled as well as full ``YYYY-MM-DD`` ones.  The
    country falls back to a placeholder when no parenthesised part exists.
    """
    match = _RELEASE.match(_after_colon(text))
    date = match.group(1).strip()
    country = (match.group(2) or "").strip()
    return date, country or _NO_COUNTRY


# Columns: film, cast, release date, country, score.
def FilmInformation(url):
    """Download one board page and return its films as a list of rows.

    Args:
        url: address of a board page.

    Returns:
        A list of ``[name, staring, releasetime, country, score]`` lists of
        strings, one per ``.board-item-main`` element found on the page.
        A field whose element is missing is returned as an empty string.

    Raises:
        requests.RequestException: on a network error, a timeout, or an
            HTTP error status.
    """
    response = requests.get(url, headers=header, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    content = []
    for film in soup.select(".board-item-main"):
        # The first element carrying a title attribute holds the film name
        # (".name a" selects the same element).
        name = _text_of(film, "[title]")
        staring = _after_colon(_text_of(film, ".star"))
        releasetime, country = _split_release(_text_of(film, ".releasetime"))
        # The score is rendered as two elements: integer part and fraction.
        score = _text_of(film, ".integer") + _text_of(film, ".fraction")
        content.append([name, staring, releasetime, country, score])
    return content


def WriteExcel(data):
    """Write the scraped rows to ``maoyan.xls`` and fit the column widths.

    Args:
        data: a list of pages, each page being a list of rows as returned
            by FilmInformation.

    Side effects:
        Updates the module-level ``length`` dict with the widest display
        width per column and saves the workbook in the working directory.
    """
    title = ["电影", "主演", "时间", "国家", "评分"]
    workbook = xlwt.Workbook(encoding="utf-8")
    sheet = workbook.add_sheet("猫眼前100")

    for col, heading in enumerate(title):
        sheet.write(0, col, heading)
        # Headers count too, so a short column never truncates its title.
        length[col] = max(length.get(col, 0), len_byte(heading))

    # Row 0 holds the headers; films from all pages follow consecutively.
    for row, record in enumerate(chain.from_iterable(data), start=1):
        for col, value in enumerate(record):
            sheet.write(row, col, value)
            length[col] = max(length.get(col, 0), len_byte(str(value)))

    for col, width in length.items():
        # xlwt measures column width in 1/256 of a character width.
        sheet.col(col).width = min(256 * (width + _COL_PADDING), _MAX_COL_WIDTH)
    workbook.save("maoyan.xls")


def main():
    """Scrape every page in ``urls`` and write the result to Excel."""
    pages = [FilmInformation(url) for url in urls]
    WriteExcel(pages)


if __name__ == "__main__":
    main()
    print(length)