Python 爬取豆瓣 Top 250 电影图片
import os
import urllib.request

import requests
from lxml import html
# Request headers that present the client as desktop Firefox; passed to
# requests.get() by get_html().
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) '
        'Gecko/20100101 Firefox/73.0'
    ),
}
def get_html(url, timeout=10):
    """Fetch *url* and return its ``<ol class="grid_view">`` element.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` may block indefinitely.

    Returns:
        The first ``<ol class="grid_view">`` element of the parsed page.
        Iterating over it yields its child elements (presumably one
        ``<li>`` per movie -- confirm against the live page markup).

    Raises:
        requests.HTTPError: The server answered with a 4xx/5xx status.
        IndexError: The page contains no ``<ol class="grid_view">``.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page.
    response.raise_for_status()
    element = html.etree.HTML(response.text)
    matches = element.xpath('//ol[@class="grid_view"]')
    if not matches:
        # Same exception type as the bare [0] lookup, but with context.
        raise IndexError('no <ol class="grid_view"> element found at ' + url)
    return matches[0]
def get_img(lis, save_dir='D:/Python/pythonProject/Spider/images'):
    """Download the cover image of every entry in *lis* into *save_dir*.

    Each file is named ``<title><num>.jpg``, where ``num`` is the
    module-level counter (set by ``main``) that is incremented after
    every download.

    Args:
        lis: Iterable of elements exposing ``xpath()``; each must contain
            a ``<span class="title">`` and an ``<img>`` with a ``src``.
        save_dir: Directory the images are written to. It is created if
            it does not exist yet.

    Raises:
        IndexError: An entry has no title span or no image source.
    """
    global num  # running counter shared with main()
    os.makedirs(save_dir, exist_ok=True)
    for li in lis:
        # Only the first title span is used for the file name.
        title = li.xpath('.//span[@class="title"]/text()')[0]
        img_url = li.xpath('.//img/@src')[0]
        # Join with a real path separator so the file lands inside
        # save_dir rather than next to it with the directory name as prefix.
        file_name = os.path.join(save_dir, f'{title}{num}.jpg')
        urllib.request.urlretrieve(img_url, filename=file_name)
        num += 1
def main():
    """Download the covers from all ten pages of the Douban Top 250 list.

    Resets the module-level counter ``num`` to 1, then fetches each page
    (25 entries per page, selected through the ``start`` query parameter)
    and hands the parsed list to ``get_img``.
    """
    global num
    num = 1  # running counter that get_img appends to each file name
    # Offsets 0, 25, ..., 225 -- one per page.
    for start in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start=' + str(start)
        get_img(get_html(url))
# Start the scrape. There is no __main__ guard, so this also runs on import.
main()
运行结果: