Python 爬取豆瓣 Top250 电影图片

import urllib.request
from pathlib import Path

import requests
from lxml import html

# Browser-like User-Agent sent with every page request made via `requests`.
_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0'

# Request headers passed to requests.get().
headers = {'User-Agent': _USER_AGENT}


def get_html(url):
    """Fetch one listing page and return its movie-list element.

    Args:
        url: Address of the listing page to download.

    Returns:
        The first ``<ol class="grid_view">`` element found in the parsed page.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        IndexError: If the page contains no ``<ol class="grid_view">``.
    """
    # A timeout keeps an unresponsive server from blocking the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    element = html.etree.HTML(response.text)
    return element.xpath('//ol[@class="grid_view"]')[0]


def get_img(lis):
    """Download the cover image of every movie entry in ``lis``.

    Each image is saved as ``<title><num>.jpg`` inside the images directory,
    where ``num`` is the module-level running counter; it is incremented
    after every download.

    Args:
        lis: Iterable of lxml elements, one per movie entry (for example the
            element returned by ``get_html``).

    Raises:
        IndexError: If an entry has no title span or no ``<img>`` source.
    """
    global num  # running counter shared with main()
    # Build the path with a real separator: plain string concatenation
    # produced ".../Spider/images<title>1.jpg" next to the folder instead of
    # a file inside it.
    save_dir = Path('D:/Python/pythonProject/Spider/images')
    # Make sure the target directory exists before the first download.
    save_dir.mkdir(parents=True, exist_ok=True)
    for li in lis:
        title = li.xpath('.//span[@class="title"]/text()')[0]  # movie title
        img_url = li.xpath('.//img/@src')[0]  # cover image address
        file_name = save_dir / (title + str(num) + '.jpg')
        urllib.request.urlretrieve(img_url, filename=str(file_name))  # save to a local file
        num += 1


def main():
    """Download the cover images from all ten Top 250 listing pages.

    Resets the module-level counter ``num`` to 1, then for each page offset
    (0, 25, ..., 225) fetches the page with ``get_html`` and saves its covers
    with ``get_img``.
    """
    global num
    num = 1
    # Each listing page holds 25 entries; ``start`` is the offset of the first.
    for start in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start=' + str(start)
        get_img(get_html(url))


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

运行结果:

posted @ 2023-04-18 16:18  万事胜意k  阅读(80)  评论(0)  编辑  收藏  举报