随笔 - 20, 文章 - 0, 评论 - 4, 阅读 - 23114

导航

< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

批量下载某网的美女图片

Posted on   初之萌萌  阅读(198)  评论(0)  编辑  收藏  举报
复制代码
#!/usr/bin/python3.6
# -*- coding: utf-8 -*-

import requests
from lxml import etree
import time
import os

# Running totals: pages visited and images saved.  Both are mutated via
# `global` statements in get_content() and reported by main() on exit.
sum_page = 0
sum_images = 0

# Desktop-browser User-Agent so the site serves normal pages instead of
# rejecting the default python-requests UA.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}
def download_page(url):
    """Fetch *url* and return the response body as text.

    Returns None (after printing a notice) on any network or HTTP error
    so callers can skip the page instead of crashing.
    """
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
        return r.text
    # Narrow catch: the original bare `except:` also swallowed
    # KeyboardInterrupt/SystemExit, making the crawler hard to stop.
    except requests.RequestException:
        print('页面访问失败,', url)
        return None

def download_pic(imgUrl):
    """Fetch an image URL and return the raw requests Response object.

    Note: unlike download_page(), errors are not caught here — a failed
    image fetch propagates to the caller.
    """
    response = requests.get(imgUrl, headers=headers, timeout=10)
    return response

def mkdir(path):
    """Ensure *path* exists as a directory and return it unchanged.

    Uses os.makedirs(..., exist_ok=True): intermediate directories are
    created as needed, and the original check-then-create race (another
    process creating the directory between exists() and mkdir()) cannot
    raise FileExistsError.
    """
    os.makedirs(path, exist_ok=True)
    return path

def get_content(html):
    """Parse one archive-listing page and download every image it links to.

    For each article link found in *html*: fetch the article page, collect
    the image URLs from its body, and save each image into ./pictures.
    Updates the module-level sum_page / sum_images counters.
    """
    selector = etree.HTML(html)
    # Article links on the listing page.
    pages = selector.xpath('//div[@class="archive-row"]//ul/li//a[@class="thumb-link"]/@href')

    for page_url in pages:
        global sum_page
        sum_page += 1
        print('这是下载的第%s个页面,%s' % (sum_page, page_url))
        page_html = download_page(page_url)
        if page_html is None:
            # Fetch failed; skip — the original passed None straight into
            # etree.HTML(), which raises ValueError.
            continue
        page_selector = etree.HTML(page_html)
        img_lists = page_selector.xpath('//div[@class="entry-content"]//img/@src')
        # Directory the images are saved into (created on first use).
        pictures = mkdir(os.path.join(os.path.curdir, 'pictures'))

        for img_url in img_lists:
            file_name = os.path.basename(img_url)
            img_name = os.path.join(pictures, file_name)
            response = download_pic(img_url)

            with open(img_name, 'wb') as f:
                global sum_images
                sum_images += 1
                # Stream the already-fetched response to disk.  The original
                # issued a second, unused requests.get() here, downloading
                # every image twice.
                for chunk in response.iter_content(1024):
                    f.write(chunk)

def get_content_pages(url, html):
    """Walk every listing page under *url* and download its images.

    *html* is the already-fetched first page; it is parsed only to read
    the total page count from the last pager button.  Falls back to 30
    pages when no numeric pager text is found.
    """
    selector = etree.HTML(html)
    # The last button in the pager group holds the total page count.
    # (Removed the original's leftover debug dump of *html* to temp.html.)
    end_page = ''.join(selector.xpath('//div[@class="btn-group"]//button[last()]//text()'))
    try:
        last_page = int(end_page.strip())
    except ValueError:
        # Missing or non-numeric pager text: assume a fixed upper bound.
        # The original only handled the empty case and would crash on
        # non-numeric text.
        last_page = 30
    for page_no in range(1, last_page + 1):
        page_url = url + '/page/' + str(page_no)
        page_html = download_page(page_url)
        if page_html:
            get_content(page_html)

def main():
    """Entry point: crawl the gallery index and report how many images
    were saved."""
    start_url = 'https://www.jder.net/mx'
    index_html = download_page(start_url)
    get_content_pages(start_url, index_html)
    print('共下载图片数为:', sum_images)

if __name__ == '__main__':
    main()
复制代码

 

相关博文:
阅读排行:
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!
点击右上角即可分享
微信分享提示