#!/usr/bin/python3.6
# -*- coding: utf-8 -*-
"""Download the images linked from the paginated listing at jder.net/mx.

The script walks ``<url>/page/<n>`` listing pages, follows every
``thumb-link`` anchor found on them, and saves each ``<img>`` inside the
target page's ``entry-content`` block into ``./pictures``.
"""
import os
import time

import requests
from lxml import etree

# Running totals, updated by get_content() and reported by main().
sum_page = 0
sum_images = 0

headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"}

# Number of listing pages to walk when the pager cannot be read.
DEFAULT_LAST_PAGE = 30


def download_page(url):
    """Return the body of *url* as text, or ``None`` if the request fails.

    Connection errors, timeouts, and non-2xx statuses are all reported on
    stdout and turned into ``None`` so that callers can skip the page.
    """
    try:
        r = requests.get(url, headers=headers, timeout=10)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are swallowed; anything else
        # (KeyboardInterrupt, programming errors) propagates.
        print('页面访问失败,', url)
        return None
    return r.text


def download_pic(imgUrl):
    """Fetch *imgUrl* and return the ``requests.Response`` object.

    The status code is not checked here; callers decide how to treat
    error responses. Network failures raise ``requests.RequestException``.
    """
    return requests.get(imgUrl, headers=headers, timeout=10)


def mkdir(path):
    """Create directory *path* if it does not exist and return *path*."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(path, exist_ok=True)
    return path


def get_content(html):
    """Download every image linked from one listing page.

    *html* is the markup of a listing page. Each ``thumb-link`` href in it
    is fetched, and every image found in that page's ``entry-content``
    block is written to ``./pictures`` under the basename of its URL.
    Pages or images that cannot be fetched are reported and skipped.
    Updates the module-level ``sum_page`` and ``sum_images`` counters.
    """
    global sum_page, sum_images

    selector = etree.HTML(html)
    if selector is None:
        return
    # Links to the individual pages listed on this listing page.
    pages = selector.xpath('//div[@class="archive-row"]//ul/li//a[@class="thumb-link"]/@href')
    if not pages:
        return

    # Directory the images are saved into; created once per listing page.
    pictures = mkdir(os.path.join(os.path.curdir, 'pictures'))

    for page_url in pages:
        sum_page += 1
        print('这是下载的第%s个页面,%s' % (sum_page, page_url))

        page_html = download_page(page_url)
        if not page_html:
            # download_page already reported the failure.
            continue
        page_selector = etree.HTML(page_html)
        if page_selector is None:
            continue
        # Image URLs inside the page body.
        img_lists = page_selector.xpath('//div[@class="entry-content"]//img/@src')

        for img_url in img_lists:
            file_name = os.path.basename(img_url)
            if not file_name:
                # A URL ending in '/' has no usable file name.
                continue
            img_name = os.path.join(pictures, file_name)

            try:
                response = download_pic(img_url)
                # Do not save an HTTP error page as if it were an image.
                response.raise_for_status()
            except requests.RequestException:
                print('图片下载失败,', img_url)
                continue

            with open(img_name, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
            # Count only images that were actually written.
            sum_images += 1


def get_content_pages(url, html):
    """Walk every listing page under *url* and download its images.

    *html* is the markup of the first listing page; it is only used to
    read the last page number from the pager (text of the last button in
    the ``btn-group`` block). When that text is missing or not a number,
    ``DEFAULT_LAST_PAGE`` pages are walked. Does nothing when *html* is
    empty or ``None`` (the initial request failed).
    """
    if not html:
        return

    selector = etree.HTML(html)
    # Snapshot of the first page, kept from the original script.
    with open('temp.html', 'w', encoding='utf-8') as f:
        f.write(html)

    pager_text = ''
    if selector is not None:
        pager_text = ''.join(
            selector.xpath('//div[@class="btn-group"]//button[last()]//text()')
        )
    try:
        last_page = int(pager_text.strip())
    except ValueError:
        # Empty, whitespace-only, or non-numeric pager text.
        last_page = DEFAULT_LAST_PAGE

    # Build each listing-page URL and process it.
    for page_no in range(1, last_page + 1):
        page_html = download_page(url + '/page/' + str(page_no))
        if page_html:
            get_content(page_html)


def main():
    """Entry point: crawl the listing and print the number of saved images."""
    url = 'https://www.jder.net/mx'
    html = download_page(url)
    get_content_pages(url, html)
    print('共下载图片数为:', sum_images)


if __name__ == '__main__':
    main()
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY"
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!