代码:
"""Download every image reachable from a start URL into a local folder."""

import os
from urllib.parse import urldefrag, urljoin, urlparse

import requests
from bs4 import BeautifulSoup

# Seconds to wait for any single HTTP request before giving up, so one
# unresponsive server cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def is_valid_url(url):
    """Return True when *url* has both a scheme and a network location."""
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)


def _has_image_content_type(response):
    """Return True when the response's Content-Type header starts with image/."""
    content_type = response.headers.get('Content-Type') or ''
    return content_type.lower().startswith('image/')


def is_image_url(url):
    """Return True when a HEAD request to *url* reports an image content type.

    Redirects are followed so that the final resource is the one classified.
    Any request failure is treated as "not an image".
    """
    try:
        response = requests.head(
            url, allow_redirects=True, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException:
        return False
    return _has_image_content_type(response)


def _filename_from_url(url):
    """Derive a local file name from the path component of *url*."""
    # Use only the path so query strings and fragments never end up in the
    # file name; fall back to a fixed name when the path has no last segment.
    name = os.path.basename(urlparse(url).path)
    return name or 'image'


def _save_image(url, content, download_folder):
    """Write already-downloaded image bytes into *download_folder*."""
    img_name = os.path.join(download_folder, _filename_from_url(url))
    try:
        if download_folder:
            os.makedirs(download_folder, exist_ok=True)
        # The file is opened only after the download succeeded, so a failed
        # request never leaves an empty file behind.
        with open(img_name, 'wb') as f:
            f.write(content)
    except OSError as e:
        print(f"保存图片时发生错误:{e}")
        return
    print(f"图片已下载:{img_name}")


def get_images_from_url(url, download_folder, visited=None):
    """Recursively download all images reachable from *url*.

    If *url* itself serves an image it is saved directly. Otherwise the
    response is parsed as HTML and every ``<img src=...>`` target is processed
    the same way, so an ``<img>`` that points at another page is crawled too.

    Args:
        url: Absolute URL of an image or an HTML page.
        download_folder: Directory that receives the downloaded files.
        visited: Set of URLs already processed during this crawl. Callers
            normally omit it; it is threaded through the recursion so that
            pages referring back to each other cannot recurse forever.
    """
    if visited is None:
        visited = set()

    if not is_valid_url(url):
        return

    # Fragments never change the fetched resource, so drop them before the
    # duplicate check.
    url = urldefrag(url).url
    if url in visited:
        return
    visited.add(url)

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"请求过程中发生错误:{e}")
        return

    # Classify from the GET response itself rather than issuing an extra HEAD
    # request, which some servers reject.
    if _has_image_content_type(response):
        _save_image(url, response.content, download_folder)
        return

    # Parse the HTML page and look for every image tag.
    soup = BeautifulSoup(response.text, 'html.parser')
    for img in soup.find_all('img'):
        img_src = img.get('src')
        if not img_src:
            continue
        try:
            # Build the absolute image URL relative to the current page.
            img_url = urljoin(url, img_src)
        except ValueError:
            # Malformed src attribute; skip it and keep going.
            continue
        # Each target is handled independently, so one broken image does not
        # abort the rest of the page.
        get_images_from_url(img_url, download_folder, visited)


# URL where crawling starts.
start_url = 'https://www.baidu.com/'

# Folder in which downloaded images are saved.
download_folder = r'F:\jingguan\zhao'


def main():
    """Create the download folder if needed and run the crawl."""
    os.makedirs(download_folder, exist_ok=True)
    get_images_from_url(start_url, download_folder)
    print("图片下载完成。")


if __name__ == "__main__":
    main()
分类:
python小程序
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY"
· Docker 太简单,K8s 太复杂?w7panel 让容器管理更轻松!
2022-04-28 人脸训练
2020-04-28 Vue -路由(Vue -Router)
2020-04-28 Vue常用的UI组件-Element(PC端Vue组件库)(饿了么组件)(推荐)
2020-04-28 Vue常用的UI组件-ant-design-vue
2020-04-28 Vue常用的UI组件-Mint UI(移动端Vue组件库)(饿了么组件)
2020-04-28 Vue常用的UI组件-vant(轻量、可靠的移动端Vue组件库)(推荐)