一、步骤
1、请求主页面,拿到它的HTML源码,然后从中提取各个子页面的链接地址(a标签的href)
2、通过href请求子页面,拿到子页面的内容,再从子页面中找到图片的下载地址(img标签的src)
3、下载图片
二、代码
import requests
from bs4 import BeautifulSoup
import os
# Browser-like request headers, sent with every HTTP request below.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.54 Safari/537.36 Edg/95.0.1020.40"
}
# Listing page whose <div class="TypeList"> links to the child pages.
url = "https://umei.cc/bizhitupian/weimeibizhi/"
# Directory (relative to the current working directory) that receives the images.
img_dir = "img"
# Seconds to wait on a request before giving up, so a stalled server cannot
# hang the script forever.
request_timeout = 10


def child_page_url(base_url, href):
    """Return the child page URL: *base_url* plus the last path segment of *href*."""
    return base_url + href.split("/")[-1]


def image_file_name(src):
    """Return the last path segment of the image URL *src*, used as the file name."""
    return src.split("/")[-1]


def fetch_html(page_url):
    """Download *page_url* and return its body decoded as UTF-8.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
    """
    # `headers` must be passed by keyword: the second positional parameter of
    # requests.get() is `params` (the query string), not the request headers.
    with requests.get(page_url, headers=headers, timeout=request_timeout) as resp:
        resp.raise_for_status()
        resp.encoding = "utf-8"
        return resp.text


def find_image_src(child_html):
    """Return the ``src`` of the first <img> inside <p align="center">, or None."""
    child_page = BeautifulSoup(child_html, "html.parser")
    paragraph = child_page.find("p", align="center")
    if paragraph is None:
        return None
    img = paragraph.find("img")
    if img is None:
        return None
    return img.get("src")


def download_image(src):
    """Download the image at *src* into ``img_dir`` and return the saved path.

    Returns None, without writing anything, when no file name can be derived
    from *src*.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
    """
    img_name = image_file_name(src)
    if not img_name:
        return None
    with requests.get(src, headers=headers, timeout=request_timeout) as img_resp:
        # Refuse to save an HTTP error page under an image file name.
        img_resp.raise_for_status()
        # .content is the raw response body as bytes.
        content = img_resp.content
    path = os.path.join(img_dir, img_name)
    with open(path, mode="wb") as f:
        f.write(content)
    return path


def main():
    """Save the picture shown on every child page linked from ``url``.

    Raises:
        requests.RequestException: if the listing page cannot be fetched.
        RuntimeError: if the listing page has no <div class="TypeList">.
    """
    main_page = BeautifulSoup(fetch_html(url), "html.parser")
    type_list = main_page.find("div", class_="TypeList")
    if type_list is None:
        raise RuntimeError('no <div class="TypeList"> found on ' + url)

    # open() never creates missing directories, on any platform, so make sure
    # the target directory exists once, before the first image is written.
    os.makedirs(img_dir, exist_ok=True)

    seen = set()
    for link in type_list.find_all("a"):
        href = link.get("href")
        if not href:
            continue
        child_href = child_page_url(url, href)
        # Several anchors may point at the same child page; fetch each once.
        if child_href in seen:
            continue
        seen.add(child_href)
        try:
            src = find_image_src(fetch_html(child_href))
            if not src:
                print("no image found on " + child_href)
                continue
            download_image(src)
        except requests.RequestException as exc:
            # One broken page or image must not abort the remaining downloads.
            print("skipping " + child_href + ": " + str(exc))


# Guarded so the crawl starts only when the file is run as a script.
if __name__ == "__main__":
    main()