至美化爬虫
就先上代码吧。
# 手机壁纸 (mobile wallpapers)
# 单页版本 (single-page version)
#
# Scrapes one listing page of zhutix.com, collects every collection's
# title and detail-page URL, then downloads each collection's images
# into its own sub-folder under <base_dir>/imgs/<collection title>/.
import os
import requests
from bs4 import BeautifulSoup

all_in_url = 'https://zhutix.com/mobile/page/1/'
res = BeautifulSoup(requests.get(all_in_url, timeout=30).text, 'lxml')
# Each anchor carries both the collection title (text) and its
# detail-page URL (href) — no need for two parallel findAll() lists
# paired by index as the original did.
all_a = res.findAll('a', class_="imglist-char shu", target="_blank")
print('这一页共有' + str(len(all_a)) + '个合集')

# Collection title -> detail-page URL.
name_link = {a.getText(): a['href'] for a in all_a}

base_dir = "C:/Users/Administrator/PycharmProjects/2021合集/爬虫/至美化爬虫/手机壁纸/"
the_num = 0
# 这个页面的所有合集 — walk every collection on this page.
for k_key, value in name_link.items():
    detail = BeautifulSoup(requests.get(value, timeout=30).text, 'lxml')
    thumb = detail.findAll('div', class_="thumbCont")
    # NOTE(review): the site stores the full-size image address in a
    # custom 'url' attribute, not 'src' — confirm if the site changes.
    all_imgs = [img['url'] for img in thumb[0]('img')]

    # 创建文件夹合集对应的 与 适当的提示 — one folder per collection.
    the_num += 1
    print('第‖ {} ‖合集『{}』正在解析'.format(the_num, k_key))
    target_dir = base_dir + 'imgs/' + str(k_key)
    # Creates missing parents and is a no-op when the folder already
    # exists — replaces the original's fragile mkdir + bare except.
    os.makedirs(target_dir, exist_ok=True)

    # 下载图片 — download each image, numbered sequentially from 1.
    for num, lin in enumerate(all_imgs, start=1):
        name = target_dir + '/' + str(num) + '.jpg'
        r = requests.get(lin, timeout=30)
        try:
            with open(name, "wb") as f:
                f.write(r.content)
            print('{}合集的第{}张下载成功'.format(k_key, num))
        except OSError:
            # Collection titles may contain characters that are illegal
            # in filenames; skip this image and keep going.
            print("============文件名有误============")
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 多页版本 (multi-page version)
#
# Same as the single-page scraper, but first reads the pagination bar
# to discover how many listing pages exist, then processes every page.
import os
import requests
from bs4 import BeautifulSoup

url = 'https://zhutix.com/mobile/'
bres = BeautifulSoup(requests.get(url, timeout=30).text, 'lxml')
pagination = bres.findAll('div', class_="pagination shu")
# The pagination <ul> contains 3 non-page items (prev/next/ellipsis
# style entries), hence the -3 to get the real page count.
t_num = len(pagination[0]('li')) - 3
print('共有' + str(t_num) + '页')

base_dir = "C:/Users/Administrator/PycharmProjects/2021合集/爬虫/至美化爬虫/手机壁纸/"
# Distinct loop variable: the original reused `i` for both the page
# loop and the inner anchor loop, shadowing the outer index.
for page_no in range(1, t_num + 1):
    all_in_url = 'https://zhutix.com/mobile/page/{}/'.format(page_no)
    res = BeautifulSoup(requests.get(all_in_url, timeout=30).text, 'lxml')
    # Each anchor carries both the title (text) and detail URL (href).
    all_a = res.findAll('a', class_="imglist-char shu", target="_blank")
    print('这一页共有' + str(len(all_a)) + '个合集')
    # Collection title -> detail-page URL.
    name_link = {a.getText(): a['href'] for a in all_a}

    the_num = 0
    # 这个页面的所有合集 — walk every collection on this page.
    for k_key, value in name_link.items():
        detail = BeautifulSoup(requests.get(value, timeout=30).text, 'lxml')
        thumb = detail.findAll('div', class_="thumbCont")
        # NOTE(review): image address lives in a custom 'url' attribute,
        # not 'src' — confirm if the site changes.
        all_imgs = [img['url'] for img in thumb[0]('img')]

        # 创建文件夹合集对应的 与 适当的提示 — one folder per collection.
        the_num += 1
        print('第‖ {} ‖合集『{}』正在解析'.format(the_num, k_key))
        target_dir = base_dir + 'imgs/' + str(k_key)
        # Creates missing parents, no-op when present — replaces the
        # original's fragile mkdir + bare except.
        os.makedirs(target_dir, exist_ok=True)

        # 下载图片 — download each image, numbered sequentially from 1.
        for num, lin in enumerate(all_imgs, start=1):
            name = target_dir + '/' + str(num) + '.jpg'
            r = requests.get(lin, timeout=30)
            try:
                with open(name, "wb") as f:
                    f.write(r.content)
                print('{}合集的第{}张下载成功'.format(k_key, num))
            except OSError:
                # Titles may contain filename-illegal characters;
                # skip this image and keep going.
                print("============文件名有误============")
# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
# 电脑壁纸 (desktop wallpapers)
# 直接在手机壁纸多页版本上进行设计 — same multi-page design, pointed at
# the /wallpaper/ section and a different output folder.
import os
import requests
from bs4 import BeautifulSoup

url = 'https://zhutix.com/wallpaper/'
bres = BeautifulSoup(requests.get(url, timeout=30).text, 'lxml')
pagination = bres.findAll('div', class_="pagination shu")
# The pagination <ul> contains 3 non-page items (prev/next/ellipsis
# style entries), hence the -3 to get the real page count.
t_num = len(pagination[0]('li')) - 3
print('共有' + str(t_num) + '页')

base_dir = "C:/Users/Administrator/PycharmProjects/2021合集/爬虫/至美化爬虫/电脑壁纸/"
# Distinct loop variable: the original reused `i` for both the page
# loop and the inner anchor loop, shadowing the outer index.
for page_no in range(1, t_num + 1):
    all_in_url = 'https://zhutix.com/wallpaper/page/{}/'.format(page_no)
    res = BeautifulSoup(requests.get(all_in_url, timeout=30).text, 'lxml')
    # Each anchor carries both the title (text) and detail URL (href).
    all_a = res.findAll('a', class_="imglist-char shu", target="_blank")
    print('这一页共有' + str(len(all_a)) + '个合集')
    # Collection title -> detail-page URL.
    name_link = {a.getText(): a['href'] for a in all_a}

    the_num = 0
    # 这个页面的所有合集 — walk every collection on this page.
    for k_key, value in name_link.items():
        detail = BeautifulSoup(requests.get(value, timeout=30).text, 'lxml')
        thumb = detail.findAll('div', class_="thumbCont")
        # NOTE(review): image address lives in a custom 'url' attribute,
        # not 'src' — confirm if the site changes.
        all_imgs = [img['url'] for img in thumb[0]('img')]

        # 创建文件夹合集对应的 与 适当的提示 — one folder per collection.
        the_num += 1
        print('第‖ {} ‖合集『{}』正在解析'.format(the_num, k_key))
        target_dir = base_dir + 'imgs/' + str(k_key)
        # Creates missing parents, no-op when present — replaces the
        # original's fragile mkdir + bare except.
        os.makedirs(target_dir, exist_ok=True)

        # 下载图片 — download each image, numbered sequentially from 1.
        for num, lin in enumerate(all_imgs, start=1):
            name = target_dir + '/' + str(num) + '.jpg'
            r = requests.get(lin, timeout=30)
            try:
                with open(name, "wb") as f:
                    f.write(r.content)
                print('{}合集的第{}张下载成功'.format(k_key, num))
            except OSError:
                # Titles may contain filename-illegal characters;
                # skip this image and keep going.
                print("============文件名有误============")
目前代码部分到这里结束。
目前就这些了。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 单元测试从入门到精通
· 上周热点回顾(3.3-3.9)
· winform 绘制太阳,地球,月球 运作规律