Scraping free resume templates from 站长素材 (sc.chinaz.com)

import requests
import os
from lxml import etree

if __name__ == '__main__':
    # Create the JianLi folder if it does not exist yet
    if not os.path.exists('./JianLi'):
        os.makedirs('./JianLi')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36 Edg/89.0.774.48'
    }
    url = 'https://sc.chinaz.com/jianli/free.html'
    page_text = requests.get(url=url, headers=headers).text
    tree = etree.HTML(page_text)
    # Detail-page links for every template on the listing page (protocol-relative hrefs)
    all_src = tree.xpath('//div[@class="sc_warp mt20"]//p/a/@href')
    for src in all_src:
        detail_url = 'https:' + src
        jianli_content = requests.get(url=detail_url, headers=headers).text
        tree = etree.HTML(jianli_content)
        # First download link in the "down_wrap" block of the detail page
        down_url = tree.xpath('//div[@class="down_wrap"]//ul/li/a/@href')[0]
        jianli_data = requests.get(url=down_url, headers=headers).content
        # Use the template title as the file name; the page is UTF-8 but requests
        # decodes it as ISO-8859-1, so re-encode and decode to repair the garbled text
        name = tree.xpath('//div[@class="ppt_tit clearfix"]/h1/text()')[0] + '.rar'
        down_name = name.encode('ISO-8859-1').decode('UTF-8')
        JianLi_path = './JianLi/' + down_name
        with open(JianLi_path, 'wb') as fp:
            fp.write(jianli_data)
        print(down_name, 'downloaded successfully!')
    print('All downloads finished!')
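
The script above only walks the first listing page. Below is a minimal sketch for crawling several listing pages; the paging pattern free_{n}.html for pages 2 and onward is an assumption about the site's URL scheme and should be verified in a browser before relying on it.

import requests
from lxml import etree

headers = {'User-Agent': 'Mozilla/5.0'}
for page in range(1, 4):  # crawl listing pages 1..3; adjust the range as needed
    if page == 1:
        url = 'https://sc.chinaz.com/jianli/free.html'
    else:
        # Assumed paging pattern for pages 2+; confirm against the real site
        url = f'https://sc.chinaz.com/jianli/free_{page}.html'
    page_text = requests.get(url=url, headers=headers).text
    tree = etree.HTML(page_text)
    all_src = tree.xpath('//div[@class="sc_warp mt20"]//p/a/@href')
    # Feed each href into the same per-template download loop shown above
    print(f'page {page}: {len(all_src)} templates found')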