Python 下载站长素材免费简历模板（XPath）
"""Download free resume templates from sc.chinaz.com.

The listing pages are parsed with XPath; for every template found, its detail
page is fetched, the first download link is followed, and the archive is saved
into ``./jianli`` under the template's title.
"""
import os.path
import re
from urllib.parse import urljoin, urlparse

import requests
from lxml import etree

# Directory that receives the downloaded archives.
OUTPUT_DIR = './jianli'

HEADERS = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}

# Seconds to wait on each request; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 15

# Download the first two pages of templates.
FIRST_PAGE = 1
LAST_PAGE = 2

# Characters that cannot appear in a file name on common file systems.
UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]+')


def page_url(page_number):
    """Return the listing URL for the 1-based ``page_number``."""
    if page_number == 1:
        return 'https://sc.chinaz.com/jianli/free.html'
    return 'https://sc.chinaz.com/jianli/free_' + str(page_number) + '.html'


def fetch(url):
    """GET ``url`` with the shared headers and timeout.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx response (so error pages are never treated as content).
    """
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def build_filename(title, download_url):
    """Return a safe local file name for a template.

    The name is ``title`` with unsafe characters replaced by ``_`` plus the
    extension taken from the path component of ``download_url`` (so a query
    string or a dotted host name cannot leak into the extension).
    """
    extension = os.path.splitext(urlparse(download_url).path)[1]
    safe_title = UNSAFE_FILENAME_CHARS.sub('_', title).strip() or 'untitled'
    return safe_title + extension


def download_template(card, listing_url):
    """Download the template described by one listing ``card`` element.

    Args:
        card: lxml element for one ``div`` under ``#main`` on a listing page.
        listing_url: URL of the listing page, used to resolve relative links.

    Returns:
        The saved file name, or ``None`` when the element is not a template
        entry or its detail page exposes no download link.

    Raises:
        requests.RequestException: if the detail page or archive request fails.
        OSError: if the archive cannot be written to disk.
    """
    hrefs = card.xpath('./a/@href')
    titles = card.xpath('./a/img/@alt')
    if not hrefs or not titles:
        # Not every div under #main has to be a template card.
        return None

    # Links may be relative or protocol-relative ("//host/path"); resolve
    # them against the page they were found on.
    detail_url = urljoin(listing_url, hrefs[0])
    detail_tree = etree.HTML(fetch(detail_url).text)
    if detail_tree is None:
        return None

    links = detail_tree.xpath('//div[@class="down_wrap"]/div[2]/ul/li[1]/a/@href')
    if not links:
        return None
    archive_url = urljoin(detail_url, links[0])

    filename = build_filename(titles[0], archive_url)
    # Fetch the binary payload before opening the file so that a failed
    # request does not leave an empty file behind.
    archive = fetch(archive_url).content
    with open(os.path.join(OUTPUT_DIR, filename), 'wb') as fp:
        fp.write(archive)
    return filename


def main():
    """Download every template on the configured listing pages."""
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for page_number in range(FIRST_PAGE, LAST_PAGE + 1):
        listing_url = page_url(page_number)
        try:
            listing = fetch(listing_url)
        except requests.RequestException as exc:
            print("第{0}页下载失败: {1}".format(page_number, exc))
            continue

        listing.encoding = 'utf-8'
        tree = etree.HTML(listing.text)
        cards = tree.xpath('//div[@id="main"]/div/div') if tree is not None else []

        for card in cards:
            try:
                filename = download_template(card, listing_url)
            except (requests.RequestException, OSError) as exc:
                # One broken template must not abort the rest of the run.
                print("下载失败:", exc)
                continue
            if filename is not None:
                print(filename, "下载成功")

        print("第{0}页下载成功".format(page_number))

    print('下载完成')


if __name__ == '__main__':
    main()