Python 下载站长素材免费简历模板（XPath）

import os.path
import re
from urllib.parse import urljoin, urlsplit

import requests
from lxml import etree

def build_page_url(page_no):
    """Return the listing URL for the given 1-based page number.

    The first page has no numeric suffix; later pages use ``free_<n>.html``.
    """
    if page_no == 1:
        return 'https://sc.chinaz.com/jianli/free.html'
    return 'https://sc.chinaz.com/jianli/free_' + str(page_no) + '.html'


def build_file_name(title, download_url):
    """Build a file name from a template title and its download URL.

    Args:
        title: Human-readable template title scraped from the listing page.
        download_url: Absolute URL of the archive to download.

    Returns:
        ``title`` with path-hostile characters replaced by ``_``, followed by
        the extension found in the URL *path* (empty if the path has none).
    """
    # Look only at the path so a query string such as "?v=1.2" cannot be
    # mistaken for the file extension.
    extension = os.path.splitext(urlsplit(download_url).path)[1]
    # Characters that are path separators or are rejected by common
    # filesystems would make open() fail or write outside the target folder.
    safe_title = re.sub(r'[\\/:*?"<>|\r\n\t]+', '_', title).strip()
    return (safe_title or 'untitled') + extension


def fetch_response(url, headers, timeout=15):
    """GET ``url`` and return the response, raising on HTTP error statuses.

    Args:
        url: Absolute URL to request.
        headers: Request headers to send.
        timeout: Seconds to wait before giving up on the server.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx status.
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response


def main():
    """Download the free resume templates from the first two listing pages.

    Files are written into ``./jianli``. A template whose detail page or
    archive cannot be fetched is reported and skipped so that the remaining
    templates are still downloaded.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs('./jianli', exist_ok=True)

    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
    }
    # Download the first two pages of templates.
    for i in range(1, 3):
        url = build_page_url(i)
        try:
            page = fetch_response(url, headers)
        except requests.RequestException as exc:
            print("第{0}页下载失败".format(i), exc)
            continue
        page.encoding = 'utf-8'
        tree = etree.HTML(page.text)

        for free in tree.xpath('//div[@id="main"]/div/div'):
            hrefs = free.xpath('./a/@href')
            titles = free.xpath('./a/img/@alt')
            if not hrefs or not titles:
                # Not a template card (no link or no thumbnail): skip it
                # instead of failing on an empty XPath result.
                continue
            free_title = titles[0]
            # Resolve relative and protocol-relative ("//host/...") links
            # against the listing page so requests gets an absolute URL.
            free_url = urljoin(url, hrefs[0])

            try:
                # Fetch the detail page that carries the download links.
                free_content = fetch_response(free_url, headers).text
                free_content_tree = etree.HTML(free_content)
                down_links = free_content_tree.xpath(
                    '//div[@class="down_wrap"]/div[2]/ul/li[1]/a/@href')
                if not down_links:
                    print(free_title, "下载失败", "未找到下载链接")
                    continue
                down_path = urljoin(free_url, down_links[0])
                down_path_title = build_file_name(free_title, down_path)
                # Download the archive as raw bytes.
                down_path_content = fetch_response(down_path, headers).content
            except requests.RequestException as exc:
                print(free_title, "下载失败", exc)
                continue

            with open(os.path.join('./jianli', down_path_title), 'wb') as fp:
                fp.write(down_path_content)
            print(down_path_title, "下载成功")
        print("第{0}页下载成功".format(i))
    print('下载完成')


if __name__ == '__main__':
    main()

 

posted @ 2022-10-19 18:22  没有童话的鱼  阅读(97)  评论(0)  编辑  收藏  举报