python爬虫案例06:简历模板下载

# 导入需要的模块
import requests
import os
from lxml import etree


# Scrape one page of free resume templates: fetch the listing page, follow
# every template link to its detail page, then download the archive behind
# the first download link and save it under the template's title.

# Listing request: search endpoint, browser-like headers and query parameters
url = 'https://aspx.sc.chinaz.com/query.aspx?'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}
params = {
    'keyword': '免费',
    'issale': 0,
    'classID': 864,
    'page': 4,
}

# Directory the downloaded archives are written to
download_dir = "E:\\python\\01初识爬虫,requests使用\\download\\"
# Create it up front: open(..., 'wb') does not create missing directories
os.makedirs(download_dir, exist_ok=True)

# Characters that Windows does not allow in file names, each mapped to "_"
illegal_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Send the listing request and parse the response with etree.
# A timeout keeps a stalled connection from hanging the script forever.
response = requests.get(url, params=params, headers=headers, timeout=30).text
html = etree.HTML(response)

hrefs = html.xpath('//*[@id="container"]/div/a/@href')
for href in hrefs:
    # Follow the link to the template's detail page
    resp = requests.get(url=href, headers=headers, timeout=30)
    resp.encoding = 'utf8'
    down_links = etree.HTML(resp.text)

    # Extract the resume title and the first download address
    names = down_links.xpath('//h1/text()')
    links = down_links.xpath('//*[@id="down"]/div[2]/ul/li[1]/a/@href')
    if not names or not links:
        # The page layout did not match; skip it instead of raising IndexError
        print("Skipped (no title or download link): " + href)
        continue

    # Target path: download directory + sanitized title + .rar
    down_name = download_dir + names[0].strip().translate(illegal_chars) + ".rar"
    # Download the archive
    down = requests.get(url=links[0], headers=headers, timeout=30).content
    # Save it; the with-statement closes the file, so no explicit close() is needed
    with open(down_name, 'wb') as f:
        f.write(down)
        print("Downloading")

print("下载完了!!!")

 

posted @ 2023-03-14 14:07  shuxi_520  阅读(55)  评论(0)  编辑  收藏  举报