# Python爬虫案例06:简历模板下载
# 导入需要的模块
import os
from urllib.parse import urljoin

import requests
from lxml import etree
# Download resume templates listed on one search-result page of sc.chinaz.com.
#
# Flow: fetch the listing page, follow every template link to its detail
# page, read the template title and first download link there, then save
# the archive as "<title>.rar" in DOWNLOAD_DIR.

# Search endpoint and query parameters for the listing page.
url = 'https://aspx.sc.chinaz.com/query.aspx?'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36'
}
params = {
    'keyword': '免费',
    'issale': 0,
    'classID': 864,
    'page': 4,
}

# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Folder the downloaded archives are written to.
DOWNLOAD_DIR = "E:\\python\\01初识爬虫,requests使用\\download\\"

# Characters that Windows rejects in file names, each mapped to "_".
_INVALID_FILENAME_CHARS = str.maketrans({c: "_" for c in '\\/:*?"<>|'})

# Create the folder up front: open(..., 'wb') does not create missing
# parent directories.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

# Fetch the listing page and parse it with etree.
listing_resp = requests.get(
    url, params=params, headers=headers, timeout=REQUEST_TIMEOUT
)
listing_resp.raise_for_status()
response = listing_resp.text
html = etree.HTML(response)
hrefs = html.xpath('//*[@id="container"]/div/a/@href')

for href in hrefs:
    # Resolve the link against the listing URL so protocol-relative
    # ("//host/path") and relative hrefs also work; absolute URLs pass
    # through unchanged.
    detail_url = urljoin(listing_resp.url, href)

    # Open the template detail page. A failure here only skips this item.
    try:
        resp = requests.get(
            url=detail_url, headers=headers, timeout=REQUEST_TIMEOUT
        )
        resp.raise_for_status()
    except requests.RequestException as exc:
        print("Skipping", detail_url, "-", exc)
        continue
    resp.encoding = 'utf8'
    down_links = etree.HTML(resp.text)

    # Extract the template title and the first download link.
    name_nodes = down_links.xpath('//h1/text()')
    link_nodes = down_links.xpath('//*[@id="down"]/div[2]/ul/li[1]/a/@href')
    if not name_nodes or not link_nodes:
        # Page layout differs from what the XPath expects; nothing to save.
        print("Skipping", detail_url, "- title or download link not found")
        continue
    names = name_nodes[0].strip().translate(_INVALID_FILENAME_CHARS)
    links = urljoin(resp.url, link_nodes[0])

    # Destination path of the archive.
    down_name = os.path.join(DOWNLOAD_DIR, names + ".rar")

    # Download the archive; check the status so an HTTP error page is
    # never written to disk as a .rar file.
    try:
        archive = requests.get(
            url=links, headers=headers, timeout=REQUEST_TIMEOUT
        )
        archive.raise_for_status()
    except requests.RequestException as exc:
        print("Skipping", links, "-", exc)
        continue
    down = archive.content

    # Save; the with-block closes the file, no explicit close() needed.
    with open(down_name, 'wb') as f:
        f.write(down)
    print("Downloading")

print("下载完了!!!")