使用 requests_html 批量下载图片
# Self-taught; corrections and suggestions from more experienced developers
# are very welcome -- thank you in advance.
#
# Batch-download the pictures listed on one gallery page with requests_html:
# render the page, walk every `.img` element, and save each picture into
# SAVE_DIR. A malformed entry or a failed download is reported and skipped so
# that one bad item never aborts the rest of the run.
import re
from pathlib import Path
from urllib.parse import urljoin

import urllib3
from lxml import etree  # noqa: F401 (unused in this script; import kept as-is)
from requests_html import HTMLSession

# NOTE(security): TLS certificate verification is switched off further down
# (session.verify = False); this call only silences the resulting warnings.
urllib3.disable_warnings()

url = "https://www.q.com/feature/travel/2527.html"
# NOTE(review): 'br' is advertised in Accept-Encoding; Brotli responses are only
# decoded when a brotli package is installed -- confirm for the target machine.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36 Edg/90.0.818.56',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
}

SAVE_DIR = Path("D:/Pictures/OP/")
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs forever

# Captures the page name at the end of a detail link, e.g. "105-0128.html".
_PAGE_NAME_RE = re.compile(r"/imgbuy/(.+?)$")

# Characters Windows refuses in file names, plus ASCII control characters.
_UNSAFE_FILENAME_CHARS = str.maketrans(
    {ch: "_" for ch in '\\/:*?"<>|' + "".join(map(chr, range(32)))}
)


def _safe_filename(name: str) -> str:
    """Return *name* with characters that are invalid in file names replaced by "_"."""
    cleaned = name.translate(_UNSAFE_FILENAME_CHARS).rstrip(" .")
    return cleaned or "_"


def _parse_entry(element):
    """Extract ``(title, image_url)`` from one ``.img`` element.

    Returns ``None`` when the element has no anchor, no ``<img>`` inside the
    anchor, no ``/imgbuy/...`` href, or no ``lowsrc`` attribute, so the caller
    can skip it instead of crashing on an index or type error.
    """
    anchors = element.xpath('.//a')
    pictures = element.xpath('.//a/img')
    if not anchors or not pictures:
        return None

    href = anchors[0].attrs.get('href')  # e.g. /imgbuy/105-0128.html
    match = _PAGE_NAME_RE.search(href or "")
    lowsrc = pictures[0].attrs.get('lowsrc')
    if match is None or not lowsrc:
        return None

    page_name = match.group(1)  # e.g. 105-0128.html
    alt = pictures[0].attrs.get('alt', "未获取到标题")
    # The page name is appended so two pictures sharing a title do not
    # overwrite each other on disk.
    title = (alt if alt else "未取到标题") + page_name
    # urljoin leaves absolute URLs alone and resolves relative ones.
    return title, urljoin(url, lowsrc)


session = HTMLSession()
session.verify = False
try:
    r = session.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    try:
        # render() runs the page in headless Chromium, like opening it in a
        # browser; Chromium is downloaded automatically on first use.
        r.html.render()

        images = r.html.find('.img')  # same as $('.img') in the browser console
        for element in images:
            entry = _parse_entry(element)
            if entry is None:
                print("跳过: 条目缺少链接、图片或 lowsrc 属性")
                continue

            title, src = entry
            print(title, src)
            try:
                r_save_pic = session.get(src, headers=header, timeout=REQUEST_TIMEOUT)
                # Do not store an HTTP error page under a .jpg name.
                r_save_pic.raise_for_status()
                SAVE_DIR.mkdir(parents=True, exist_ok=True)
                with open(SAVE_DIR / f"{_safe_filename(title)}.jpg", "wb") as fp:
                    fp.write(r_save_pic.content)
            except Exception as msg:
                # Per-item boundary: report the failure and continue.
                print("下载中出现异常:%s" % str(msg))
    finally:
        r.close()
finally:
    # Also shuts down the Chromium instance started by render(), if any.
    session.close()