爬虫7-多线程爬取壁纸族
# -*- coding: utf-8 -*-
"""
Multithreaded wallpaper scraper for www.bizhizu.com.

@Time : 2022/3/22 16:57
@Author : Andrew
@File : 多线程应用.py

Approach:
1. Work out how to extract the data from a single list page.
2. Use a thread pool so that many list pages are scraped at the same time.
"""
import os
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests
from lxml import etree

# Directory the images are written to (relative to the working directory).
_SAVE_DIR = "./多线程爬的壁纸族/"
# Seconds to wait on any single HTTP request. Without a timeout, one stalled
# connection would block its worker thread forever.
_TIMEOUT = 10


def _fetch_tree(url):
    """Fetch *url* and return the parsed HTML tree.

    The response is closed as soon as its text has been parsed.

    Raises:
        requests.RequestException: on connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    with requests.get(url, timeout=_TIMEOUT) as resp:
        resp.raise_for_status()
        return etree.HTML(resp.text)


def _safe_name(title):
    """Return *title* with path separators replaced so it stays one file name."""
    for sep in (os.sep, os.altsep):
        if sep:
            title = title.replace(sep, "_")
    return title


def _download_one_image(detail_url, title):
    """Open one wallpaper's detail page and save its image under ``_SAVE_DIR``.

    The file is named ``<title>.<extension of the image URL>``.

    Raises:
        requests.RequestException: if the detail page or image cannot be fetched.
        OSError: if the image file cannot be written.
    """
    detail = _fetch_tree(detail_url)
    srcs = []
    if detail is not None:
        srcs = detail.xpath("/html/body/div[4]/div[2]/div[2]/a[1]/img/@src")
    if not srcs:
        print(f"{title}: image not found on {detail_url}, skipped")
        return
    src = srcs[0]

    # Download the image bytes, releasing the connection before touching disk.
    with requests.get(src, timeout=_TIMEOUT) as img:
        img.raise_for_status()
        content = img.content

    path = _SAVE_DIR + _safe_name(title) + "." + src.split(".")[-1]
    with open(path, mode="wb") as f:
        f.write(content)
    print(title + ":下载完毕!!")


def downloadOnePage(url):
    """Download every wallpaper linked from one list page.

    Each ``<li>`` of the list found at a fixed XPath supplies a detail-page
    link (``a/@href``) and an image title (``a/@title``). The detail page is
    then fetched to find the real image URL, and the image is saved into
    ``_SAVE_DIR``.

    A failure on one image is reported and skipped so that it does not abort
    the remaining images of the page.

    Args:
        url: Address of the list page to scrape.

    Raises:
        requests.RequestException: if the list page itself cannot be fetched.
    """
    # open() does not create missing directories, so make sure the target exists.
    os.makedirs(_SAVE_DIR, exist_ok=True)

    # Fetch the list page source.
    page = _fetch_tree(url)
    # Defensive: treat a parse that produced no tree the same as "no list found".
    uls = page.xpath("/html/body/div[4]/div[5]/ul") if page is not None else []
    if not uls:
        print(f"{url}: wallpaper list not found, page skipped")
        return

    for li in uls[0].xpath("./li"):
        hrefs = li.xpath("./a/@href")
        titles = li.xpath("./a/@title")
        if not hrefs or not titles:
            # Entry without a link or a title: nothing to download.
            continue
        try:
            _download_one_image(hrefs[0], titles[0])
        except (requests.RequestException, OSError) as exc:
            print(f"{titles[0]}: download failed ({exc})")


if __name__ == "__main__":
    page_urls = [
        f"https://www.bizhizu.com/sj/fengguang/list-{i}.html"
        for i in range(1, 41)
    ]
    # Calling downloadOnePage(url) for each page in a plain loop also works but
    # is slow; the thread pool overlaps the network waits of all pages.
    with ThreadPoolExecutor(max_workers=len(page_urls)) as pool:
        futures = {pool.submit(downloadOnePage, u): u for u in page_urls}
        # Surface exceptions that would otherwise stay hidden inside the futures.
        for fut in as_completed(futures):
            exc = fut.exception()
            if exc is not None:
                print(f"{futures[fut]}: page failed ({exc!r})")
分类:
python
标签:
python爬虫笔记
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· winform 绘制太阳,地球,月球 运作规律
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾(3.3-3.9)
· AI 智能体引爆开源社区「GitHub 热点速览」
· 写一个简单的SQL生成工具