爬虫7-多线程爬取壁纸族

复制代码
# -*- coding: utf-8 -*-
"""
@Time    :  2022/3/22 16:57
@Author  : Andrew
@File    : 多线程应用.py
"""
# 1.如何提取单个页面的数据
# 2.设置线程池,多个页面同时抓取
from concurrent.futures.thread import ThreadPoolExecutor

import requests
from lxml import etree


def downloadOnePage(url):
    """Download every wallpaper linked from one listing page.

    The listing page at ``url`` is fetched and each ``<li>`` under the
    hard-coded list XPath is visited: its ``<a href>`` detail page is
    fetched, the image ``src`` is read from that page, and the image bytes
    are written to ``./多线程爬的壁纸族/<title>.<extension of src>``.

    Items or pages whose expected nodes are missing are skipped with a
    message instead of aborting the remaining downloads.

    Args:
        url: Address of the listing page to scrape.

    Raises:
        requests.RequestException: If a request fails or exceeds the timeout.
        OSError: If the output directory or an image file cannot be written.
    """
    # Local import so this function does not rely on the file's import block.
    import os

    save_dir = "./多线程爬的壁纸族/"
    # requests has no default timeout; without one a stalled server would
    # block this worker thread forever.
    timeout = 30

    # open() does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)

    # The with-statement releases the connection even if parsing raises.
    with requests.get(url, timeout=timeout) as resp:
        html = etree.HTML(resp.text)
    uls = html.xpath("/html/body/div[4]/div[5]/ul") if html is not None else []
    if not uls:
        print(url + ": wallpaper list not found, page skipped")
        return

    for li in uls[0].xpath("./li"):
        hrefs = li.xpath("./a/@href")
        titles = li.xpath("./a/@title")
        if not hrefs or not titles:
            print(url + ": list item without link or title skipped")
            continue
        href, imgName = hrefs[0], titles[0]

        with requests.get(href, timeout=timeout) as resp2:
            html2 = etree.HTML(resp2.text)
        srcs = (
            html2.xpath("/html/body/div[4]/div[2]/div[2]/a[1]/img/@src")
            if html2 is not None
            else []
        )
        if not srcs:
            print(href + ": image address not found, item skipped")
            continue
        src = srcs[0]

        # Download the image bytes.
        with requests.get(src, timeout=timeout) as img:
            data = img.content

        # A path separator inside the title would point into a different
        # (probably non-existent) directory, so neutralise it.
        safe_name = imgName.replace("/", "_").replace("\\", "_")
        with open(save_dir + safe_name + "." + src.split(".")[-1], mode="wb") as f:
            f.write(data)
        print(imgName + ":下载完毕!!")


if __name__ == "__main__":
    # Listing pages 1..40 are scraped concurrently, one task per page;
    # fetching them one after another in a plain loop was too slow.
    page_urls = [
        f"https://www.bizhizu.com/sj/fengguang/list-{i}.html" for i in range(1, 41)
    ]
    with ThreadPoolExecutor(len(page_urls)) as pool:
        futures = {
            pool.submit(downloadOnePage, page_url): page_url
            for page_url in page_urls
        }

    # Leaving the with-block waits for every task to finish. An exception
    # raised inside a worker is stored on its future and would otherwise be
    # lost silently, so report each failed page here.
    for future, page_url in futures.items():
        error = future.exception()
        if error is not None:
            print(f"{page_url} failed: {error!r}")
复制代码

 

posted @   乔十六  阅读(39)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 上周热点回顾(3.3-3.9)
· AI 智能体引爆开源社区「GitHub 热点速览」
· 写一个简单的SQL生成工具
点击右上角即可分享
微信分享提示