爬百度图片

#!/usr/bin/env python
# _*_ coding: utf-8 _*_
# @Time : 2022/9/8 14:31 
# @Author : AndyXi
# @Version : V 0.1
# @File : 爬img.py
# @desc :

import json
from datetime import datetime
import time
import requests
from tqdm import tqdm

def get_filename():
    """Build a file stem from the current local time.

    Returns:
        A string such as ``2022-09-08-14-31-05-123456``: year, month, day,
        hour, minute, second and microsecond joined by hyphens.
    """
    timestamp = datetime.now()
    return f"{timestamp:%Y-%m-%d-%H-%M-%S-%f}"


if __name__ == "__main__":
    # Script entry point: ask for a keyword and an image count, send one
    # query to Baidu's image-search JSON endpoint, then download every
    # returned thumbnail into downlod_img/ with a per-file progress bar.
    import os
    from urllib.parse import quote

    word = input("请输入要爬取的关键字: ")
    page_size = int(input("请输入要爬取的张数: "))

    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36"
    }

    # Percent-encode the keyword so characters such as "&", "#" or spaces
    # cannot break or truncate the query string.
    encoded_word = quote(word, safe="")

    res_img = requests.get(
        "https://image.baidu.com/search/acjson?tn=resultjson_com&logid=8057700054872665483&ipn=rj&ct=201326592&is="
        f"&fp=result&fr=&word={encoded_word}"
        f"&cg=star&queryWord={encoded_word}"
        "&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&s=&se=&tab=&width="
        "&height=&face=0&istype=2&qc=&nc=1&expermode="
        f"&nojc=&isAsync=&pn=60&rn={page_size}&gsm=3c&1662621074446=",
        headers=header,
    )
    res_dic = json.loads(res_img.text)

    # The target directory must exist before open(..., "wb") is called.
    save_dir = "downlod_img"
    os.makedirs(save_dir, exist_ok=True)

    # Each thumbnail is streamed to disk in 1 KiB chunks instead of being
    # read into memory in one piece; tqdm reports progress per file.
    for item in res_dic["data"]:
        img_url = item.get("thumbURL", "")
        if not img_url:
            # Entries without a thumbURL yield an empty URL, which
            # requests.get would reject; skip them.
            continue

        # Generate the name once so the progress-bar label matches the file
        # that is actually written.
        img_name = get_filename() + ".jpg"

        # The context manager releases the streamed connection even if the
        # download is interrupted.
        with requests.get(img_url, headers=header, stream=True) as img_data:
            # Size in bytes when the server reports it; None makes tqdm show
            # a running byte count without a total.
            content_length = img_data.headers.get("content-length")
            content_size = int(content_length) if content_length else None

            with open(f"{save_dir}/{img_name}", "wb") as f, \
                    tqdm(desc=img_name, total=content_size) as bar:
                for chunk in img_data.iter_content(chunk_size=1024):
                    if chunk:
                        f.write(chunk)
                        bar.update(len(chunk))
posted @ 2022-09-08 16:51 青空如璃阅读(21) 评论(0) 编辑收藏举报
刷新页面返回顶部
青空如璃

爬百度图片

公告