dl_images_4.py

 

 

复制代码
#!/usr/bin/env python3
import os
import sys
import pandas as pd
import requests
from requests.packages.urllib3.util import Retry
from requests.adapters import HTTPAdapter
from requests import Session
import time
import logging
from logging.handlers import RotatingFileHandler
import re
from clickhouse_driver import Client
from multiprocessing import Process
from multiprocessing import cpu_count
import multiprocessing

'''
        Read the image URLs from the CSV files, request each URL (with retries),
        and download the images in bulk.
'''


def get_xg_images_url():
    """Load the Hong Kong plate failure records from ``./xg_fail_rec.csv``.

    The path is relative to the current working directory.

    Returns:
        A 3-tuple of pandas Series holding the CSV columns ``image_url1``,
        ``license_plate2`` and ``capture_time``, in that order.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        KeyError: if one of the three columns is missing.
    """
    df = pd.read_csv('./xg_fail_rec.csv')
    return df['image_url1'], df['license_plate2'], df['capture_time']
def get_am_images_url():
    """Load the Macau plate failure records from ``./am_fail_rec.csv``.

    The path is relative to the current working directory.

    Returns:
        A 3-tuple of pandas Series holding the CSV columns ``image_url1``,
        ``license_plate2`` and ``capture_time``, in that order.

    Raises:
        FileNotFoundError: if the CSV file does not exist.
        KeyError: if one of the three columns is missing.
    """
    df = pd.read_csv('./am_fail_rec.csv')
    return df['image_url1'], df['license_plate2'], df['capture_time']
def download_img(img_url, num, result_path, plateNo, ct, access_fail_c, timeout=30):
    """Download one image and save it as ``<plateNo>_<ct>+<num>.jpg``.

    Intended to run as the target of a child process: every failure is
    recorded by incrementing the shared counter and written to the root logger.

    Args:
        img_url: URL of the image to fetch.
        num: Sequence number of this download; used in the file name and in
            the log messages.
        result_path: Directory the image is written to (created if missing).
        plateNo: License plate value used as the file name prefix.
        ct: Capture time value used in the file name.
        access_fail_c: Shared ``multiprocessing.Value`` counting failed
            downloads; it must have been created with a lock (the default).
        timeout: Seconds to wait for the server before giving up, so that a
            stalled connection cannot block the worker forever.

    Raises:
        SystemExit: with code 1 when the request itself fails (connection
            error, timeout, retries exhausted).
    """
    # Look the logger up here instead of relying on a module global that only
    # exists when the file is executed as a script.
    log = logging.getLogger()

    def count_failure():
        # ``value + 1`` is a read-modify-write; without the lock concurrent
        # child processes can overwrite each other's increments.
        with access_fail_c.get_lock():
            access_fail_c.value += 1

    # format() also copes with non-string cells (e.g. NaN) coming from the CSV.
    img_name = '{}_{}+{}.jpg'.format(plateNo, ct, num)
    # Build the full path rather than os.chdir(), which would change the
    # working directory of the whole process.
    img_path = os.path.join(result_path, img_name)

    retries = Retry(total=10, backoff_factor=0.1, status_forcelist=[500])
    try:
        with Session() as s:
            adapter = HTTPAdapter(max_retries=retries)
            # Apply the retry policy to HTTPS URLs as well as plain HTTP.
            s.mount('http://', adapter)
            s.mount('https://', adapter)
            img_obj = s.get(img_url, timeout=timeout)
    except Exception:
        # Process-level boundary: record the failure and stop this worker.
        # KeyboardInterrupt/SystemExit are deliberately not swallowed.
        count_failure()
        log.error("connect fail {}  ".format(img_url), exc_info=True)
        log.info("child_process {} exited... ".format(num))
        sys.exit(1)

    if img_obj.status_code != 200:
        count_failure()
        log.error("download {} fail staus = {}".format(img_url, img_obj.status_code))
    else:
        try:
            if result_path:
                os.makedirs(result_path, exist_ok=True)
            with open(img_path, 'wb') as f:
                f.write(img_obj.content)
        except OSError:
            count_failure()
            log.error("saved fail {} fail staus = {}".format(img_url, img_obj.status_code))
        else:
            log.info("saved success {}  staus = {}".format(img_url, img_obj.status_code))
    log.info("child_process {} exited... ".format(num))
def _run_download_batch(image_urls, license_plates, capture_times, result_path, access_fail_c):
    """Download one batch of URLs, one child process per URL.

    At most ``cpu_count()`` children run at the same time; the function
    returns once every child of the batch has finished.
    """
    # Function-scope import keeps this block self-contained.
    from multiprocessing import connection

    log = logging.getLogger()
    # The workers write into this directory, so make sure it exists.
    os.makedirs(result_path, exist_ok=True)

    max_workers = cpu_count()
    running = []
    rows = zip(image_urls, license_plates, capture_times)
    for idx, (url, plate, ct) in enumerate(rows):
        if len(running) >= max_workers:
            # Block until at least one child exits instead of spinning in a
            # busy loop, then rebuild the list rather than removing items
            # from it while iterating.
            finished = set(connection.wait([p.sentinel for p in running]))
            still_running = []
            for p in running:
                if p.sentinel in finished:
                    p.join()
                else:
                    still_running.append(p)
            running = still_running
        child = Process(
            target=download_img,
            args=(url, idx, result_path, plate, ct, access_fail_c),
        )
        log.info("child_process {} started... ".format(idx))
        child.start()
        running.append(child)
    for p in running:
        p.join()


def start_process():
    """Download the Hong Kong batch, then the Macau batch, and print totals.

    For each batch the URL count is printed before downloading; after both
    batches the number of failed downloads recorded in the shared counter is
    printed. Images go to ``fail_images/fail_xg/`` and ``fail_images/fail_am/``
    next to this script.
    """
    # Integer counter shared with the children ('i' so the total prints as a
    # whole number rather than a float).
    access_fail_c = multiprocessing.Value('i', 0)
    base_dir = os.path.dirname(os.path.abspath(__file__))

    # Hong Kong plate images
    image_url, license_plate, cts = get_xg_images_url()
    print('港牌url总数: {}'.format(len(image_url)))
    _run_download_batch(
        image_url, license_plate, cts,
        base_dir + "/fail_images/fail_xg/", access_fail_c,
    )

    # Macau plate images
    image_url, license_plate, cts = get_am_images_url()
    print('澳牌url总数: {}'.format(len(image_url)))
    _run_download_batch(
        image_url, license_plate, cts,
        base_dir + "/fail_images/fail_am/", access_fail_c,
    )

    print('访问图片失败总数:{}'.format(access_fail_c.value))
if __name__ == '__main__':
    # Script entry point: send INFO-and-above records from the root logger to
    # dl_images.log next to this script, then run the downloads.
    log_file = os.path.dirname(os.path.abspath(__file__)) + "/dl_images.log"
    handler = RotatingFileHandler(log_file)
    handler.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    )
    # ``logger`` stays a module-level name: the functions above read it.
    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    logger.addHandler(handler)
    start_process()
复制代码

 

posted @   苦逼yw  阅读(3)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 【杭电多校比赛记录】2025“钉耙编程”中国大学生算法设计春季联赛(1)
点击右上角即可分享
微信分享提示