Python 多进程:multiprocessing Pool 和 tqdm

https://blog.csdn.net/qq_39694935/article/details/84552076

【Python】multiprocessing Pool 进程间通信共享

1. tqdm模块的简洁使用

直接上代码:

from tqdm import tqdm
from multiprocessing import Pool
import functools
from pymongo import MongoClient
# Module-level MongoDB client read by the main_deal examples below; the host
# and credentials shown here are masked placeholders.
# NOTE(review): the client is created before any worker pool is started —
# confirm it is never used inside worker processes (PyMongo clients are
# presumably not safe to share across a fork).
mdb = MongoClient('120.xx.26.xx:20002', username='xx', password='xxxxx')

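# Three alternative versions of main_deal follow; keep only one of them
# (if all are left in place, the last definition is the one that runs).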

def create_data(image):
    """Placeholder pool worker: echo one record and return it as a string.

    Args:
        image: The item handed to the worker by the pool.

    Returns:
        ``str(image)``.
    """
    # TODO: implement the actual processing logic.
    print(image)
    as_text = str(image)
    return as_text


def main_deal():
    """Variant 1: ordered ``imap`` with a sized progress bar, then an unordered pass.

    Every document returned by the ``image`` collection query is passed to
    ``create_data`` in a pool of 20 worker processes. The results of the
    ordered pass are written to ``result.txt``, one per line. A second
    ``imap_unordered`` pass over the same cursor follows, discarding results.
    """
    num_processor = 20

    # Pool's context manager terminates the workers on exit, so they are not
    # left running if create_data or a file write raises part-way through.
    with Pool(num_processor) as p:
        images = mdb['db_name']['image'].find(no_cursor_timeout=True).batch_size(200)

        # The with-block closes the file even when the loop raises.
        with open('result.txt', 'w+') as fw:
            # NOTE(review): Cursor.count() was removed in PyMongo 4 — confirm
            # the driver version, or switch to Collection.count_documents({}).
            for result in tqdm(p.imap(create_data, images), total=images.count()):
                fw.write(result + '\n')

        # NOTE(review): `images` has already been iterated by the imap call
        # above, so this pass presumably sees an exhausted cursor and does
        # nothing — confirm, and rewind or re-query if a real pass is wanted.
        for _ in tqdm(p.imap_unordered(create_data, images)):
            pass

        # Normal path: stop accepting work and wait for the workers to finish.
        p.close()
        p.join()


def main_deal():
    """Variant 2: ``imap_unordered`` with results written as they complete.

    Every document returned by the ``image_generated_data`` collection query
    is passed to ``create_data`` in a pool of 20 worker processes. Results are
    written to ``result.txt``, one per line, in completion order. No ``total``
    is passed to tqdm here.
    """
    num_processor = 20

    # Pool's context manager terminates the workers on exit, so they are not
    # left running if create_data or a file write raises part-way through.
    with Pool(num_processor) as p:
        images = mdb['goodlook']['image_generated_data'].find(no_cursor_timeout=True).batch_size(200)

        # The with-block closes the file even when the loop raises.
        with open('result.txt', 'w+') as fw:
            for result in tqdm(p.imap_unordered(create_data, images)):
                fw.write(result + '\n')

        # Normal path: stop accepting work and wait for the workers to finish.
        p.close()
        p.join()


def main_deal():
    """Variant 3: like variant 2, but the worker is wrapped in ``functools.partial``.

    Every document returned by the ``image_generated_data`` collection query
    is passed to the partial in a pool of 20 worker processes. Results are
    written to ``result.txt``, one per line, in completion order.
    """
    num_processor = 20

    # No arguments are bound here; the partial is the place to pre-bind extra
    # arguments for create_data if it ever needs more than the item itself.
    pt = functools.partial(create_data)

    # Pool's context manager terminates the workers on exit, so they are not
    # left running if the worker or a file write raises part-way through.
    with Pool(num_processor) as p:
        images = mdb['goodlook']['image_generated_data'].find(no_cursor_timeout=True).batch_size(200)

        # The with-block closes the file even when the loop raises.
        with open('result.txt', 'w+') as fw:
            for result in tqdm(p.imap_unordered(pt, images)):
                fw.write(result + '\n')

        # Normal path: stop accepting work and wait for the workers to finish.
        p.close()
        p.join()


# Run the example only when this file is executed as a script. If several
# main_deal variants are left in the file, the last definition is the one
# bound to the name and therefore the one called here.
if __name__ == '__main__':
    main_deal()

2. 进程池多进程

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# __author__ = "Victor"
# Date: 2020/6/18

import traceback
import multiprocessing
from multiprocessing import Pool

# Number of worker processes in the pool; the script below also submits this
# many tasks.
concurrent_num = 10


def task_run(data, msg):
    """Print a greeting tagged with the name of the process running the task.

    Any exception raised while building or printing the message is reported
    (traceback plus an ``error:`` line) instead of propagating.

    Args:
        data: Payload echoed in the greeting.
        msg: Text appended to the current process name, separated by ``-``.

    Returns:
        Always ``None``.
    """
    try:
        # To simulate work, sleep here: time.sleep(random.randrange(1, 4))
        worker = multiprocessing.current_process()
        msg = worker.name + '-' + msg
        print(f"hello world : {data}, {msg}")
    except Exception as exc:
        traceback.print_exc()
        print("error: ", exc)


if __name__ == '__main__':
    # Submit concurrent_num tasks to a pool of the same size. Every task gets
    # the same empty dict plus its own "index-N" label. The async results are
    # not collected; the script just waits for the workers to finish.
    payload = {}
    pool = Pool(concurrent_num)

    for idx in range(concurrent_num):
        pool.apply_async(task_run, (payload, 'index-%d' % idx))

    pool.close()
    pool.join()

3. 进程池和调度器模块的冲突

#!/usr/bin/env python
# -*- encoding: utf-8 -*-
''' 
@Author: Victor
@Contact: 
@Date: 2020/10/15
@function: ''
'''

from apscheduler.schedulers.blocking import BlockingScheduler
from apscheduler.schedulers.background import BackgroundScheduler

import time
import random
import multiprocessing


class TodayCollection(object):
    """Demo that fans groups of numbers out to a multiprocessing pool.

    The instance also holds an APScheduler ``BlockingScheduler``. Because
    ``start`` submits the bound method ``self.execute_tasks`` to the pool, the
    instance itself is pickled for the workers; the scheduler is left out of
    the pickled state so that it cannot make that step fail.
    """

    def __init__(self):
        self.name = "今日采集类"
        self.scheduler = BlockingScheduler()

    def __getstate__(self):
        """Return the instance state to pickle, without the scheduler.

        The scheduler presumably holds unpicklable state (thread locks and the
        like); with it in the state, submitting a bound method to the pool
        fails and, since nobody reads the async result, fails silently.
        ``execute_tasks`` does not use the scheduler, so workers do not need it.
        """
        state = self.__dict__.copy()
        state.pop('scheduler', None)
        return state

    @staticmethod
    def _report_task_error(error):
        """Print an exception that a submitted task (or its pickling) raised."""
        print("task failed: ", error)

    def execute_tasks(self, index, d_arr, p_lock):
        """Worker body: print the group and a random number, forever.

        Args:
            index: Position of the group in the list built by ``start``.
            d_arr: The group of numbers assigned to this task.
            p_lock: Manager lock shared by all tasks. Unused here; typically
                acquired around database writes, e.g.
                ``p_lock.acquire(); ...; p_lock.release()``.
        """
        try:
            # Deliberately endless: the demo keeps every worker busy.
            while True:
                print(index, d_arr, random.random())

        except Exception as ex:
            print(ex)

    def start(self):
        """Submit one ``execute_tasks`` call per non-empty group and wait.

        Since the tasks loop forever, ``join`` only returns if the tasks fail
        or the run is interrupted; the ``finally`` clause then makes sure the
        workers are terminated.
        """
        groups = [[1, 3, 22], [3, 4, 6, 8], [3, 3, 4, 4], [3, 5, 6, 7]]
        manager = multiprocessing.Manager()
        p_lock = manager.Lock()
        pool = multiprocessing.Pool(processes=4)
        try:
            for index, d_arr in enumerate(groups):
                if d_arr:
                    # error_callback surfaces failures that would otherwise
                    # be lost because the AsyncResult is never read.
                    pool.apply_async(
                        self.execute_tasks,
                        (index, d_arr, p_lock),
                        error_callback=self._report_task_error,
                    )

            pool.close()
            pool.join()
        finally:
            pool.terminate()


if __name__ == '__main__':

    # Author's observation: holding an apscheduler BlockingScheduler or
    # BackgroundScheduler on the instance makes the multiprocessing run exit
    # abnormally. The author's remedy is simply to remove the
    # `self.scheduler = BlockingScheduler()` line.
    TodayCollection().start()

  

 

posted @ 2019-03-13 15:39  Adamanter  阅读(2216)  评论(0)  编辑  收藏  举报