python(进程池/线程池)

进程池

import multiprocessing
import time

def do_calculation(data):
    print(multiprocessing.current_process().name + " " + str(data))
    time.sleep(3)
    return data * 2

def start_process():
    print ('Starting', multiprocessing.current_process().name)

if __name__ == '__main__':
    inputs = list(range(10))
    print ('Input   :' + str(inputs)) 
    
    pool_size = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=pool_size,
                                initializer=start_process,
                                )

#more inputs
    #more_inputs = [11,12,13,14,15,16,17,18,19,20]
    #pool_outputs = pool.map(do_calculation, more_inputs)

pool_outputs
= pool.map(do_calculation, inputs) pool.close() # no more tasks pool.join() # wrap up current tasks print ('Pool :' + str(pool_outputs))

运行如下:

root # python pool.py
Input   :[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
('Starting', 'PoolWorker-1')
PoolWorker-1 0
('Starting', 'PoolWorker-2')
PoolWorker-2 2
PoolWorker-1 1
PoolWorker-2 3
PoolWorker-1 4
PoolWorker-2 6
PoolWorker-1 5
PoolWorker-2 7
PoolWorker-1 8
PoolWorker-1 9
Pool    :[0, 2, 4, 6, 8, 10, 12, 14, 16, 18]

如果任务是动态添加可以用如下代码

import multiprocessing
import time

def worker(data):
    if data == 3:
        time.sleep(10)
    else:
        time.sleep(1)
    print(multiprocessing.current_process().name + " " + str(data))
    return data * 2

def start_process():
    print ('Starting', multiprocessing.current_process().name)

#callback func
def say(res):
    print res

if __name__ == '__main__':
    inputs = list(range(10))
    print ('Input   :' + str(inputs))

    pool_size = multiprocessing.cpu_count()
    pool = multiprocessing.Pool(processes=pool_size,
                                initializer=start_process,
                                )

    for i in inputs:
        pool.apply_async(worker, (i,))
        #pool.apply_async(worker, (i,), callback=say)

    pool.close() # no more tasks
    pool.join()  # wrap up current tasks

输出如下:

root # python pool2.py
Input   :[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
('Starting', 'PoolWorker-1')
('Starting', 'PoolWorker-2')
PoolWorker-1 0
PoolWorker-2 1
PoolWorker-1 2
PoolWorker-1 4
PoolWorker-1 5
PoolWorker-1 6
PoolWorker-1 7
PoolWorker-1 8
PoolWorker-1 9
PoolWorker-2 3

线程池

import Queue
import sys
import threading
import time

thread_count = 2
mutex = threading.Lock() 

class MyThread(threading.Thread):  
    def __init__(self, workQueue, resultQueue,timeout=0, **kwargs):  
        threading.Thread.__init__(self, kwargs=kwargs)  
        self.timeout = 1   
        self.setDaemon(True)  
        self.workQueue = workQueue  
        self.resultQueue = resultQueue  
        self.start()  
  
    def run(self):  
        while True:  
            try:  
                callable, args, kwargs = self.workQueue.get(timeout=self.timeout)  
                res = callable(args, self.getName())  
                self.resultQueue.put(res)
  
            except Queue.Empty:  
                break
            except :  
                print sys.exc_info()  
                raise  
  
  
class ThreadPool:  
    def __init__( self, num_of_threads=10):  
        self.workQueue = Queue.Queue()  
        self.resultQueue = Queue.Queue()  
        self.threads = []  
        self.__createThreadPool( num_of_threads )  
  
    def __createThreadPool( self, num_of_threads ):  
        for i in range( num_of_threads ):  
            thread = MyThread( self.workQueue, self.resultQueue )  
            self.threads.append(thread)  
  
    def wait_for_complete(self):  
        while len(self.threads):  
            thread = self.threads.pop()  
            if thread.isAlive():  
                thread.join()  
  
    def add_job( self, callable, args, **kwargs ):  
        while True:  
            if self.workQueue.qsize() < 10000:  
                self.workQueue.put( (callable,args,kwargs) )  
                break
            time.sleep(0.1)  

def worker(data, threadid):  

    if mutex.acquire(1):
        print threadid, data
        mutex.release()

    time.sleep(3)
    return data * 2


if __name__ == '__main__':
    
    threadPool = ThreadPool(thread_count)
    for i in range(10):
        threadPool.add_job(worker, i)

    threadPool.wait_for_complete()
    print 'result Queue\'s length == %d '% threadPool.resultQueue.qsize()   
    while threadPool.resultQueue.qsize():   
        print threadPool.resultQueue.get()   
    print 'end testing'

运行如下:

root # python g.py
Thread-1 0
Thread-2 1
Thread-1 2
Thread-2 3
Thread-1 4
Thread-2 5
Thread-1 6
Thread-2 7
Thread-1 8
Thread-2 9
result Queue's length == 10
0
2
4
6
8
10
12
14
16
18
end testing

简单测试了一下,如果worker函数需要做大量耗cpu的运算,用进程池速度比线程池快数倍。

2.492s VS 0.598s

posted @ 2013-06-26 15:47  ciaos  阅读(869)  评论(0编辑  收藏  举报