Python学习——Python进程
python中的多线程其实并不是真正的多线程,如果想要充分地使用多核CPU的资源,在python中大部分情况需要使用多进程。Python提供了非常好用的多进程包multiprocessing,只需要定义一个函数,Python会完成其他所有事情。借助这个包,可以轻松完成从单进程到并发执行的转换。multiprocessing支持子进程、通信和共享数据、执行不同形式的同步,提供了Process、Queue、Pipe、Lock等组件。
1、Process
创建进程的类:Process([group [, target [, name [, args [, kwargs]]]]]),target表示调用对象,args表示调用对象的位置参数元组,kwargs表示调用对象的关键字参数字典,name为进程的别名,group实质上不使用。
方法:is_alive()、join([timeout])、run()、start()、terminate()。其中,Process以start()启动某个进程。
属性:authkey、daemon(必须在start()之前设置)、exitcode(进程在运行时为None;如果为-N,表示被信号N结束)、name、pid。其中daemon进程会在父进程终止后自动终止,且自己不能产生新进程,该属性必须在start()之前设置。
注:
is_alive()用来查看进程的状态
terminate()用来终止进程。
单进程:
import multiprocessing
import time


def worker(interval):
    """Print the current time five times, sleeping ``interval`` seconds after each print."""
    for _ in range(5):
        print("The time is {0}".format(time.ctime()))
        time.sleep(interval)


if __name__ == "__main__":
    # Run the worker in one child process, then report the child's
    # pid, name and liveness from the parent.
    child = multiprocessing.Process(target=worker, args=(3,))
    child.start()
    print("p.pid:", child.pid)
    print("p.name:", child.name)
    print("p.is_alive:", child.is_alive())
多进程:
import multiprocessing
import time


def _announce_and_wait(label, interval):
    """Print ``label``, sleep ``interval`` seconds, then print ``"end " + label``."""
    print(label)
    time.sleep(interval)
    print("end " + label)


def worker_1(interval):
    """First demo worker: announce itself, sleep ``interval`` seconds, announce the end."""
    _announce_and_wait("worker_1", interval)


def worker_2(interval):
    """Second demo worker: announce itself, sleep ``interval`` seconds, announce the end."""
    _announce_and_wait("worker_2", interval)


def worker_3(interval):
    """Third demo worker: announce itself, sleep ``interval`` seconds, announce the end."""
    _announce_and_wait("worker_3", interval)


if __name__ == "__main__":
    # Create all three processes first, then start them in the same order.
    children = [
        multiprocessing.Process(target=worker_1, args=(2,)),
        multiprocessing.Process(target=worker_2, args=(3,)),
        multiprocessing.Process(target=worker_3, args=(4,)),
    ]
    for child in children:
        child.start()

    # cpu_count() gives the number of CPUs, which can guide how many
    # child processes to create.
    cpu_total = multiprocessing.cpu_count()
    print("The number of CPU is:" + str(cpu_total))

    # active_children() lists the live children of this process, daemon
    # and non-daemon alike; .name / .pid are the process name and id.
    for child in multiprocessing.active_children():
        print("child p.name:" + child.name + "\tp.id" + str(child.pid))
    print("END!!!!!!!!!!!!!!!!!")
将进程定义为类:
import multiprocessing
import time


class ClockProcess(multiprocessing.Process):
    """Process subclass that prints the current time at a fixed interval.

    Args:
        interval: Seconds to sleep after each print.
        ticks: How many times to print before the process exits.
            Defaults to 5, the value that was previously hard-coded.
    """

    def __init__(self, interval, ticks=5):
        super().__init__()
        self.interval = interval
        self.ticks = ticks

    def run(self):
        """Body executed in the child process once ``start()`` is called."""
        for _ in range(self.ticks):
            print("the time is {0}".format(time.ctime()))
            time.sleep(self.interval)


if __name__ == '__main__':
    p = ClockProcess(3)
    p.start()
daemon程序对比结果:
1.不加daemon
import multiprocessing
import time


def worker(interval):
    """Print a start timestamp, sleep ``interval`` seconds, then print an end timestamp."""
    start_line = "work start:{0}".format(time.ctime())
    print(start_line)
    time.sleep(interval)
    end_line = "work end:{0}".format(time.ctime())
    print(end_line)


if __name__ == "__main__":
    # The child is left non-daemonic (the default) and is not joined.
    child = multiprocessing.Process(target=worker, args=(3,))
    child.start()
    print("end!")

# Sample output:
#   end!
#   work start:Wed Jun 28 00:07:57 2017
#   work end:Wed Jun 28 00:08:00 2017
2.加daemon
import multiprocessing
import time


def worker(interval):
    """Print a start timestamp, sleep ``interval`` seconds, then print an end timestamp."""
    start_line = "work start:{0}".format(time.ctime())
    print(start_line)
    time.sleep(interval)
    end_line = "work end:{0}".format(time.ctime())
    print(end_line)


if __name__ == "__main__":
    # The daemon flag is supplied at construction time, i.e. before start().
    child = multiprocessing.Process(target=worker, args=(3,), daemon=True)
    child.start()
    print("end!")

# Sample output (the daemon child ends together with the main process,
# so none of the worker's lines appear):
#   end!
PS:因子进程设置了daemon属性,主进程结束,它们就随着结束了。
3.设置daemon执行完结束的方法
import multiprocessing
import time


def worker(interval):
    """Print a start timestamp, sleep ``interval`` seconds, then print an end timestamp."""
    print("work start:{0}".format(time.ctime()))
    time.sleep(interval)
    print("work end:{0}".format(time.ctime()))


if __name__ == "__main__":
    p = multiprocessing.Process(target=worker, args=(3,))
    p.daemon = True
    p.start()
    # join() blocks until the daemon child has finished, so the worker's
    # output appears before "end!".
    p.join()
    # print() as a function: the statement form is a SyntaxError on Python 3.
    print("end!")

# Sample output:
#   work start:Tue Apr 21 22:16:32 2015
#   work end:Tue Apr 21 22:16:35 2015
#   end!
2、Lock
当多个进程需要访问共享资源的时候,Lock可以用来避免访问的冲突。
import multiprocessing
import sys

# Number of lines each worker appends (the original loop ran for n = 10 .. 2).
_LINES_PER_WORKER = 9


def worker_with(lock, f):
    """Append lines to file ``f`` while holding ``lock`` via a ``with`` statement.

    Args:
        lock: Lock object usable as a context manager.
        f: Path of the file to append to.
    """
    with lock:
        # The context manager closes the file even if a write raises.
        with open(f, 'a+') as fs:
            for _ in range(_LINES_PER_WORKER):
                fs.write("Lockd acquired via with\n")


def worker_no_with(lock, f):
    """Append lines to file ``f`` while holding ``lock`` via explicit acquire/release.

    Args:
        lock: Lock object providing ``acquire()`` and ``release()``.
        f: Path of the file to append to.
    """
    lock.acquire()
    try:
        # The context manager closes the file even if a write raises.
        with open(f, 'a+') as fs:
            for _ in range(_LINES_PER_WORKER):
                fs.write("Lock acquired directly\n")
    finally:
        # Always give the lock back, whatever happened above.
        lock.release()


if __name__ == "__main__":
    lock = multiprocessing.Lock()
    f = "file.txt"
    w = multiprocessing.Process(target=worker_with, args=(lock, f))
    nw = multiprocessing.Process(target=worker_no_with, args=(lock, f))
    w.start()
    nw.start()
    print("end")
3、Semaphore
Semaphore用来控制对共享资源的访问数量,例如池的最大连接数。
import multiprocessing
import time


def worker(s, i):
    """Hold semaphore ``s`` for ``i`` seconds, printing when it is acquired and released.

    Args:
        s: Semaphore usable as a context manager.
        i: Seconds to sleep while holding the semaphore.
    """
    # ``with`` releases the semaphore even if the body raises; a bare
    # acquire()/release() pair would leave it held forever in that case.
    with s:
        print(multiprocessing.current_process().name + "acquire")
        time.sleep(i)
        print(multiprocessing.current_process().name + "release\n")


if __name__ == "__main__":
    s = multiprocessing.Semaphore(2)  # at most two processes hold it at once
    for i in range(5):
        p = multiprocessing.Process(target=worker, args=(s, i * 2))
        p.start()
运行结果:
1 Process-4acquire 2 Process-2acquire 3 Process-2release 4 5 Process-1acquire 6 Process-1release 7 8 Process-3acquire 9 Process-4release 10 11 Process-5acquire 12 Process-3release 13 14 Process-5release
4、Event
Event实现进程间同步通信
import multiprocessing
import time


def wait_for_event(e):
    """Wait on event ``e`` with no timeout, then print its state."""
    print("wait_for_event: starting")
    e.wait()
    state = str(e.is_set())
    print("wairt_for_event: e.is_set()->" + state)


def wait_for_event_timeout(e, t):
    """Wait on event ``e`` for at most ``t`` seconds, then print its state."""
    print("wait_for_event_timeout:starting")
    e.wait(t)
    state = str(e.is_set())
    print("wait_for_event_timeout:e.is_set->" + state)


if __name__ == "__main__":
    event = multiprocessing.Event()
    blocking_waiter = multiprocessing.Process(
        name="block",
        target=wait_for_event,
        args=(event,),
    )
    timed_waiter = multiprocessing.Process(
        name="non-block",
        target=wait_for_event_timeout,
        args=(event, 2),
    )
    blocking_waiter.start()
    timed_waiter.start()

    # Sleep longer than the 2-second timeout before setting the event.
    time.sleep(3)

    event.set()
    print("main: event is set")

# Sample output:
#   wait_for_event: starting
#   wait_for_event_timeout:starting
#   wait_for_event_timeout:e.is_set->False
#   main: event is set
#   wairt_for_event: e.is_set()->True
5、Queue
import multiprocessing
import queue


def writer_proc(q):
    """Put the value 1 on ``q`` without blocking.

    Best effort: if the queue is full the item is dropped. Any other
    error propagates.
    """
    try:
        q.put(1, block=False)
    except queue.Full:
        pass


def reader_proc(q, timeout=5):
    """Print one item taken from ``q``.

    Args:
        q: Queue to read from.
        timeout: Seconds to wait for an item. Waiting (rather than a
            non-blocking get) keeps a reader that starts before the
            writer has delivered from missing the item.

    If nothing arrives within ``timeout`` the function returns without
    printing. Any other error propagates.
    """
    try:
        print(q.get(timeout=timeout))
    except queue.Empty:
        pass


if __name__ == "__main__":
    q = multiprocessing.Queue()
    writer = multiprocessing.Process(target=writer_proc, args=(q,))
    writer.start()

    reader = multiprocessing.Process(target=reader_proc, args=(q,))
    reader.start()

    reader.join()
    writer.join()

# Sample output:
#   1
6、Pipe
Pipe可以是单向(half-duplex),也可以是双向(duplex)。我们通过multiprocessing.Pipe(duplex=False)创建单向管道 (默认为双向)。一个进程从PIPE一端输入对象,然后被PIPE另一端的进程接收,单向管道只允许管道一端的进程输入,而双向管道则允许从两端输入。
# proc1 sends messages; proc2 and proc3 take turns receiving them.
import multiprocessing
import time


def proc1(pipe):
    """Send the integers 0..99 over ``pipe`` in an endless cycle, one per second."""
    while True:
        for i in range(100):
            print("send: %s" % (i))
            pipe.send(i)
            time.sleep(1)


def _guarded_recv(pipe, lock):
    """Receive one object from ``pipe``, holding ``lock`` (if given) during the read."""
    if lock is None:
        return pipe.recv()
    with lock:
        return pipe.recv()


def proc2(pipe, lock=None):
    """Receive from ``pipe`` forever, printing each object, one read per second.

    Args:
        pipe: Connection end to read from.
        lock: Optional lock shared with every other reader of the same
            connection end; two processes reading the same end at the
            same time may corrupt the data.
    """
    while True:
        print("proc2 rev:", _guarded_recv(pipe, lock))
        time.sleep(1)


def proc3(pipe, lock=None):
    """Receive from ``pipe`` forever, printing each object, one read per second.

    Args:
        pipe: Connection end to read from.
        lock: Optional lock shared with every other reader of the same
            connection end; two processes reading the same end at the
            same time may corrupt the data.
    """
    while True:
        print("proc3 rev:", _guarded_recv(pipe, lock))
        time.sleep(1)


if __name__ == "__main__":
    pipe = multiprocessing.Pipe()
    # proc2 and proc3 read the same end, so their recv() calls are
    # serialised with one shared lock.
    recv_lock = multiprocessing.Lock()
    p1 = multiprocessing.Process(target=proc1, args=(pipe[0],))
    p2 = multiprocessing.Process(target=proc2, args=(pipe[1], recv_lock))
    p3 = multiprocessing.Process(target=proc3, args=(pipe[1], recv_lock))

    p1.start()
    p2.start()
    p3.start()

    p1.join()
    p2.join()
    p3.join()

# Sample output:
#   send: 0
#   proc2 rev: 0
#   send: 1
#   proc3 rev: 1
#   send: 2
#   proc2 rev: 2
#   send: 3
#   proc3 rev: 3
#   send: 4
#   proc2 rev: 4
#   send: 5
#   proc3 rev: 5
#   send: 6
#   proc2 rev: 6
#   send: 7
#   proc3 rev: 7
#   send: 8
#   proc2 rev: 8
#   send: 9
#   proc3 rev: 9
#   send: 10
#   proc2 rev: 10
#   ......
7、Pool
在利用Python进行系统管理的时候,特别是同时操作多个文件目录,或者远程控制多台主机,并行操作可以节约大量的时间。当被操作对象数目不大时,可以直接利用multiprocessing中的Process动态生成多个进程,十几个还好,但如果是上百个,上千个目标,手动的去限制进程数量却又太过繁琐,此时可以发挥进程池的功效。
Pool可以提供指定数量的进程,供用户调用,当有新的请求提交到pool中时,如果池还没有满,那么就会创建一个新的进程用来执行该请求;但如果池中的进程数已经达到规定最大值,那么该请求就会等待,直到池中有进程结束,才会创建新的进程来执行它。
使用进程池(非阻塞)
import multiprocessing
import time


def func(msg):
    """Print ``msg``, sleep three seconds, then print "end"."""
    print("msg:", msg)
    time.sleep(3)
    print("end")


if __name__ == "__main__":
    # The pool holds at most 3 worker processes.
    pool = multiprocessing.Pool(processes=3)
    for index in range(10):
        # apply_async() queues the task and returns immediately; the pool
        # keeps up to ``processes`` tasks running and starts the next one
        # when a worker becomes free.
        pool.apply_async(func, ("hello %d" % (index),))

    print("Mark~ Mark~ Mark~~~~~~~~~~~~~~~~~~~~~~")
    # close() must be called before join(), otherwise join() fails. After
    # close() no new tasks are accepted; join() waits for the workers to finish.
    pool.close()
    pool.join()
    print("Sub-process(es) done.")
运行结果:
Mark~ Mark~ Mark~~~~~~~~~~~~~~~~~~~~~~ msg: hello 0 msg: hello 1 msg: hello 2 end msg: hello 3 end msg: hello 4 end msg: hello 5 end msg: hello 6 end msg: hello 7 end msg: hello 8 end msg: hello 9 end end end Sub-process(es) done.
函数解释:
- apply_async(func[, args[, kwds[, callback]]]) 它是非阻塞,apply(func[, args[, kwds]])是阻塞的(理解区别,看例1例2结果区别)
- close() 关闭pool,使其不再接受新的任务。
- terminate() 结束工作进程,不再处理未完成的任务。
- join() 主进程阻塞,等待子进程的退出, join方法要在close或terminate之后使用。
执行说明:创建一个进程池pool,并设定进程的数量为3,range(10)会相继产生十个对象[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],十个对象被提交到pool中,因pool指定进程数为3,所以0、1、2会直接送到进程中执行,当其中一个执行完后才空出一个进程处理对象3,所以会出现输出“msg: hello 3”出现在"end"后。因为为非阻塞,主函数会自己执行自个的,不搭理进程的执行,所以运行完for循环后直接输出“Mark~ Mark~ Mark~~~~~~~~~~~~~~~~~~~~~~”,主程序在pool.join()处等待各个进程的结束。
使用进程池(阻塞)
import multiprocessing
import time


def func(msg):
    """Print ``msg``, sleep three seconds, then print "end"."""
    print("msg:", msg)
    time.sleep(3)
    print("end")


if __name__ == "__main__":
    # The pool holds at most 3 worker processes.
    pool = multiprocessing.Pool(processes=3)
    for index in range(10):
        # apply() blocks until the task has finished, so the tasks run
        # one after another.
        pool.apply(func, ("hello %d" % (index),))

    print("Mark~ Mark~ Mark~~~~~~~~~~~~~~~~~~~~~~")
    # close() must be called before join(), otherwise join() fails. After
    # close() no new tasks are accepted; join() waits for the workers to finish.
    pool.close()
    pool.join()
    print("Sub-process(es) done.")

# Sample output:
#   msg: hello 0
#   end
#   msg: hello 1
#   end
#   msg: hello 2
#   end
#   msg: hello 3
#   end
#   msg: hello 4
#   end
#   msg: hello 5
#   end
#   msg: hello 6
#   end
#   msg: hello 7
#   end
#   msg: hello 8
#   end
#   msg: hello 9
#   end
#   Mark~ Mark~ Mark~~~~~~~~~~~~~~~~~~~~~~
#   Sub-process(es) done.
使用多个进程池
import multiprocessing
import os, time, random


def _timed_task(banner, max_seconds, summary):
    """Print ``banner`` with the current pid, sleep a random time, print ``summary``.

    The sleep lasts ``random.random() * max_seconds`` seconds; ``summary``
    is formatted with the measured elapsed time.
    """
    print(banner % (os.getpid()))  # os.getpid() is the current process id
    began = time.time()
    time.sleep(random.random() * max_seconds)  # random.random() is in [0, 1)
    print(summary % (time.time() - began))


def Lee():
    """Task that sleeps for a random time below 10 seconds."""
    _timed_task("\nRun task Lee-%s", 10, 'Task Lee, runs %0.2f seconds.')


def Marlon():
    """Task that sleeps for a random time below 40 seconds."""
    _timed_task("\nRun task Marlon-%s", 40, 'Task Marlon runs %0.2f seconds.')


def Allen():
    """Task that sleeps for a random time below 30 seconds."""
    _timed_task("\nRun task Allen-%s", 30, 'Task Allen runs %0.2f seconds.')


def Frank():
    """Task that sleeps for a random time below 20 seconds."""
    _timed_task("\nRun task Frank-%s", 20, 'Task Frank runs %0.2f seconds.')


if __name__ == '__main__':
    tasks = [Lee, Marlon, Allen, Frank]
    print("parent process %s" % (os.getpid()))

    pool = multiprocessing.Pool(4)
    for task in tasks:
        # Submit without blocking; the pool's workers pick the tasks up.
        pool.apply_async(task)

    print('Waiting for all subprocesses done...')
    # close() must come before join(), otherwise join() fails. After close()
    # no new tasks are accepted; join() waits for all workers to finish.
    pool.close()
    pool.join()
    print('All subprocesses done.')

# Sample output:
#   parent process 3256
#   Waiting for all subprocesses done...
#
#   Run task Lee-2196
#
#   Run task Marlon-4580
#
#   Run task Allen-5920
#
#   Run task Frank-6384
#   Task Allen runs 2.15 seconds.
#   Task Lee, runs 9.99 seconds.
#   Task Frank runs 14.14 seconds.
#   Task Marlon runs 32.74 seconds.
#   All subprocesses done.