import threading
import time


def run(n):
    """Print a marker for worker *n*, then simulate two seconds of work."""
    print("test...", n)
    time.sleep(2)


if __name__ == '__main__':
    # Both threads are started back to back, so they sleep concurrently and
    # the program ends after roughly two seconds.
    workers = [
        threading.Thread(target=run, args=(label,))
        for label in ("t1", "t2")
    ]
    for worker in workers:
        worker.start()

    # Program output:
    # test... t1
    # test... t2
import threading
import time


class MyThread(threading.Thread):
    """Thread subclass that announces its number and then sleeps."""

    def __init__(self, num):
        super().__init__()
        self.num = num

    def run(self):
        """Work executed inside the thread once ``start()`` is called."""
        print("running on number:%s" % self.num)
        time.sleep(2)


if __name__ == '__main__':
    # Both threads run concurrently; the program ends after about two seconds.
    for number in (1, 2):
        MyThread(number).start()

    # Program output:
    # running on number:1
    # running on number:2
import threading
import time


def run(n, sleep_time):
    """Announce worker *n*, sleep for *sleep_time* seconds, announce completion."""
    print("test...", n)
    time.sleep(sleep_time)
    print("test...done", n)


if __name__ == '__main__':
    # Both threads start together; the main thread then blocks on t1 only,
    # so "main thread" is printed after t1 finishes but before t2 does.
    t1 = threading.Thread(target=run, args=("t1", 2))
    t2 = threading.Thread(target=run, args=("t2", 3))
    for worker in (t1, t2):
        worker.start()

    t1.join()  # wait for t1
    print("main thread")

    # Program output:
    # test... t1
    # test... t2
    # test...done t1
    # main thread
    # test...done t2
import threading
import time


def run(n):
    """Worker body: announce start, sleep two seconds, announce completion."""
    print('[%s]------running----\n' % n)
    time.sleep(2)
    print('--done--')


def main():
    """Start five workers one after another, waiting at most 1s for each."""
    for i in range(5):
        t = threading.Thread(target=run, args=(i,))
        t.start()
        t.join(1)  # give up waiting after one second; the worker keeps running
        # ``Thread.name`` replaces the deprecated ``Thread.getName()``.
        print('starting thread', t.name)


m = threading.Thread(target=main, args=())
# Mark the "main" worker as a daemon of the real main thread (the ``daemon``
# attribute replaces the deprecated ``setDaemon()``).  When the real main
# thread exits, m is terminated too, and so are the threads m started
# (they inherit the daemon flag), whether or not their work is finished.
m.daemon = True
m.start()
m.join(timeout=2)
print("---main thread done----")

# Program output (on Python 3.10+ the default thread name also includes the
# target, e.g. "Thread-2 (run)"):
# [0]------running----
# starting thread Thread-2
# [1]------running----
# --done--
# ---main thread done----
4、Mutex 线程锁(互斥锁)
import time
import threading


def addNum():
    """Decrement the shared counter WITHOUT any locking (race-condition demo)."""
    global num  # every thread works on the same module-level variable
    print('--get num:', num)
    time.sleep(1)
    num = num - 1  # unsynchronised read-modify-write on the shared value


num = 100  # the shared variable
thread_list = []
for _ in range(100):
    worker = threading.Thread(target=addNum)
    worker.start()
    thread_list.append(worker)

# Wait until every thread has finished.
for worker in thread_list:
    worker.join()

print('final num:', num)
正常来讲,这个num结果应该是0, 但在python 2.7上多运行几次,会发现,最后打印出来的num结果不总是0,为什么每次运行的结果不一样呢? 哈,很简单,假设你有A,B两个线程,此时都要对num进行减1操作, 由于2个线程是并发同时运行的,所以2个线程很有可能同时拿走了num=100这个初始值交给cpu去运算,当A线程运算完的结果是99,此时B线程运算完的结果也是99,两个线程把各自运算的结果再赋值给num变量后,num就是99,而不是预期的98。那怎么办呢? 很简单,每个线程在要修改公共数据时,为了避免自己在还没改完的时候别人也来修改此数据,可以给这个数据加一把锁, 这样其它线程想修改此数据时就必须等待你修改完毕并把锁释放掉后才能再访问此数据。
import time
import threading


def addNum():
    """Decrement the shared counter by one while holding the global lock."""
    global num  # every thread works on the same module-level variable
    print('--get num:', num)
    time.sleep(1)
    # ``with`` acquires the lock before the update and always releases it
    # afterwards, even if the guarded statement raises.
    with lock:
        num -= 1


num = 100  # the shared variable
thread_list = []
lock = threading.Lock()  # one global lock protecting ``num``

for i in range(100):
    t = threading.Thread(target=addNum)
    t.start()
    thread_list.append(t)

for t in thread_list:  # wait until every thread has finished
    t.join()

print('final num:', num)
机智的同学可能会问到这个问题,就是既然你之前说过了,Python已经有一个GIL来保证同一时间只能有一个线程来执行了,为什么这里还需要lock? 注意啦,这里的lock是用户级的lock,跟那个GIL没关系。简单来说:GIL只保证同一时刻只有一个线程在执行字节码,但 num -= 1 并不是原子操作(读取、相减、写回是多条字节码),线程可能在这几步之间被切换,所以仍然需要用户级的lock来保护共享数据。具体我们通过下图来看一下
5、RLock 递归锁
import threading
import time


def run1():
    """Increment ``num`` under the shared lock and return the value read back."""
    print("grab the first part data")
    global num
    with lock:  # released automatically, even if the update raises
        num += 1
    return num


def run2():
    """Increment ``num2`` under the shared lock and return the value read back."""
    print("grab the second part data")
    global num2
    with lock:
        num2 += 1
    return num2


def run3():
    """Call run1() and run2() while already holding the lock.

    This only works because ``lock`` is an RLock: the same thread may
    re-acquire it inside run1()/run2() without deadlocking.
    """
    with lock:
        res = run1()
        print('--------between run1 and run2-----')
        res2 = run2()
    print(res, res2)


if __name__ == '__main__':
    num, num2 = 0, 0
    lock = threading.RLock()
    for i in range(10):
        t = threading.Thread(target=run3)
        t.start()

    # Poll until only the main thread is left, printing the live thread count.
    while threading.active_count() != 1:
        print(threading.active_count())
    else:
        print('----all threads done---')
        print(num, num2)
6、Semaphore 信号量
Mutex 同时只允许一个线程更改数据,而Semaphore是同时允许一定数量的线程更改数据 ,比如厕所有3个坑,那最多只允许3个人上厕所,后面的人只能等里面有人出来了才能再进去。
import threading
import time


def run(n):
    """Do one second of simulated work while holding a semaphore slot."""
    # ``with`` releases the slot even if the body raises.
    with semaphore:
        time.sleep(1)
        print("run the thread: %s\n" % n)


if __name__ == '__main__':
    num = 0
    semaphore = threading.BoundedSemaphore(5)  # at most 5 threads inside run() at once
    threads = []
    for i in range(20):
        t = threading.Thread(target=run, args=(i,))
        t.start()
        threads.append(t)

    # Block on join() instead of spinning on active_count(): the original
    # ``while ...: pass`` loop burned CPU (and competed for the GIL) while
    # producing no output.
    for t in threads:
        t.join()

    print('----all threads done---')
    print(num)
import threading
import time


def light():
    """Drive the traffic light forever, one tick per second.

    Ticks 0-9 print red, ticks 10-29 set the event and print green, and
    tick 30 clears the event and restarts the cycle.
    """
    count = 0
    while True:
        if count < 10:
            # red light
            print("\033[41;1m红灯\033[0m", 10 - count)
        elif count < 30:
            # green light: raise the flag so cars may go
            event.set()
            print("\033[42;1m绿灯\033[0m", 30 - count)
        else:
            # end of the cycle: lower the flag and start over
            event.clear()
            count = 0
        time.sleep(1)
        count += 1


def car(n):
    """Report once per second whether car *n* is driving or waiting."""
    while True:
        if event.is_set():
            template = "\033[32;0m[%s]在路上飞奔.....\033[0m"
        else:
            template = "\033[31;0m[%s]等红灯等的花都谢了.....\033[0m"
        print(template % n)
        time.sleep(1)


if __name__ == "__main__":
    event = threading.Event()
    light_thread = threading.Thread(target=light)
    light_thread.start()
    car_thread = threading.Thread(target=car, args=("tesla",))
    car_thread.start()
import queue

# FIFO queue: items come out in the order they were put in.
q = queue.Queue()
for item in range(10):
    q.put(item)

for _ in range(10):
    value = q.get()
    print(value)

# Output:
# 0
# 1
# 2
# 3
# 4
# 5
# 6
# 7
# 8
# 9
queue is especially useful in threaded programming when information must be exchanged safely between multiple threads. class queue.Queue(maxsize=0) #先入先出 class queue.LifoQueue(maxsize=0) #last in first out class queue.PriorityQueue(maxsize=0) #存储数据时可设置优先级的队列 Constructor for a priority queue. maxsize is an integer that sets the upper bound limit on the number of items that can be placed in the queue. Insertion will block once this size has been reached, until queue items are consumed. If maxsize is less than or equal to zero, the queue size is infinite. The lowest valued entries are retrieved first (the lowest valued entry is the one returned by sorted(list(entries))[0]). A typical pattern for entries is a tuple in the form: (priority_number, data). exception queue.Empty Exception raised when non-blocking get() (or get_nowait()) is called on a Queue object which is empty. exception queue.Full Exception raised when non-blocking put() (or put_nowait()) is called on a Queue object which is full. Queue.qsize() Queue.empty() #return True if empty Queue.full() # return True if full Queue.put(item, block=True, timeout=None) Put item into the queue. If optional args block is true and timeout is None (the default), block if necessary until a free slot is available. If timeout is a positive number, it blocks at most timeout seconds and raises the Full exception if no free slot was available within that time. Otherwise (block is false), put an item on the queue if a free slot is immediately available, else raise the Full exception (timeout is ignored in that case). Queue.put_nowait(item) Equivalent to put(item, False). Queue.get(block=True, timeout=None) Remove and return an item from the queue. If optional args block is true and timeout is None (the default), block if necessary until an item is available. If timeout is a positive number, it blocks at most timeout seconds and raises the Empty exception if no item was available within that time.
Otherwise (block is false), return an item if one is immediately available, else raise the Empty exception (timeout is ignored in that case). Queue.get_nowait() Equivalent to get(False). Two methods are offered to support tracking whether enqueued tasks have been fully processed by daemon consumer threads. Queue.task_done() Indicate that a formerly enqueued task is complete. Used by queue consumer threads. For each get() used to fetch a task, a subsequent call to task_done() tells the queue that the processing on the task is complete. If a join() is currently blocking, it will resume when all items have been processed (meaning that a task_done() call was received for every item that had been put() into the queue). Raises a ValueError if called more times than there were items placed in the queue. Queue.join() block直到queue被消费完毕
获取线程ID的方式(python的threading因为封装的太好, 很多本源的东西在threading对象里是拿不到的. 首先需要说明的是python threading的name跟ident,这些看起来是线程名字,线程id其实只是个标识,注意是标识而已. 简单过了下threading创建对象及启动线程的代码,发现ident跟pstree查到的线程id是两码事. )
import time
import threading
import ctypes


def get_native_thread_id():
    """Return the OS-level id of the calling thread.

    Uses ``threading.get_native_id()`` where it exists (Python 3.8+).  On
    older interpreters it falls back to the raw ``gettid`` syscall, which
    only works on Linux x86-64 (186 is SYS_gettid on that platform).
    """
    native_id = getattr(threading, "get_native_id", None)
    if native_id is not None:
        return native_id()
    return ctypes.CDLL('libc.so.6').syscall(186)


# ``current_thread()`` replaces the deprecated ``currentThread()`` alias.
print(threading.current_thread())
print(threading.current_thread().ident)  # Python-level identifier
print(get_native_thread_id())            # id assigned by the operating system

# Sample output:
# <_MainThread(MainThread, started 139645800499008)>
# 139645800499008
# 74856
# Multiprocessing
import multiprocessing
import threading
import time


def thread_run():
    """Print the identifier of the thread this function runs in."""
    print("thread id ", threading.get_ident())


def run(name):
    """Process body: sleep one second, report, then start a thread."""
    time.sleep(1)
    print("process----", name)
    worker = threading.Thread(target=thread_run)
    worker.start()


if __name__ == "__main__":
    # Start ten child processes; each one starts a thread of its own.
    for _ in range(10):
        proc = multiprocessing.Process(target=run, args=("lzl",))
        proc.start()
# Process ids with multiprocessing
from multiprocessing import Process
import os


def info(title):
    """Print *title*, the module name, and the parent/current process ids."""
    print(title)
    print('module name:', __name__)
    parent_pid = os.getppid()  # id of the parent process
    own_pid = os.getpid()      # id of the process running this code
    print('parent process:', parent_pid)
    print('process id:', own_pid)


def f(name):
    """Child-process entry point: print process info, then greet *name*."""
    info('\033[31;1mfunction f\033[0m')
    print('hello', name)


if __name__ == '__main__':
    info('\033[32;1mmain process line\033[0m')
    child = Process(target=f, args=('bob',))
    child.start()
    child.join()

    # Sample output:
    # main process line
    # module name: __main__
    # parent process: 7668
    # process id: 7496
    # function f
    # module name: __mp_main__
    # parent process: 7496
    # process id: 7188
    # hello bob
① Queue
# Queue: communication between processes
import multiprocessing


def f(q):
    """Put one sample payload on the queue *q*."""
    payload = [42, None, "hello"]
    q.put(payload)


if __name__ == "__main__":
    q = multiprocessing.Queue()
    child = multiprocessing.Process(target=f, args=(q,))
    child.start()
    received = q.get()  # blocks until the child has put its item
    print(received)
    child.join()

    # Output:
    # [42, None, 'hello']
② Pipe
# Pipe: communication between processes
import multiprocessing


def f(conn):
    """Send one greeting through *conn* and then close that end."""
    conn.send("hello from child")
    conn.close()


if __name__ == "__main__":
    parent_conn, child_conn = multiprocessing.Pipe()
    child = multiprocessing.Process(target=f, args=(child_conn,))
    child.start()
    message = parent_conn.recv()  # blocks until the child has sent
    print(message)
    child.join()

    # Output:
    # hello from child
③ Manager
# Manager: sharing data between processes
import multiprocessing
import os


def f(d, l):
    """Store two fixed entries in *d* and append this process's pid to *l*."""
    for key, value in (("1", 1), ("2", 2)):
        d[key] = value
    l.append(os.getpid())


if __name__ == "__main__":
    manager = multiprocessing.Manager()
    d = manager.dict()  # dictionary proxy that all processes can share
    l = manager.list()
    p_list = []
    for _ in range(10):
        proc = multiprocessing.Process(target=f, args=(d, l))
        proc.start()
        p_list.append(proc)

    for proc in p_list:
        proc.join()

    print(d)
    print(l)

    # Sample output:
    # {'2': 2, '1': 1}
    # [516, 3628, 6076, 5020, 5396, 4752, 6072, 3608, 3704, 5124]
④ 进程同步
Without using the lock, output from the different processes is liable to get all mixed up.
from multiprocessing import Process, Lock


def f(l, i):
    """Print a greeting for worker *i* while holding the lock *l*.

    The ``with`` statement releases the lock even if printing fails, so one
    failing process cannot block all the others forever.
    """
    with l:
        print('hello world', i)


if __name__ == '__main__':
    lock = Lock()
    for num in range(10):
        Process(target=f, args=(lock, num)).start()
- apply 同步执行(串行)
- apply_async 异步执行(并行)
# Process pool
from multiprocessing import Process, Pool
import time
import os


def Foo(i):
    """Pool task: sleep two seconds, report the worker pid, return ``i + 100``."""
    time.sleep(2)
    print("in process", os.getpid())
    return i + 100


def Bar(arg):
    """Callback: report a finished task's result and the pid running the callback."""
    print('-->exec done:', arg, os.getpid())


if __name__ == "__main__":
    pool = Pool(5)  # at most 5 worker processes in the pool
    print("主进程:", os.getpid())
    for i in range(10):
        # apply_async submits the task without blocking; once Foo returns,
        # Bar is called with its result in the main process.  This is the
        # call that produces the sample output below.  The blocking
        # alternative, pool.apply(func=Foo, args=(i,)), runs the tasks
        # strictly one after another and never invokes Bar.
        pool.apply_async(func=Foo, args=(i,), callback=Bar)
    pool.close()
    # Wait for the pooled tasks to finish.  Without join() the program
    # would exit right away, before the asynchronous tasks complete.
    pool.join()

    # Sample output:
    # 主进程: 5896
    # in process 1520
    # in process 5596
    # -->exec done: 102 5896
    # in process 3384
    # -->exec done: 100 5896
    # -->exec done: 101 5896
    # in process 6112
    # -->exec done: 103 5896
    # in process 1472
    # -->exec done: 104 5896
    # in process 1520
    # in process 5596
    # -->exec done: 106 5896
    # -->exec done: 105 5896
    # in process 3384
    # -->exec done: 107 5896
    # in process 6112
    # -->exec done: 108 5896
    # in process 1472
    # -->exec done: 109 5896
- 无需线程上下文切换的开销
- 无需原子操作锁定及同步的开销
- 方便切换控制流,简化编程模型
- 高并发+高扩展性+低成本:一个CPU支持上万的协程都不是问题。所以很适合用于高并发处理。
- 无法利用多核资源:协程的本质是个单线程,它不能同时将 单个CPU 的多个核用上,协程需要和进程配合才能运行在多CPU上.当然我们日常所编写的绝大部分应用都没有这个必要,除非是cpu密集型应用。
- 进行阻塞(Blocking)操作(如IO时)会阻塞掉整个程序
使用yield实现协程操作例子 ,单线程下实现多并发的效果
# Coroutines implemented with yield
def consumer(name):
    """Generator-based consumer: reports every value sent into it."""
    print("------>starting eating baozi..")
    while True:
        new_baozi = yield  # suspend here until the producer sends a value
        print("[%s] is eating baozi %s" % (name, new_baozi))


def producer():
    """Send the numbers 1 to 5 to both consumers, reporting each one made."""
    for n in range(1, 6):
        con.send(n)   # resume the first consumer at its yield, passing n
        con2.send(n)
        print("\033[32;1m[producer]\033[0m is making baozi %s" % n)


if __name__ == "__main__":
    con = consumer("c1")   # creates the generator; nothing runs yet
    con2 = consumer("c2")
    next(con)              # advance each generator to its first yield
    next(con2)
    producer()

    # Output:
    # ------>starting eating baozi..
    # ------>starting eating baozi..
    # [c1] is eating baozi 1
    # [c2] is eating baozi 1
    # [producer] is making baozi 1
    # [c1] is eating baozi 2
    # [c2] is eating baozi 2
    # [producer] is making baozi 2
    # [c1] is eating baozi 3
    # [c2] is eating baozi 3
    # [producer] is making baozi 3
    # [c1] is eating baozi 4
    # [c2] is eating baozi 4
    # [producer] is making baozi 4
    # [c1] is eating baozi 5
    # [c2] is eating baozi 5
    # [producer] is making baozi 5
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#-Author-Lian
from greenlet import greenlet


def test2():
    """Second coroutine: print, hand control back to gr1, print again."""
    print("in test2 56")
    gr1.switch()
    print("in test2 78")


def test1():
    """First coroutine: print, switch to gr2, print, switch to gr2 again."""
    print("in test1 12")
    gr2.switch()
    print("in test1 34")
    gr2.switch()


gr1 = greenlet(test1)  # wrap each function in a greenlet
gr2 = greenlet(test2)
gr1.switch()  # explicit switch, comparable to next() on a yield-based generator

# Output:
# in test1 12
# in test2 56
# in test1 34
# in test2 78
Gevent 是一个第三方库,可以轻松通过gevent实现并发同步或异步编程,在gevent中用到的主要模式是Greenlet, 它是以C扩展模块形式接入Python的轻量级协程。 Greenlet全部运行在主程序操作系统进程的内部,但它们被协作式地调度。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#-Author-Lian
import gevent


def foo():
    """Print, yield to other greenlets for two seconds, print again."""
    print("runing in foo")
    gevent.sleep(2)
    print("context swith to foo again")


def bar():
    """Print, yield to other greenlets for one second, print again."""
    print("context to bar")
    gevent.sleep(1)
    print("context to swith bar to bar")


# Spawn both coroutines and wait until each of them has finished.
jobs = [gevent.spawn(task) for task in (foo, bar)]
gevent.joinall(jobs)

# Output:
# runing in foo
# context to bar
# context to swith bar to bar
# context swith to foo again
# Synchronous vs. asynchronous performance comparison
from gevent import monkey

# Patch the standard library BEFORE importing anything that pulls in
# socket/ssl (urllib.request does).  Patching afterwards makes gevent emit a
# MonkeyPatchWarning about ssl and can leave blocking code unpatched.
monkey.patch_all()

import time
import urllib.request

import gevent


def f(url):
    """Download *url* and print how many bytes were received."""
    print("GET: %s" % url)
    # The context manager closes the HTTP response once it has been read.
    with urllib.request.urlopen(url) as resp:
        data = resp.read()
    print("%d bytes received from %s" % (len(data), url))


urls = [
    'https://www.python.org/',
    'https://www.yahoo.com/',
    'https://github.com/',
]

# Synchronous cost: fetch the URLs one after another.
time_start = time.time()
for url in urls:
    f(url)
print("同步cost time", time.time() - time_start)

# Asynchronous cost: one greenlet per URL, so the network waits overlap.
async_time_start = time.time()
gevent.joinall([gevent.spawn(f, url) for url in urls])
print("异步cost time", time.time() - async_time_start)

# Sample output:
# GET: https://www.python.org/
# 47446 bytes received from https://www.python.org/
# GET: https://www.yahoo.com/
# 431619 bytes received from https://www.yahoo.com/
# GET: https://github.com/
# 25478 bytes received from https://github.com/
# 同步cost time 4.225241661071777
# GET: https://www.python.org/
# GET: https://www.yahoo.com/
# GET: https://github.com/
# 25478 bytes received from https://github.com/
# 461925 bytes received from https://www.yahoo.com/
# 47446 bytes received from https://www.python.org/
# 异步cost time 2.5521459579467773
from gevent import monkey

# Patch the standard library before anything else is imported.
monkey.patch_all()

import sys
import time

import gevent
# Only gevent's socket module is used; the original imported the stdlib
# ``socket`` first and then immediately shadowed it with this name.
from gevent import socket


def server(port):
    """Accept connections on *port* forever, one greenlet per client."""
    s = socket.socket()
    s.bind(('', port))
    s.listen(5)
    while True:
        conn, addr = s.accept()
        gevent.spawn(handle_request, conn)


def handle_request(conn):
    """Echo everything received on *conn* until the peer closes it."""
    try:
        while True:
            data = conn.recv(1024)
            print("recv:", data)
            if not data:
                # An empty read means the client closed its side.  Stop
                # here; the original kept looping on the dead connection.
                conn.shutdown(socket.SHUT_WR)
                break
            # sendall() retries until the whole buffer has been written;
            # send() may write only part of it.
            conn.sendall(data)
    except Exception as ex:
        print(ex)
    finally:
        conn.close()


if __name__ == '__main__':
    server(8001)
import socket

HOST = 'localhost'  # the remote host
PORT = 8001         # the same port as used by the server

# The context manager closes the socket however the loop ends; in the
# original, ``s.close()`` sat after ``while True`` and was never reached.
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
    s.connect((HOST, PORT))
    while True:
        try:
            line = input(">>:")
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C: leave the loop so the socket gets closed.
            break
        msg = bytes(line, encoding="utf8")
        if not msg:
            # Nothing goes over the wire for an empty message, so the echo
            # server would never answer and recv() would block forever.
            continue
        s.sendall(msg)
        data = s.recv(1024)
        if not data:
            # An empty read means the server closed the connection.
            break
        print('Received', repr(data))
1. CPU资源浪费,可能鼠标点击的频率非常小,但是扫描线程还是会一直循环检测,这会造成很多的CPU资源浪费;如果扫描鼠标点击的接口是阻塞的呢?
2. 如果是堵塞的,又会出现下面这样的问题,如果我们不但要扫描鼠标点击,还要扫描键盘是否按下,由于扫描鼠标时被堵塞了,那么可能永远不会去扫描键盘;
3. 如果一个循环需要扫描的设备非常多,这又会引来响应时间的问题;
1. 有一个事件(消息)队列;
2. 鼠标按下时,往这个队列中增加一个点击事件(消息);
3. 有个循环,不断从队列取出事件,根据不同的事件,调用不同的函数,如onClick()、onKeyDown()等;
4. 事件(消息)一般都各自保存各自的处理函数指针,这样,每个消息都有独立的处理函数;
- 程序中有许多任务,而且…
- 任务之间高度独立(因此它们不需要互相通信,或者等待彼此)而且…
- 在等待事件到来时,某些任务会阻塞。
总结:异步IO涉及到了事件驱动模型,进程中维护一个消息队列,当客户端有请求时,就会把请求添加到消息队列中,线程从消息队列中轮询取要处理的请求,遇到I/O阻塞时(操作系统处理调用I/O接口处理,与程序无关),则进行上下文切换,处理其他请求,当I/O操作完成时,调用回调函数,告诉线程处理完成,然后再切换回来,处理完成后返回给客户端。Nginx能处理高并发就是用的这个原理。
