Concurrent programming: coroutines and asynchronous calls.
I. A supplement on process pools: asynchronous calls plus the callback mechanism.
As we said earlier, an asynchronous call does not wait for the task's result after submitting the task; it simply moves on to the next line of code.
So how do we know whether a task has finished, and when it finished? This is where the callback mechanism comes in:
# Version 1: collect all the futures first, then process the results after
# the pool has shut down.
# from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
# import requests
# import os
# import time
# import random
#
# def get(url):
#     print('%s GET %s' %(os.getpid(),url))
#     response=requests.get(url)
#     time.sleep(random.randint(1,3))
#
#     if response.status_code == 200:
#         return response.text
#
# def parse(res):
#     print('%s parsed a result of length %s' %(os.getpid(),len(res)))
#
# if __name__ == '__main__':
#     urls=['https://www.baidu.com']*8+['https://www.python.org']
#
#     pool=ProcessPoolExecutor(4)
#     objs=[]
#     for url in urls:
#         obj=pool.submit(get,url)
#         objs.append(obj)
#
#     pool.shutdown(wait=True)
#     # Problems:
#     # 1. Return values are not handled as they become available; nothing is
#     #    processed until every task has finished.
#     # 2. Parsing runs serially: if one parse takes 2s, nine parses take 18s.
#     for obj in objs:
#         res=obj.result()
#         parse(res)


# Version 2: parse inside the worker itself. Results are now handled promptly,
# but downloading and parsing are coupled into one task.
# from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
# import requests
# import os
# import time
# import random
#
# def get(url):
#     print('%s GET %s' %(os.getpid(),url))
#     response=requests.get(url)
#     time.sleep(random.randint(1,3))
#
#     if response.status_code == 200:
#         parse(response.text)
#
# def parse(res):
#     print('%s parsed a result of length %s' %(os.getpid(),len(res)))
#
# if __name__ == '__main__':
#     urls=['https://www.baidu.com']*8+['https://www.python.org']
#
#     pool=ProcessPoolExecutor(4)
#     for url in urls:
#         pool.submit(get,url)


# Version 3: bind a callback to each future with add_done_callback. The
# callback receives the finished future object and extracts the result itself.
# from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
# import requests
# import os
# import time
# import random
#
# def get(url):
#     print('%s GET %s' %(os.getpid(),url))
#     response=requests.get(url)
#     time.sleep(random.randint(1,3))
#
#     if response.status_code == 200:
#         # instead of doing the parsing here, hand the text back
#         return response.text
#
# def parse(obj):
#     res=obj.result()
#     print('%s parsed a result of length %s' %(os.getpid(),len(res)))
#
# if __name__ == '__main__':
#     urls=['https://www.baidu.com']*8+['https://www.python.org']
#
#     pool=ProcessPoolExecutor(4)
#     for url in urls:
#         obj=pool.submit(get,url)
#         obj.add_done_callback(parse)
#
#     print('main process',os.getpid())


# Version 4: the same callback pattern with a thread pool, printing the
# thread name instead of the pid.
from concurrent.futures import ProcessPoolExecutor,ThreadPoolExecutor
from threading import current_thread
import requests
import os
import time
import random

def get(url):
    print('%s GET %s' %(current_thread().name,url))
    response=requests.get(url)
    time.sleep(random.randint(1,3))

    if response.status_code == 200:
        # instead of doing the parsing here, hand the text back
        return response.text

def parse(obj):
    res=obj.result()
    print('%s parsed a result of length %s' %(current_thread().name,len(res)))

if __name__ == '__main__':
    urls=['https://www.baidu.com']*8+['https://www.python.org']

    pool=ThreadPoolExecutor(4)
    for url in urls:
        obj=pool.submit(get,url)
        obj.add_done_callback(parse)

    print('main thread',current_thread().name)
With the thread pool, the parsing work is no longer done by the main thread: there is no real "main versus worker" division of labor among threads here, and the callback runs in whichever pool thread finished the task. With the process pool, by contrast, the callbacks all run back in the main process.
II. Thread queue and thread Event
Thread queue:
import queue

# q=queue.Queue(3)      # queue: first in, first out
# q.put(1)
# q.put(2)
# q.put(3)
# # q.put(4)            # the queue is full, so a fourth put would block
#
# print(q.get())
# print(q.get())
# print(q.get())

# q=queue.LifoQueue(3)  # stack: last in, first out
#
# q.put('a')
# q.put('b')
# q.put('c')
#
# print(q.get())
# print(q.get())
# print(q.get())

q=queue.PriorityQueue(3)  # priority queue: store values as small tuples;
                          # the first element is the priority, and the
                          # smaller the number, the higher the priority
q.put((10,'user1'))
q.put((-3,'user2'))
q.put((-2,'user3'))

print(q.get())   # (-3, 'user2') comes out first: lowest number, highest priority
print(q.get())   # (-2, 'user3')
print(q.get())   # (10, 'user1')
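Queues become most useful when several threads share one. The sketch below (the producer/consumer names and the sentinel value are mine, not from the original notes) shows the typical hand-off pattern, with put blocking whenever the queue is full:

import queue
from threading import Thread

q=queue.Queue(3)

def producer():
    for i in range(5):
        q.put(i)      # blocks while the queue already holds 3 items
    q.put(None)       # sentinel: tell the consumer there is nothing more

def consumer():
    while True:
        item=q.get()
        if item is None:
            break
        print('consumed',item)

t1=Thread(target=producer)
t2=Thread(target=consumer)
t1.start()
t2.start()
t1.join()
t2.join()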
Thread Event: one thread signals other threads that some condition has become true. Below, three connector threads keep retrying (waiting up to 1 second per attempt) until the checker thread calls event.set():
from threading import Event,current_thread,Thread
import time

event=Event()

def check():
    print('%s is checking whether the service is up....' %current_thread().name)
    time.sleep(5)
    event.set()     # signal the waiting threads that the check has passed

def connect():
    count=1
    while not event.is_set():
        if count == 4:
            print('too many attempts, please retry later')
            return
        print('%s attempting connection #%s...' %(current_thread().name,count))
        event.wait(1)   # wait up to 1 second for the event before retrying
        count+=1
    print('%s starting to connect...' % current_thread().name)

if __name__ == '__main__':
    t1=Thread(target=connect)
    t2=Thread(target=connect)
    t3=Thread(target=connect)
    c1=Thread(target=check)
    t1.start()
    t2.start()
    t3.start()
    c1.start()
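One detail worth knowing: event.wait(timeout) returns True as soon as the event is set and False if the timeout expires, so the retry loop above can also test the return value directly (a variant sketch of the same connect function, not part of the original code):

def connect():
    for count in range(1,4):
        print('%s attempting connection #%s...' %(current_thread().name,count))
        if event.wait(1):    # True as soon as check() calls event.set()
            print('%s starting to connect...' %current_thread().name)
            return
    print('too many attempts, please retry later')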
III. Coroutines
Coroutine: concurrency implemented within a single thread. A coroutine is an abstraction we build ourselves; unlike a process or thread, it is not an entity the operating system knows about.
The essence of concurrency: switching + saving state.
Parallelism: only possible with multiple CPUs; with several CPUs, several tasks can literally run at the same moment.
Serial execution: one task runs to completion before the next one starts, which is not the same thing as concurrency.
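A tiny illustration of "switching + saving state" (this miniature counter is my own, not from the original notes): a generator pauses at yield with its local variables intact and resumes from exactly that point.

def counter():
    i=0
    while True:
        yield i    # pause here; the local variable i is saved
        i+=1

g=counter()
print(next(g))   # 0
print(next(g))   # 1 -- execution resumed right after the yield, with i remembered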
1. Single-threaded concurrency:
Concurrent execution:
# import time
# def consumer():
#     '''task 1: receive data and process it'''
#     while True:
#         x=yield
#
# def producer():
#     '''task 2: produce data'''
#     g=consumer()
#     next(g)
#     for i in range(10000000):
#         g.send(i)
#
# start=time.time()
# # yield saves each task's state, so the two tasks switch back and forth:
# # the effect is concurrency.
# # PS: add a print to each task and you will clearly see the two tasks
# # printing in alternation, i.e. executing concurrently.
# producer()  # 1.0202116966247559
#
# stop=time.time()
# print(stop-start)
Serial execution:
# import time
# def consumer(res):
#     '''task 1: receive data and process it'''
#     pass
#
# def producer():
#     '''task 2: produce data'''
#     res=[]
#     for i in range(10000000):
#         res.append(i)
#
#     consumer(res)
#     # return res
#
# start=time.time()
# res=producer()
# stop=time.time()
# print(stop-start)
That said, concurrency does not pay off for every kind of program: the result depends on the type of task.
For example, here is a purely computational task run serially and then "concurrently":
# A purely computational task, executed serially
import time

def task1():
    res=1
    for i in range(1000000):
        res+=i

def task2():
    res=1
    for i in range(1000000):
        res*=i

start=time.time()
task1()
task2()
stop=time.time()
print(stop-start)

# The same purely computational tasks, executed "concurrently" via yield
import time

def task1():
    res=1
    for i in range(1000000):
        res+=i
        yield
    time.sleep(10000)   # never reached: task2 advances the generator exactly
    print('task1')      # 1,000,000 times, one per yield

def task2():
    g=task1()
    res=1
    for i in range(1000000):
        res*=i
        next(g)
    print('task2')

start=time.time()
# PS: add a print to each task and you will clearly see the two tasks
# printing in alternation, i.e. executing concurrently.
task2()
stop=time.time()
print(stop-start)   # slower than the serial version: for pure computation,
                    # the switching only adds overhead
2. Switching on IO within a single thread: greenlet lets us switch between tasks by hand but cannot detect IO by itself; gevent switches automatically whenever a greenlet blocks on IO, and with monkey.patch_all() it also recognizes ordinary blocking calls such as time.sleep:
# Version 1: greenlet switches manually via switch(); it does NOT detect IO,
# so the time.sleep(30) below simply blocks the whole thread.
# from greenlet import greenlet
# import time
#
# def eat(name):
#     print('%s eat 1' %name)
#     time.sleep(30)
#     g2.switch('alex')
#     print('%s eat 2' %name)
#     g2.switch()
# def play(name):
#     print('%s play 1' %name)
#     g1.switch()
#     print('%s play 2' %name)
#
# g1=greenlet(eat)
# g2=greenlet(play)
#
# g1.switch('egon')


# Version 2: gevent switches automatically, but only on gevent's own blocking
# calls such as gevent.sleep.
# import gevent
#
# def eat(name):
#     print('%s eat 1' %name)
#     gevent.sleep(5)
#     print('%s eat 2' %name)
# def play(name):
#     print('%s play 1' %name)
#     gevent.sleep(3)
#     print('%s play 2' %name)
#
# g1=gevent.spawn(eat,'egon')
# g2=gevent.spawn(play,'alex')
#
# # gevent.sleep(100)
# # g1.join()
# # g2.join()
# gevent.joinall([g1,g2])


# Version 3: with monkey.patch_all(), ordinary blocking calls such as
# time.sleep are patched so that gevent can switch on them too.
# from gevent import monkey;monkey.patch_all()
# import gevent
# import time
#
# def eat(name):
#     print('%s eat 1' %name)
#     time.sleep(5)
#     print('%s eat 2' %name)
# def play(name):
#     print('%s play 1' %name)
#     time.sleep(3)
#     print('%s play 2' %name)
#
# g1=gevent.spawn(eat,'egon')
# g2=gevent.spawn(play,'alex')
#
# # gevent.sleep(100)
# # g1.join()
# # g2.join()
# gevent.joinall([g1,g2])


# Version 4: the same demo, printing current_thread().name to see which
# "thread" each task reports (under monkey patching, greenlets typically
# show up as DummyThread-n, while everything still runs in one real thread).
from gevent import monkey;monkey.patch_all()
from threading import current_thread
import gevent
import time

def eat():
    print('%s eat 1' %current_thread().name)
    time.sleep(5)
    print('%s eat 2' %current_thread().name)
def play():
    print('%s play 1' %current_thread().name)
    time.sleep(3)
    print('%s play 2' %current_thread().name)

g1=gevent.spawn(eat)
g2=gevent.spawn(play)

# gevent.sleep(100)
# g1.join()
# g2.join()
print(current_thread().name)
gevent.joinall([g1,g2])
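As a quick application (my own sketch, not part of the original notes): with the sockets monkey-patched, the same pattern turns the crawler from section I into single-threaded concurrent downloads, where the total cost is roughly that of the slowest request rather than the sum of all of them:

from gevent import monkey;monkey.patch_all()
import gevent
import requests
import time

def get(url):
    print('GET %s' %url)
    response=requests.get(url)
    print('%s chars from %s' %(len(response.text),url))

start=time.time()
gevent.joinall([
    gevent.spawn(get,'https://www.baidu.com'),
    gevent.spawn(get,'https://www.python.org'),
])
print('cost:',time.time()-start)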
Building on gevent, we can start multiple processes, start multiple threads inside each process, and then run coroutines inside each thread; for IO-bound workloads, stacking the three levels this way greatly raises the amount of concurrency a single program can handle.
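A minimal sketch of the processes-plus-coroutines idea (the process count, greenlet count, and the simulated IO task are all made up for illustration):

from multiprocessing import Process
import os
import gevent   # only *used* inside the children, each of which gets its own hub

def io_task(n):
    print('%s greenlet %s start' %(os.getpid(),n))
    gevent.sleep(1)    # simulated IO: control switches to the other greenlets
    print('%s greenlet %s done' %(os.getpid(),n))

def worker():
    # inside one process, run three greenlets concurrently in a single thread
    gevent.joinall([gevent.spawn(io_task,n) for n in range(3)])

if __name__ == '__main__':
    procs=[Process(target=worker) for _ in range(2)]   # 2 processes x 3 greenlets each
    for p in procs:
        p.start()
    for p in procs:
        p.join()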