Python - asyncio
入门
'''
1.event_loop: 事件循环,相当于一个无限循环,可以把一些函数注册到这个事件循环上,当满足发生条件
的时候,就调用相应的处理方法
2.coroutine: 协程对象类型,可以使用async 关键字来定义一个方法,这个方法在调用时不会立即被执行,而是会返回一个协程对象
我们可以将协程对象注册到事件循环中,它会被事件循环调用
3.task: 任务,这是协程对象的进一步封装,包含协程对象的各个状态
4.也可以将coroutine 封装成 task, 然后通过 await 处理task
'''
# Define a coroutine function; calling it only creates a coroutine object.
async def execute(x):
    """Print the given number once the coroutine is actually driven."""
    print(f'Number:{x}')


coroutine = execute(1)  # the body does not run at this point
print(coroutine)
# This demo never awaits the coroutine; close it explicitly so the
# interpreter does not emit "coroutine ... was never awaited".
coroutine.close()
out:
'''
<coroutine object execute at 0x000001C1361B3140>
'''
# A task can also be created directly with asyncio.ensure_future(coroutine).
import asyncio


async def execute(x):
    """Print the given number."""
    print(f'Number:{x}')


coroutine = execute(1)
# Create the loop explicitly: falling back to an implicit "current" loop
# when none is running is deprecated in recent Python versions.
loop = asyncio.new_event_loop()
task = asyncio.ensure_future(coroutine, loop=loop)
print(task)  # still pending
try:
    loop.run_until_complete(task)
    print(task)  # finished, result=None
finally:
    loop.close()
# out:
'''
<coroutine object execute at 0x000001C1361B3140>
<Task pending name='Task-1' coro=<execute() running at E:\PyProject\asyncioDemo\main.py:31>>
Number:1
<Task finished name='Task-1' coro=<execute() done, defined at E:\PyProject\asyncioDemo\main.py:31> result=None>
'''
Python 3.7 引入: asyncio.run()
方法驱动协程,将以前的驱动协程方式进一步封装:
import asyncio


async def execute(x):
    """Print the given number."""
    print(f'Number:{x}')


async def main():
    """Wrap execute(1) in a task and wait for it to finish."""
    # `await execute(1)` would work here as well.
    task = asyncio.create_task(execute(1))
    await task
    # Use `await asyncio.gather(...)` or `await asyncio.wait(...)`
    # to handle several tasks at once.


if __name__ == '__main__':
    asyncio.run(main())
常用函数
asyncio.create_task
官方解释:
The asyncio.create_task() function to run coroutines concurrently as asyncio Tasks.
import asyncio
async def execute(x):
    """Print the given number."""
    print(f'Number:{x}')


async def main():
    """Schedule execute(1) as a task and wait until it has completed."""
    # Keep a reference to the task and await it: a fire-and-forget task
    # is not guaranteed to finish before main() returns and the loop
    # shuts down.
    task = asyncio.create_task(execute(1))
    await task


if __name__ == '__main__':
    asyncio.run(main())
asyncio.gather()
官方解释:
Coroutines will be wrapped in a future and scheduled in the event
loop. They will not necessarily be scheduled in the same order as
passed in.
import asyncio, time
async def main(delay=1.0):
    """Print a greeting, sleep for `delay` seconds, then print a farewell."""
    print(f'{time.ctime()} Hello!')
    await asyncio.sleep(delay)
    print(f'{time.ctime()} Goodbye!')


def run_demo(delay=1.0):
    """Gather every task pending on a fresh loop and return their results.

    Prints the aggregate future before and after the loop has run to show
    that asyncio.gather() itself returns a future.
    """
    # Create the loop explicitly instead of relying on an implicit one.
    loop = asyncio.new_event_loop()
    try:
        loop.create_task(main(delay))
        pending = asyncio.all_tasks(loop=loop)
        group = asyncio.gather(*pending, return_exceptions=True)
        print(f'group: {group}')  # out: group: <_GatheringFuture pending>
        loop.run_until_complete(group)
        print(f'group: {group}')  # out: group: <_GatheringFuture finished result=[None]>
        return group.result()
    finally:
        # Close the loop even if the demo fails part-way.
        loop.close()


if __name__ == '__main__':
    run_demo()
从输出可以看出asyncio.gather 返回的也是一个future
Return a future aggregating results from the given coroutines/futures.
return_exceptions = True:
# 该案例说明了:
# 1.return_exceptions 为True时,task抛出的异常将会作为一个对象返回,不会影响其他task的执行
# 2.gather 函数返回结果的顺序和传入参数的顺序一致
import asyncio
async def demo():
    """Return the string 'test'."""
    return 'test'


async def foo():
    """Return the string 'foo'."""
    return 'foo'


async def bar():
    """Always fail with a RuntimeError."""
    raise RuntimeError('fake runtime error')


async def main():
    """Run foo, bar and demo concurrently and return their outcomes.

    With return_exceptions=True the exception raised by bar() is returned
    as an ordinary item of the result list, in argument order.
    """
    task1 = asyncio.create_task(foo())
    task2 = asyncio.create_task(bar())
    task3 = asyncio.create_task(demo())
    # Await the aggregate while the loop is still running; returning the
    # un-awaited future and reading it after asyncio.run() has closed the
    # loop only works by accident of shutdown ordering.
    return await asyncio.gather(task2, task1, task3, return_exceptions=True)


if __name__ == '__main__':
    print(asyncio.run(main()))  # [RuntimeError('fake runtime error'), 'foo', 'test']
return_exceptions = False:
# task2 抛出运行时异常,该异常会立即传播给等待 gather 的调用方;gather 本身不会取消其他任务,但程序因异常退出时 asyncio.run 会取消尚未完成的任务
import asyncio
async def demo():
    """Return the string 'test'."""
    return 'test'


async def foo():
    """Return the string 'foo'."""
    return 'foo'


async def bar():
    """Always fail with a RuntimeError."""
    raise RuntimeError('fake runtime error')


async def main():
    """Run foo, bar and demo concurrently; the first failure propagates.

    Raises:
        RuntimeError: raised by bar() and re-raised by the awaited gather
            because return_exceptions is False.
    """
    task1 = asyncio.create_task(foo())
    task2 = asyncio.create_task(bar())
    task3 = asyncio.create_task(demo())
    # Await inside the running loop so the exception surfaces here instead
    # of from a future inspected after the loop has been closed.
    return await asyncio.gather(task2, task1, task3, return_exceptions=False)


if __name__ == '__main__':
    print(asyncio.run(main()))
'''
Traceback (most recent call last):
File "E:\PyProject\pytestDemo\demo9.py", line 24, in <module>
print(asyncio.run(main()).result()) # [RuntimeError('fake runtime error'), 'foo', 'test']
File "E:\PyProject\pytestDemo\demo9.py", line 14, in bar
raise RuntimeError('fake runtime error')
RuntimeError: fake runtime error
'''
loop.all_tasks
作用:返回loop 中尚未完成的task
import asyncio, time
async def main():
    """Print a greeting, sleep one second, then print a farewell."""
    print(f'{time.ctime()} Hello!')
    await asyncio.sleep(1.0)
    print(f'{time.ctime()} Goodbye!')


# Create the loop explicitly instead of relying on an implicit one.
loop = asyncio.new_event_loop()
task = loop.create_task(main())
pending = asyncio.all_tasks(loop=loop)
print(pending)  # {<Task pending name='Task-1' coro=<main() running at E:\PyProject\pytestDemo\demo4.py:8>>}
# The demo only inspects the pending set and never runs the task, so
# cancel it and let the loop process the cancellation before closing;
# otherwise Python reports "Task was destroyed but it is pending!".
task.cancel()
loop.run_until_complete(asyncio.gather(task, return_exceptions=True))
loop.close()
绑定回调函数
import asyncio

import requests


async def request():
    """Fetch the Baidu home page and return the response object."""
    url = 'http://www.baidu.com'
    # NOTE: requests.get() is a blocking call; the event loop is stalled
    # while it runs.
    status = requests.get(url)
    return status


def callback(task):
    """Done-callback: print the result of the finished task."""
    print(f'Status:{task.result()}')


# Create the loop explicitly instead of relying on an implicit one.
loop = asyncio.new_event_loop()
task = asyncio.ensure_future(request(), loop=loop)
task.add_done_callback(callback)  # invoked once the task has finished
print(task)
try:
    loop.run_until_complete(task)  # drive the task to completion
    print(task)
finally:
    loop.close()
# The return value is also available directly through task.result().
import asyncio

import requests


async def request():
    """Fetch the Baidu home page and return the response object."""
    url = 'http://www.baidu.com'
    # NOTE: requests.get() is a blocking call; the event loop is stalled
    # while it runs.
    status = requests.get(url)
    return status  # becomes task.result()


# Create the loop explicitly instead of relying on an implicit one.
loop = asyncio.new_event_loop()
task = asyncio.ensure_future(request(), loop=loop)
print(task)
try:
    loop.run_until_complete(task)  # drive the task to completion
    print(task.result())
    print(task)
finally:
    loop.close()
Semaphore
网络客户端应该限流,以免对服务器发起过多的请求。
信号量是同步原语,比锁灵活。信号量可以配置最大数量,而且一个信号量可由多个协程持有,因此特别适合于限制活动的并发协程数量
'''
1.asyncio.Semaphore 有一个内部计数器。每次使用await semaphore.acquire() ,计数器递减
2.每次使用semaphore.release() ,计数器递增。需要注意的是该方法不是协程,因为它从不阻塞
3.计数器的初始值在实例化Semaphore 时设定
semaphore = asyncio.Semaphore(concur_req)
4.若计数器大于零,则使用await处理.acquire() 方法没有延迟,若计数器为零,则.acquire() 会挂起正在等待的协程,
直到其他协程在同一个Semaphore实例上调用.release(),递增计数器。一般不直接调用这些方法,把semaphore当作
异步上下文管理器使用更安全
5.协程方法Semaphore.__aenter__异步等待.acquire(),协程方法__aexit__调用.release()。
'''
http://www.manongjc.com/detail/30-cncrjfiipztubrh.html
多任务协程
多个任务怎么处理?
- 可以创建一个task列表交给asyncio.wait(tasks) (注意: 直接向 asyncio.wait 传入协程对象的用法自 Python 3.8 起弃用, 3.11 起已不再支持, 需先封装成task)
- 可以创建一个task列表交给asyncio.gather(*tasks)
import asyncio

import requests


async def request():
    """Fetch the Baidu home page, print the response and return it."""
    url = 'http://www.baidu.com'
    # NOTE: requests.get() is a blocking call, so the coroutines below
    # still run one after another.
    resp = requests.get(url)
    print(resp)
    return resp


async def main():
    """Run four request() coroutines through gather and return the responses."""
    tasks = [request() for _ in range(1, 5)]
    # Return the gathered results; previously they were discarded and
    # asyncio.run(main()) always produced None.
    return await asyncio.gather(*tasks)


if __name__ == '__main__':
    results = asyncio.run(main())
# 但其实也依然是顺序执行,没有实现异步操作
await关键字
await关键字:它可以将耗时等待的操作挂起,让出控制权。如果协程在执行的时候遇到await,事件循环就会将本协程挂起,转而执行别的协程,直到其他协程挂起或执行完毕
await 后面的对象必须是如下格式之一:
- 一个原生协程对象
- 一个由types.coroutine 修饰的生成器,这个生成器返回协程对象
- 一个包含__await__方法的对象返回的一个迭代器
案例
import requests
import time
import logging
import asyncio
# Synchronous baseline: issue the requests one after another and time them.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')
url = 'https://www.httpbin.org/delay/1'  # endpoint that responds after a 1 s delay
start_time = time.time()
for _ in range(1, 10):
    resp = requests.get(url)
    logging.info(f'scraping {url}')
end_time = time.time()
# Elapsed time is end minus start; the reversed subtraction logged a
# negative duration.
logging.info(f'total time {end_time - start_time} s')
#out 同步访问耗时 18s:
'''
2022-11-20 18:13:20,426 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:22,395 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:24,697 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:26,675 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:28,680 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:30,728 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:32,905 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:34,886 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:36,980 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:36,980 - INFO: total time -18.95430302619934 s
'''
结合aiohttp 模块实现异步请求,优化上面程序:
import aiohttp
async def get(url):
    """GET `url` with aiohttp, read the body and return the response."""
    # The context manager closes the session even when the request or the
    # body read fails, so no connection is leaked.
    async with aiohttp.ClientSession() as session:
        resp = await session.get(url)
        await resp.text()
        return resp


async def request():
    """Fetch the one-second-delay endpoint once."""
    url = 'https://www.httpbin.org/delay/1'
    await get(url)


async def main():
    """Start ten requests concurrently and wait for all of them."""
    tasks = [asyncio.ensure_future(request()) for _ in range(10)]
    await asyncio.wait(tasks)


start = time.time()
# asyncio.run() creates and closes the event loop itself.
asyncio.run(main())
end = time.time()
print(f'cost time:{end - start}')
# out 完成10个协程总计花费约2s:
'''
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>
cost time:2.4535582065582275
Process finished with exit code 0
'''
另一种实现协程的方式(基于生成器的协程),Python 3.8 已经弃用,Python 3.11 已移除:
import aiohttp
start = time.time()


# Generator-based coroutines: deprecated since Python 3.8. This snippet
# only runs on interpreters that still provide asyncio.coroutine.
@asyncio.coroutine  # equivalent to `async def get(url)`
def get(url):
    """GET `url` with aiohttp, read the body and return the response."""
    session = aiohttp.ClientSession()
    try:
        resp = yield from session.get(url)  # equivalent to `await session.get(url)`
        yield from resp.text()
    finally:
        # Close the session even if the request or the body read fails.
        yield from session.close()
    return resp


@asyncio.coroutine
def request():
    """Fetch the one-second-delay endpoint once and report progress."""
    url = 'https://www.httpbin.org/delay/1'
    print(f'Waiting for {url}...')
    resp = yield from get(url)
    print(f'ger response{resp}')


# Create the loop explicitly and bind the tasks to it.
loop = asyncio.new_event_loop()
tasks = [asyncio.ensure_future(request(), loop=loop) for _ in range(10)]
try:
    loop.run_until_complete(asyncio.wait(tasks))
finally:
    loop.close()
end = time.time()
print(f'cost time:{end - start}')
异常
Traceback (most recent call last):
File "D:\pyproject\fa_search_test\tools\async_get_data.py", line 290, in <module>
asyncio.get_event_loop().run_until_complete(main())
File "D:\code_tool\python\lib\asyncio\base_events.py", line 646, in run_until_complete
return future.result()
File "D:\pyproject\fa_search_test\tools\async_get_data.py", line 244, in main
resp_data = await resp.json()
TypeError: object dict can't be used in 'await' expression
解决:https://segmentfault.com/q/1010000043220918
也就是:此处的 resp 不是 aiohttp 的响应对象(例如来自 requests/httpx),它的 json() 是同步方法,直接返回 dict,因此不能用 await 关键字;只有 aiohttp 的 resp.json() 是协程,才需要 await
学习资料:
官网文档: https://docs.python.org/3/library/asyncio.html
https://www.doc88.com/p-18573348452914.html
本文来自博客园,作者:chuangzhou,转载请注明原文链接:https://www.cnblogs.com/czzz/p/16908534.html