Python - asyncio

入门

'''
1.event_loop: 事件循环,相当于一个无限循环,可以把一些函数注册到这个事件循环上,当满足发生条件
  的时候,就调用相应的处理方法
2.coroutine: 协程对象类型,可以使用 async 关键字来定义一个方法,这个方法在调用时不会立即被执行,而是会返回一个协程对象
  我们可以将协程对象注册到事件循环中,它会被事件循环调用
3.task: 任务,这是协程对象的进一步封装,包含协程对象的各个状态
4.也可以将 coroutine 封装成 task, 然后通过 await 处理 task
'''

# Define a coroutine function; calling it only builds a coroutine object.
async def execute(x):
    """Print ``Number:<x>`` once the coroutine is actually driven.

    Args:
        x: Value interpolated into the printed line.
    """
    print(f'Number:{x}')


coroutine = execute(1)  # the body does NOT run here; nothing is printed yet
print(coroutine)
# The object is never awaited in this example, so close it explicitly to
# avoid "RuntimeWarning: coroutine 'execute' was never awaited".
coroutine.close()

out:
'''
<coroutine object execute at 0x000001C1361B3140>
'''


# A task can also be created directly with asyncio.ensure_future(coroutine).
import asyncio


async def execute(x):
    """Print ``Number:<x>``; implicitly returns None.

    Args:
        x: Value interpolated into the printed line.
    """
    print(f'Number:{x}')


# Create the loop explicitly: calling asyncio.get_event_loop() (or
# ensure_future() without loop=) while no loop is running is deprecated
# and raises RuntimeError on newer Python releases.
loop = asyncio.new_event_loop()
coroutine = execute(1)
task = asyncio.ensure_future(coroutine, loop=loop)
print(task)  # pending: the coroutine has not been started yet
try:
    loop.run_until_complete(task)
finally:
    loop.close()  # always release the loop's resources
print(task)  # finished, result=None



# out:
'''
<coroutine object execute at 0x000001C1361B3140>
<Task pending name='Task-1' coro=<execute() running at E:\PyProject\asyncioDemo\main.py:31>>
Number:1
<Task finished name='Task-1' coro=<execute() done, defined at E:\PyProject\asyncioDemo\main.py:31> result=None>
'''

Python 3.7 引入: asyncio.run() 方法驱动协程,将以前的驱动协程方式进一步封装:

async def execute(x):
    """Print the given value in the form ``Number:<x>``."""
    message = f'Number:{x}'
    print(message)


async def main():
    """Schedule ``execute(1)`` as a task and wait for it to finish."""
    # ``await execute(1)`` would run the coroutine directly as well.
    # Several tasks can be awaited together with asyncio.gather() or
    # asyncio.wait().
    await asyncio.create_task(execute(1))


if __name__ == '__main__':
    asyncio.run(main())

常用函数

asyncio.create_task

官方解释:

The asyncio.create_task() function to run coroutines concurrently as asyncio Tasks.

import asyncio

async def execute(x):
    """Print ``Number:<x>``.

    Args:
        x: Value interpolated into the printed line.
    """
    print(f'Number:{x}')


async def main():
    """Run ``execute(1)`` as a task and wait until it has finished."""
    # Keep a reference and await it: the event loop only holds weak
    # references to tasks, and asyncio.run() cancels whatever is still
    # pending when main() returns, so a fire-and-forget task is not
    # guaranteed to run to completion.
    task = asyncio.create_task(execute(1))
    await task


if __name__ == '__main__':
    asyncio.run(main())

asyncio.gather()

官方解释:

Coroutines will be wrapped in a future and scheduled in the event
loop. They will not necessarily be scheduled in the same order as
passed in.

import asyncio, time


async def main():
    """Print a greeting, sleep for one second, then print a farewell."""
    print(f'{time.ctime()} Hello!')
    await asyncio.sleep(1.0)
    print(f'{time.ctime()} Goodbye!')


# Create the loop explicitly; asyncio.get_event_loop() without a running
# loop is deprecated.
loop = asyncio.new_event_loop()
task = loop.create_task(main())

# Every task of this loop that has not finished yet (here: just `task`).
pending = asyncio.all_tasks(loop=loop)

# gather() aggregates the given tasks into a single future.
group = asyncio.gather(*pending, return_exceptions=True)

print(f'group: {group}')  # out: group: <_GatheringFuture pending>
try:
    loop.run_until_complete(group)
    print(f'group: {group}')  # out: group: <_GatheringFuture finished result=[None]>
finally:
    loop.close()  # close the loop even if the run fails

从输出可以看出 asyncio.gather 返回的也是一个 future
Return a future aggregating results from the given coroutines/futures.

return_exceptions = True:

# 该案例说明了:
# 1.return_exceptions 为 True 时,task 抛出的异常将会作为一个对象返回,不会影响其他 task 的执行
# 2.gather 函数返回结果的顺序和传入参数的顺序一致
import asyncio

async def demo():
    """Return the constant string ``'test'``."""
    return 'test'


async def foo():
    """Return the constant string ``'foo'``."""
    return 'foo'


async def bar():
    """Always fail with ``RuntimeError('fake runtime error')``."""
    raise RuntimeError('fake runtime error')


async def main():
    """Run bar, foo and demo concurrently and collect their outcomes.

    Returns:
        list: One entry per awaitable, in the order they were passed to
        gather(). Because ``return_exceptions=True``, bar's RuntimeError
        is returned as an object instead of being raised.
    """
    task1 = asyncio.create_task(foo())
    task2 = asyncio.create_task(bar())
    task3 = asyncio.create_task(demo())
    # Await the gather future while the loop is still running instead of
    # returning it un-awaited and reading .result() after asyncio.run()
    # has already closed the loop.
    return await asyncio.gather(task2, task1, task3, return_exceptions=True)


if __name__ == '__main__':
    print(asyncio.run(main()))  # [RuntimeError('fake runtime error'), 'foo', 'test']

return_exceptions = False:


# task2 抛出运行时异常,该异常会立即传播给等待 gather() 的调用方,程序因未捕获异常而终止;gather 本身并不会取消其他任务

import asyncio

async def demo():
    """Return the constant string ``'test'``."""
    return 'test'


async def foo():
    """Return the constant string ``'foo'``."""
    return 'foo'


async def bar():
    """Always fail with ``RuntimeError('fake runtime error')``."""
    raise RuntimeError('fake runtime error')


async def main():
    """Run bar, foo and demo concurrently; bar's error propagates.

    Raises:
        RuntimeError: Raised by bar() and re-raised by the awaited
            gather() because ``return_exceptions=False``.
    """
    task1 = asyncio.create_task(foo())
    task2 = asyncio.create_task(bar())
    task3 = asyncio.create_task(demo())
    # Await the gather future inside the running loop rather than
    # returning it un-awaited and calling .result() on it afterwards.
    return await asyncio.gather(task2, task1, task3, return_exceptions=False)


if __name__ == '__main__':
    # The RuntimeError escapes asyncio.run() and ends the script with a
    # traceback similar to the recorded one below.
    print(asyncio.run(main()))
    # Traceback (most recent call last):
    #   File "E:\PyProject\pytestDemo\demo9.py", line 24, in <module>
    #     print(asyncio.run(main()))
    #   File "E:\PyProject\pytestDemo\demo9.py", line 14, in bar
    #     raise RuntimeError('fake runtime error')
    # RuntimeError: fake runtime error

asyncio.all_tasks

作用:返回 loop 中尚未完成的 task 集合

import asyncio, time


async def main():
    """Print a greeting, sleep for one second, then print a farewell."""
    print(f'{time.ctime()} Hello!')
    await asyncio.sleep(1.0)
    print(f'{time.ctime()} Goodbye!')


# Create the loop explicitly; asyncio.get_event_loop() without a running
# loop is deprecated.
loop = asyncio.new_event_loop()
task = loop.create_task(main())

# all_tasks() returns the set of tasks of `loop` that are not done yet.
pending = asyncio.all_tasks(loop=loop)
print(pending)  # {<Task pending name='Task-1' coro=<main() running at E:\PyProject\pytestDemo\demo4.py:8>>}

# This example only inspects the pending set, so cancel the task and close
# the loop instead of leaking a pending task and an open loop.
task.cancel()
loop.run_until_complete(asyncio.gather(task, return_exceptions=True))
loop.close()

绑定回调函数

async def request():
    """Fetch the Baidu home page and return the response object.

    NOTE: requests.get() is a blocking call, so this coroutine does not
    yield to the event loop while the HTTP request is in flight.
    """
    url = 'http://www.baidu.com'
    status = requests.get(url)
    return status


def callback(task):
    """Done-callback: print the result of the finished task.

    Args:
        task: The completed task whose ``result()`` is printed.
    """
    print(f'Status:{task.result()}')


# Create the loop explicitly: get_event_loop()/ensure_future() without a
# running loop is deprecated.
loop = asyncio.new_event_loop()
coroutine = request()
task = asyncio.ensure_future(coroutine, loop=loop)
task.add_done_callback(callback)  # called by the loop once the task is done
print(task)

try:
    loop.run_until_complete(task)  # schedule the task on the loop and run it
finally:
    loop.close()
print(task)



# The return value is also available directly through task.result().
async def request():
    """Fetch the Baidu home page and return the response object.

    NOTE: requests.get() is a blocking call, so this coroutine does not
    yield to the event loop while the HTTP request is in flight.
    """
    url = 'http://www.baidu.com'
    status = requests.get(url)
    return status  # becomes task.result()


# Create the loop explicitly: get_event_loop()/ensure_future() without a
# running loop is deprecated.
loop = asyncio.new_event_loop()
coroutine = request()
task = asyncio.ensure_future(coroutine, loop=loop)

print(task)

try:
    loop.run_until_complete(task)  # schedule the task on the loop and run it
finally:
    loop.close()
print(task.result())
print(task)

Semaphore

网络客户端应该限流,以免对服务器发起过多的请求。
信号量是同步原语,比锁灵活。信号量可以配置最大数量,而且一个信号量可由多个协程持有,因此特别适合于限制活动的并发协程数量

'''
1.asyncio.Semaphore 有一个内部计数器。每次使用 await semaphore.acquire(),计数器递减
2.每次使用 semaphore.release(),计数器递增。需要注意的是该方法不是协程,因为它永不阻塞
3.计数器的初始值在实例化 Semaphore 时设定
  semaphore = asyncio.Semaphore(concur_req)

4.若计数器大于零,则使用 await 处理 .acquire() 方法没有延迟;若计数器为零,则 .acquire() 挂起等待处理的协程,
  直到其他协程在同一个 Semaphore 实例上调用 .release(),递增计数器。一般不直接调用这些方法,把 semaphore 当作
   异步上下文管理器使用更安全
5.协程方法 Semaphore.__aenter__ 异步等待 .acquire();协程方法 __aexit__ 调用 .release()
'''

http://www.manongjc.com/detail/30-cncrjfiipztubrh.html

多任务协程

多个任务怎么处理?

  • 可以创建一个 task 列表交给 asyncio.wait(tasks)(注意:直接向 asyncio.wait() 传入协程对象自 Python 3.8 起弃用,3.11 起不再支持,需先封装成 task)
  • 可以创建一个 task 列表交给 asyncio.gather(*tasks)
async def request():
    """Fetch the Baidu home page, print the response and return it.

    NOTE: requests.get() is a blocking call, so this coroutine never
    yields to the event loop while the HTTP request is in flight.
    """
    url = 'http://www.baidu.com'
    resp = requests.get(url)
    print(resp)
    return resp


async def main():
    """Run four request() coroutines through gather().

    Returns:
        list: The value returned by each request() call, in the order
        the coroutines were passed to gather().
    """
    tasks = [request() for _ in range(1, 5)]
    # Return the gathered list; without the return, `results` in the
    # entry point below is always None.
    return await asyncio.gather(*tasks)


if __name__ == '__main__':
    results = asyncio.run(main())


# 但其实依然是顺序执行,没有实现异步操作:requests.get() 是阻塞调用,协程内部没有 await 让出控制权

await关键字

await关键字:它可以将耗时等待的操作挂起,让出控制权。如果协程在执行的时候遇到await,事件循环就会将本协程挂起,转而执行别的协程,直到其他协程挂起或执行完毕

await 后面的对象必须是如下格式之一:

  • 一个原生协程对象
  • 一个由types.coroutine 修饰的生成器,这个生成器返回协程对象
  • 一个包含__await__方法的对象返回的一个迭代器

案例

import requests
import time
import logging
import asyncio


logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')


url = 'https://www.httpbin.org/delay/1'  # endpoint that answers after a 1 s delay
start_time = time.time()
for _ in range(1, 10):  # nine sequential, blocking requests
    resp = requests.get(url)
    logging.info(f'scraping {url}')
end_time = time.time()
# Elapsed time is end - start; the reversed subtraction logged a negative
# duration.
logging.info(f'total time {end_time - start_time} s')


#out 同步访问耗时 18s:
'''
2022-11-20 18:13:20,426 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:22,395 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:24,697 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:26,675 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:28,680 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:30,728 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:32,905 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:34,886 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:36,980 - INFO: scraping https://www.httpbin.org/delay/1
2022-11-20 18:13:36,980 - INFO: total time -18.95430302619934 s
'''

结合aiohttp 模块实现异步请求,优化上面程序:

import aiohttp

async def get(url):
    """GET *url* with aiohttp and return the response once its body is read.

    Args:
        url: Address to request.

    Returns:
        The aiohttp response object; its body has already been consumed.
    """
    # Context managers close the session and release the connection even
    # when the request raises, which the manual close() calls did not.
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            await resp.text()
            return resp


async def request():
    """Fetch the delayed endpoint once, printing progress before and after."""
    url = 'https://www.httpbin.org/delay/1'
    # These prints match the recorded output and the generator-based
    # variant of this example.
    print(f'Waiting for {url}...')
    resp = await get(url)
    print(f'ger response{resp}')


async def main():
    """Run ten request() coroutines concurrently and wait for all of them."""
    tasks = [request() for _ in range(10)]
    await asyncio.gather(*tasks)


start = time.time()
# asyncio.run() creates and closes the loop itself; get_event_loop() and
# ensure_future() without a running loop are deprecated.
asyncio.run(main())
end = time.time()
print(f'cost time:{end - start}')

# out 完成10个协程总计花费约2s:
'''
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
Waiting for https://www.httpbin.org/delay/1...
ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

ger response<ClientResponse(https://www.httpbin.org/delay/1) [200 OK]>
<CIMultiDictProxy('Date': 'Sun, 20 Nov 2022 10:22:13 GMT', 'Content-Type': 'application/json', 'Content-Length': '370', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>

cost time:2.4535582065582275

Process finished with exit code 0

'''

另一种实现协程的方式(基于生成器的 @asyncio.coroutine),Python 3.8 起已弃用,3.11 起已移除:

import aiohttp

start = time.time()

# Legacy generator-based coroutines: deprecated since Python 3.8
# (NOTE(review): the decorator was removed in Python 3.11 per the asyncio
# docs, so this snippet only runs on older interpreters).
@asyncio.coroutine   # equivalent to: async def get(url)
def get(url):
    """Request *url* through a new aiohttp session and return the response."""
    session = aiohttp.ClientSession()
    resp = yield from session.get(url)   # equivalent to: await session.get(url)
    yield from resp.text()
    yield from session.close()
    return resp

@asyncio.coroutine
def request():
    """Fetch the delayed endpoint once, printing before and after the call."""
    url = 'https://www.httpbin.org/delay/1'
    print(f'Waiting for {url}...')
    resp = yield from get(url)
    print(f'ger response{resp}')
# Wrap ten request() coroutines into tasks.
tasks = [asyncio.ensure_future(request()) for _ in range(10)]

loop = asyncio.get_event_loop()
# Block until every task in `tasks` has finished.
loop.run_until_complete(asyncio.wait(tasks))

end = time.time()

# Wall-clock time between `start` and `end`.
print(f'cost time:{end - start}')

异常

Traceback (most recent call last):
  File "D:\pyproject\fa_search_test\tools\async_get_data.py", line 290, in <module>
    asyncio.get_event_loop().run_until_complete(main())
  File "D:\code_tool\python\lib\asyncio\base_events.py", line 646, in run_until_complete
    return future.result()
  File "D:\pyproject\fa_search_test\tools\async_get_data.py", line 244, in main
    resp_data = await resp.json()
TypeError: object dict can't be used in 'await' expression

解决:https://segmentfault.com/q/1010000043220918

也就是:这里的 resp.json() 是普通(同步)方法,直接返回 dict,不能对它使用 await 关键字;只有返回可等待对象的调用(例如 aiohttp 响应对象的 resp.json())才需要 await

学习资料:

官网文档: https://docs.python.org/3/library/asyncio.html
https://www.doc88.com/p-18573348452914.html

posted @ 2022-11-20 15:00  chuangzhou  阅读(60)  评论(0编辑  收藏  举报