Coroutines

1.1 My Understanding

The core idea is switching between functions: when a piece of code needs to do I/O, execution immediately switches to another function. The heart of coroutines is the event loop, which keeps checking which I/O operations are ready; ready ones get executed, and while an operation is not ready other functions run, switching back and forth every time I/O is hit. Because all of this happens inside a single thread, switching between functions costs almost nothing.
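
A minimal sketch of that idea (the name download and the sleep-based "I/O" are made up for illustration): two coroutines share one thread, and whenever one of them awaits its simulated I/O, the event loop switches to the other, so both finish in roughly the time of a single I/O wait.

import asyncio

async def download(n):
    print('download %s: waiting on I/O' % n)
    await asyncio.sleep(1)   # simulated I/O; control returns to the event loop
    print('download %s: I/O ready, resumed' % n)

async def main():
    # both coroutines complete in about 1 second total,
    # because the loop switches between them while each waits
    await asyncio.gather(download(1), download(2))

asyncio.run(main())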

1.2 asyncio

1. A pluggable event loop, with implementations specific to each operating system
2. Transport and protocol abstractions (TCP, UDP, etc.)
3. A Future class modeled on the concurrent.futures module but adapted for use with the event loop
4. Concurrent code written in a synchronous style
5. When a call that performs blocking I/O is unavoidable, an interface to hand that call off to a thread pool
asyncio is a complete solution for asynchronous programming.
Related frameworks: tornado (which implements a web server), gevent, twisted, sanic.

1.2.1 Basic asyncio Usage

import asyncio
import time

# event loop + callbacks (driving generators/coroutines) + epoll
async def test():
    print('start get url')
    # time.sleep(2)           # a blocking sleep would defeat the purpose
    await asyncio.sleep(2)    # non-blocking sleep; yields control back to the loop
    print('end get url')

start_time = time.time()
loop = asyncio.get_event_loop()
tasks = [asyncio.ensure_future(test()) for i in range(10)]
loop.run_until_complete(asyncio.wait(tasks))
print(time.time() - start_time)   # about 2 seconds for all 10 coroutines

done, pending = await asyncio.wait(task_list)  # done holds the finished tasks, pending the unfinished ones; wait also accepts a timeout, and tasks that have not finished when the timeout expires end up in pending
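
A runnable sketch of that behaviour (the sleep times of 1 and 3 seconds are arbitrary): with timeout=1.5, the 1-second task ends up in done and the 3-second task in pending.

import asyncio

async def job(seconds):
    await asyncio.sleep(seconds)
    return seconds

async def main():
    task_list = [asyncio.ensure_future(job(s)) for s in (1, 3)]
    done, pending = await asyncio.wait(task_list, timeout=1.5)
    print(len(done), 'done,', len(pending), 'pending')   # 1 done, 1 pending
    for task in pending:
        task.cancel()   # clean up whatever did not finish in time

asyncio.run(main())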

1.2.2 Getting a Coroutine's Return Value

# v1: wrap the coroutine with ensure_future and read the result off the future
async def test():
    print('start get url')
    await asyncio.sleep(2)
    print('end get url')
    return 1

loop = asyncio.get_event_loop()
future = asyncio.ensure_future(test())
loop.run_until_complete(future)
print(future.result())

# v2: let the loop create a Task directly
async def test():
    print('start get url')
    await asyncio.sleep(2)
    print('end get url')
    return 1

loop = asyncio.get_event_loop()
task = loop.create_task(test())
loop.run_until_complete(task)

print(task.result())

1.2.3 Callbacks

from functools import partial  # partial lets us bind extra arguments to the callback

async def test():
    print('start get url')
    await asyncio.sleep(2)
    print('end get url')
    return 1

def callback(name, future):
    # add_done_callback only passes the future, so extra arguments are bound via partial
    print(name, future.result())

loop = asyncio.get_event_loop()
task = loop.create_task(test())
task.add_done_callback(partial(callback, "lqw"))
loop.run_until_complete(task)

1.2.4 wait and gather

1.2.4.1 wait
import asyncio
import time
from concurrent.futures import FIRST_COMPLETED

async def test(sleep_time):
    print('start get url')
    await asyncio.sleep(sleep_time)
    print('end get url')
    return 1

start_time = time.time()
loop = asyncio.get_event_loop()
tasks = [asyncio.ensure_future(test(i)) for i in range(1, 3)]
# return as soon as the first task finishes instead of waiting for all of them
loop.run_until_complete(asyncio.wait(tasks, return_when=FIRST_COMPLETED))
print(time.time() - start_time)   # about 1.x seconds
1.2.4.2 gather
import asyncio
import time

async def test(sleep_time):
    print('start get url')
    await asyncio.sleep(sleep_time)
    print('end get url')
    return 1

start_time = time.time()
loop = asyncio.get_event_loop()
tasks = [test(i) for i in range(1, 3)]
loop.run_until_complete(asyncio.gather(*tasks))   # gather unpacks the list of awaitables
print(time.time() - start_time)

gather is higher level than wait and supports grouping tasks:
group1 = [task1, task2]
group2 = [task3, task4]
group1 = asyncio.gather(*group1)
group2 = asyncio.gather(*group2)
loop.run_until_complete(asyncio.gather(group1, group2))
1.2.4.3 Cancelling Tasks (Futures)
async def get_html(sleep_time):
    print('get')
    await asyncio.sleep(sleep_time)
    print('end')

task1 = get_html(2)
task2 = get_html(3)
task3 = get_html(3)
tasks = [task1, task2, task3]
loop = asyncio.get_event_loop()
try:
    loop.run_until_complete(asyncio.wait(tasks))
except KeyboardInterrupt:
    # press Ctrl+C while the tasks are running to trigger the cancellation
    all_tasks = asyncio.all_tasks(loop)   # asyncio.Task.all_tasks() on very old Python versions
    for task in all_tasks:
        task.cancel()
    loop.stop()
    loop.run_forever()   # required: the loop must run again to process the cancellations
finally:
    loop.close()

1.2.5 call_soon, call_later, call_at, call_soon_threadsafe

import asyncio

def callback(sleep_times):
    print('sleep time %s' % sleep_times)

def stop(loop):
    loop.stop()

loop = asyncio.get_event_loop()
now = loop.time()
loop.call_at(now + 1, callback, 2)        # scheduled against the loop's internal clock, loop.time()
loop.call_soon(callback, 2)               # run on the next iteration of the loop
loop.call_soon(stop, loop)                # stop() runs right after callback(), so the delayed calls never fire
loop.call_later(2, callback, 1)           # run after a 2-second delay
loop.call_soon_threadsafe(callback, 2)    # like call_soon, but safe to call from another thread
loop.run_forever()

1.2.6 Combining with a Thread Pool

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor

# run blocking calls in a thread pool and await them from the event loop
def get_url(s):
    time.sleep(s)   # blocking call, so it must not run on the event loop itself
    print('end', s)
    return 111

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    executor = ThreadPoolExecutor()
    tasks = [loop.run_in_executor(executor, get_url, i) for i in range(1, 3)]
    loop.run_until_complete(asyncio.wait(tasks))

1.2.7 Simulating an HTTP Request with asyncio (using as_completed)

import asyncio
from urllib.parse import urlparse

async def get_url(url):
    url_obj = urlparse(url)
    host = url_obj.netloc
    address = url_obj.path if url_obj.path else '/'
    # open a raw TCP connection and speak HTTP by hand
    reader, writer = await asyncio.open_connection(host, 80)
    writer.write(f"GET {address} HTTP/1.1\r\nHost: {host}\r\nConnection: close\r\n\r\n".encode('utf8'))
    content_list = []
    async for line in reader:      # StreamReader implements __anext__, so it supports async for
        content_list.append(line.decode('utf8'))
    # data = await reader.read()   # alternatively, read the whole response at once
    return '\n'.join(content_list)

async def main(loop):
    tasks = [asyncio.ensure_future(get_url('http://www.baidu.com')) for i in range(10)]
    for task in asyncio.as_completed(tasks):   # yields tasks in the order they finish
        result = await task
        print(result)

loop = asyncio.get_event_loop()
loop.run_until_complete(main(loop))

1.2.8 Task and Future

Task unifies coroutines with the thread-pool Future: Task is a subclass of Future. A thread does not need to be started with send(None), but a coroutine must be primed before it runs, so Task was abstracted out as the bridge between a coroutine and its Future. From a design standpoint it smooths over the places where coroutines and threads behave differently.
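
A small sketch of that point, using only the standard library: a bare coroutine does nothing until it is primed with send(None), while wrapping it in a Task (a Future subclass) lets the event loop do that driving for us.

import asyncio

async def coro():
    return 1

# driving the coroutine by hand, the way the event loop does internally
c = coro()
try:
    c.send(None)   # prime/start the coroutine
except StopIteration as e:
    print('manual result:', e.value)   # 1

# letting a Task drive it instead
loop = asyncio.get_event_loop()
task = loop.create_task(coro())
print(isinstance(task, asyncio.Future))   # True: Task is a subclass of Future
loop.run_until_complete(task)
print('task result:', task.result())      # 1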

1.2.9 Coroutine Locks

import asyncio
import aiohttp

cache = {}
lock = asyncio.Lock()
queue = asyncio.Queue()   # message passing works even within a single thread

async def get_stuff(url):
    # the lock ensures only one coroutine fetches and fills the cache at a time
    async with lock:
        if url in cache:
            return cache[url]
        stuff = await aiohttp.request('get', url)
        cache[url] = stuff
        return stuff

async def test1():
    res = await get_stuff('dada')

async def test2():
    res1 = await get_stuff('dada')
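
The queue defined above is not actually used; here is a minimal sketch (the producer/consumer names are made up) of how asyncio.Queue passes messages between coroutines inside a single thread.

import asyncio

async def producer(queue):
    for i in range(3):
        await queue.put(i)    # hand a message to the consumer
        await asyncio.sleep(0.1)
    await queue.put(None)     # sentinel: nothing more to send

async def consumer(queue):
    while True:
        item = await queue.get()   # suspends until a message arrives
        if item is None:
            break
        print('got', item)

async def main():
    queue = asyncio.Queue()
    await asyncio.gather(producer(queue), consumer(queue))

asyncio.run(main())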

1.3 Async Crawler

import aiohttp
import asyncio
import re
import aiomysql
from pyquery import PyQuery

# crawl, de-duplicate, store in the database

waiting_urls = []
seen_urls = set()
stopping = False

async def fetch(url, session):
    async with session.get(url) as response:
        if response.status in [200, 201]:
            html = await response.text()
            return html

def parse_content(html):
    urls = []
    pq = PyQuery(html)
    for link in pq.items('a'):
        url = link.attr('href')
        print(url)
        if url and url.startswith('http') and url not in seen_urls:
            urls.append(url)
            waiting_urls.append(url)
    return urls

async def consumer(pool):
    async with aiohttp.ClientSession() as session:
        while not stopping:
            if len(waiting_urls) == 0:
                await asyncio.sleep(0.5)
                continue
            url = waiting_urls.pop()
            print('popped', url)
            if re.match(r'http://.*?jobbole.com.*?\d+.html', url):
                if url not in seen_urls:
                    # submit a coroutine to parse this article page
                    asyncio.ensure_future(article_handle(url, session, pool))
            else:
                asyncio.ensure_future(init_urls(url, session))

async def article_handle(url, session, pool):
    # fetch the article detail page
    html = await fetch(url, session)
    seen_urls.add(url)
    parse_content(html)
    pq = PyQuery(html)
    title = pq('title').text()
    print('title', title)
    # async with pool.acquire() as conn:
    #     async with conn.cursor() as cur:
    #         # insert into the database
    #         await cur.execute('')


async def init_urls(start_url, session):
    html = await fetch(start_url, session)
    parse_content(html)


async def main(loop):
    # wait for the MySQL connection to be ready
    start_url = 'http://www.jobbole.com/'
    # pool = await aiomysql.create_pool(host='127.0.0.1', port=3306,
    #                                   user='root', password='', db='mysql',
    #                                   loop=loop, charset='utf8', autocommit=True)
    pool = 'xx'
    async with aiohttp.ClientSession() as session:
        html = await fetch(start_url, session)
        seen_urls.add(start_url)
        parse_content(html)

    asyncio.ensure_future(consumer(pool))

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    asyncio.ensure_future(main(loop))
    loop.run_forever()

1.4 Using set_result

import asyncio

loop = asyncio.get_event_loop()

def set_res(fur):
    fur.set_result("test")

async def main():
    fur = loop.create_future()
    loop.call_later(3, set_res, fur)
    res = await fur   # waits until a result is set; without the call_later above this would block forever
    print(res)

loop.run_until_complete(main())

1.5 Future Objects

asyncio can wrap a thread-pool future into an asyncio future.
import asyncio
import time

loop = asyncio.get_event_loop()

def mysleep(s):
    time.sleep(s)

async def demo():
    print('start')
    # run_in_executor defaults to a thread pool; a process pool can be passed instead,
    # and the result comes back as an awaitable asyncio future
    fur = loop.run_in_executor(None, mysleep, 3)
    await fur
    print('end')

tasks = [demo(), demo(), demo()]
loop.run_until_complete(asyncio.wait(tasks))
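
The same conversion can also be done explicitly with asyncio.wrap_future, which turns a concurrent.futures future into an awaitable asyncio future; a small sketch, separate from the demo above:

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor

def blocking(s):
    time.sleep(s)
    return s

async def main():
    executor = ThreadPoolExecutor()
    thread_future = executor.submit(blocking, 1)      # concurrent.futures.Future
    aio_future = asyncio.wrap_future(thread_future)   # now awaitable in the event loop
    print(await aio_future)                           # 1

asyncio.run(main())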

1.6 Async Crawler: Downloading Images

import asyncio
import aiofiles
import aiohttp
import time
import requests

img_urls = ['https://img.lianzhixiu.com/uploads/allimg/202010/9999/bfed842d57.jpg',
            'https://img.lianzhixiu.com/uploads/allimg/202010/9999/7d0c02fd74.jpg',
            'https://img.lianzhixiu.com/uploads/allimg/202010/9999/edb377d1b3.jpg']

async def get_images(url, session):
    async with session.get(url) as response:
        if response.status in [200, 201]:
            content = await response.content.read()
            # write the image to disk without blocking the event loop
            async with aiofiles.open(f'{url.split("/")[-1]}', "wb") as fp:
                await fp.write(content)

async def main():
    async with aiohttp.ClientSession() as session:
        tasks = [asyncio.ensure_future(get_images(url, session)) for url in img_urls]
        await asyncio.wait(tasks)

# synchronous version for comparison
def request_get(url):
    content = requests.get(url).content
    with open(f'{url.split("/")[-1]}', "wb") as fp:
        fp.write(content)

def main_v1():
    for url in img_urls:
        request_get(url)

if __name__ == '__main__':
    # start_time = time.time()
    # loop = asyncio.get_event_loop()
    # loop.run_until_complete(main())
    # print(time.time() - start_time)
    start_time = time.time()
    main_v1()
    print(time.time() - start_time)
# async: ~1 second
# sync: ~2 seconds

1.7 Async Iterators

import random
import asyncio

class MyAsyncIter:
    def __init__(self):
        self.count = 0

    def __aiter__(self):
        return self

    async def __anext__(self):
        # each step "does I/O" before producing the next value
        await asyncio.sleep(random.randint(1, 3))
        self.count += 1
        print(self.count)
        if self.count > 10:
            raise StopAsyncIteration
        return self.count

async def test():
    await asyncio.sleep(1)
    print('test: first step')
    await asyncio.sleep(2)
    print('test: second step')

async def main():
    # test() keeps running concurrently while we iterate asynchronously
    asyncio.ensure_future(test())
    async for i in MyAsyncIter():
        print(i)

if __name__ == '__main__':
    asyncio.run(main())

1.8 Async Context Managers

import asyncio

class MyEnter:
    async def __aenter__(self):
        await asyncio.sleep(3)
        print('entered')
        return self

    async def close(self):
        await asyncio.sleep(3)
        print('close')

    async def __aexit__(self, *args):
        await self.close()

async def test():
    await asyncio.sleep(4)
    print('end')

async def main():
    # test() keeps running concurrently while the context manager does its slow enter/exit
    asyncio.ensure_future(test())
    async with MyEnter() as d:
        pass   # __aexit__ calls close() automatically on the way out

if __name__ == '__main__':
    asyncio.run(main())


 
