协程

协程的好处:当某个任务遇到耗时操作(例如 I/O 等待)时,程序可以切换到其他协程继续执行,而不必原地等待。协程之间的切换只占用很少的资源,效率高。

使用yield完成多任务:

 1 import time
 2 
 3 
 4 def work1():
 5     while True:
 6         print("--- 1 ---")
 7         time.sleep(0.1)
 8         yield
 9 
10 
11 def work2():
12     while True:
13         print("--- 2 ---")
14         time.sleep(0.1)
15         yield
16 
17 
18 def main():
19     t1 = work1()
20     t2 = work2()
21     # 先让t1运行一会,当t1遇到yield的时候,再返回到25行,然后执行t2
22     # 当t2遇到yield的时候,再返回26行,循环后回到25行,切换到t1中
23     # 这样t1/t2/t1/t2的交替运行,最终实现了多任务 —— 协程
24     while True:
25         next(t1)
26         next(t2)
27 
28 
29 if __name__ == '__main__':
30     main()

  运行结果:

 1 --- 1 ---
 2 --- 2 ---
 3 --- 1 ---
 4 --- 2 ---
 5 --- 1 ---
 6 --- 2 ---
 7 --- 1 ---
 8 --- 2 ---
 9 --- 1 ---
10 --- 2 ---
11 ......

使用greenlet完成多任务:

 1 from greenlet import greenlet
 2 import time
 3 
 4 
 5 def work1():
 6     while True:
 7         print("--- 1 ---")
 8         gr2.switch()
 9         time.sleep(0.5)
10 
11 
12 def work2():
13     while True:
14         print("--- 2 ---")
15         gr1.switch()
16         time.sleep(0.5)
17 
18 
19 gr1 = greenlet(work1)
20 gr2 = greenlet(work2)
21 
22 # 切换到gr1中运行
23 gr1.switch()

  运行结果:

1 --- 1 ---
2 --- 2 ---
3 --- 1 ---
4 --- 2 ---
5 --- 1 ---
6 --- 2 ---
7 ......

使用gevent完成多任务:

 1 import gevent
 2 from gevent import monkey
 3 import time
 4 
 5 # 有耗时操作时需要写
 6 monkey.patch_all()    # 将程序中用到的耗时操作的代码,换为gevent中自己实现的模块
 7 
 8 
 9 def f(n):
10     for i in range(n):
11         print(gevent.getcurrent(), i)
12         # 程序中的耗时操作的代码,用monkey.patch_all()可以将延迟转换程gevent自己实现的模块
13         # 等同gevent.sleep(0.5)
14         time.sleep(0.5)
15         # gevent.sleep(0.5)
16 
17 
18 # 平时使用常用joinall
19 gevent.joinall([gevent.spawn(f, 5), gevent.spawn(f, 5), gevent.spawn(f, 5)])
20 # print("测试代码1")
21 # g1 = gevent.spawn(f, 5)
22 # print("测试代码2")
23 # g2 = gevent.spawn(f, 5)
24 # print("测试代码3")
25 # g3 = gevent.spawn(f, 5)
26 # print("测试代码4")
27 # g1.join()
28 # print("测试代码5")
29 # g2.join()
30 # print("测试代码6")
31 # g3.join()
32 # 使用测试代码得知:gevent.spawn创建的对象只有在遇到耗时操作时才会执行传入的函数的代码
33 # 函数的代码执行完了后,再执行耗时操作后面的代码

  运行结果:

 1 <Greenlet at 0x34e7150: f(5)> 0
 2 <Greenlet at 0x34e7260: f(5)> 0
 3 <Greenlet at 0x34e7370: f(5)> 0
 4 <Greenlet at 0x34e7150: f(5)> 1
 5 <Greenlet at 0x34e7260: f(5)> 1
 6 <Greenlet at 0x34e7370: f(5)> 1
 7 <Greenlet at 0x34e7150: f(5)> 2
 8 <Greenlet at 0x34e7260: f(5)> 2
 9 <Greenlet at 0x34e7370: f(5)> 2
10 <Greenlet at 0x34e7150: f(5)> 3
11 <Greenlet at 0x34e7260: f(5)> 3
12 <Greenlet at 0x34e7370: f(5)> 3
13 <Greenlet at 0x34e7150: f(5)> 4
14 <Greenlet at 0x34e7260: f(5)> 4
15 <Greenlet at 0x34e7370: f(5)> 4

案例:使用gevent多任务爬取图片:

 1 import requests
 2 import re
 3 import os
 4 import gevent
 5 from gevent import monkey
 6 
 7 monkey.patch_all()
 8 
 9 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36'}
10 response = requests.get('https://www.qqtn.com/article/article_277732_1.html', headers=headers)
11 # print(response)
12 html = response.text
13 dir_name = ['美女微信背景图'][0]
14 os.mkdir(dir_name)
15 
16 urls = re.findall('<p align="center"><img src="(.*?)"/></p>', html)
17 print(urls)
18 
19 
20 def download(img_name, img_url):
21     req = requests.get(img_url)
22     img_content = req.content
23     with open(dir_name + '/' + img_name, 'wb') as f:
24         f.write(img_content)
25 
26 
27 def main():
28     i = 0
29     while i < len(urls):
30         name = str(i)+'.jpg'
31         gevent.joinall([
32             gevent.spawn(download, name, urls[i])
33         ])
34         i += 1
35 
36 
37 if __name__ == '__main__':
38     main()
posted @ 2020-04-14 10:41  组装梦想  阅读(153)  评论(0)  编辑  收藏  举报