进程线程异步编程 websockets梳理

一 进程，多进程，进程池

进程：一个程序运行起来后，代码+用到的资源称之为进程，它是操作系统分配资源的基本单元

一个进程至少一个线程才能运行，一条生产电视的流水线就是一个进程，每个工人就是线程。

一个进程中可以并发多个线程，每条线程并行执行不同的任务。

进程在执行过程中拥有独立的内存单元，而多个线程共享内存

 1 import time
 2 import os
 3 from multiprocessing import Process
 4 
 5 def long_time_task(i):
 6     print(os.getpid())
 7     time.sleep(i)
 8     print("结果:{}".format(8**20))
 9 
10 if __name__ == '__main__':
11     print("当前木进程:{}".format(os.getpid()))
12     start = time.time()
13     p1 = Process(target=long_time_task,args=(1,))
14     p2 = Process(target=long_time_task,args=(2,))
15     print("等待所有子进程执行完成")
16     p1.start()
17     p2.start()
18     p1.join() #为了让母进程阻塞,等待子进程完成在打印时间
19     p2.join()
20     end = time.time()
21     print("用事{}秒".format(end-start))

多进程实例

创建子进程的几种方式

apply()：同步阻塞执行，上一个子进程结束后才能进行下一个子进程（不推荐） apply(func, args=(), kwds={})
apply_async()：异步非阻塞执行，每个子进程都是异步执行的（并行）（推荐） apply_async(func, args=(), kwds={}, callback=None, error_callback=None)
map()：同步阻塞；若子进程有返回值，且需集中处理，建议采用此种方式
map_async()：异步非阻塞，若想统一处理结果，map_async 比 apply_async 更方便
imap()：内存不够用可以采用此种方式，速度慢于 map()
imap_unordered：imap() 的无序版本（不会按照调用顺序返回，而是按照结束顺序返回），返回迭代器实例

 1 from multiprocessing import Pool, cpu_count
 2 import os
 3 import time
 4 
 5 
 6 def long_time_task(i):
 7     print('子进程: {} - 任务{}'.format(os.getpid(), i))
 8     time.sleep(2)
 9     print("结果: {}".format(8 ** 20))
10 
11 
12 if __name__=='__main__':
13     print("CPU内核数:{}".format(cpu_count()))
14     print('当前母进程: {}'.format(os.getpid()))
15     start = time.time()
16     p = Pool(8)
17     #for i in range(15):
18     #    p.apply_async(long_time_task, args=(i,))
19     p.map(long_time_task,range(5)) #第二个参数是个迭代器
20     print('等待所有子进程完成。')
21     p.close()
22     p.join()  #join之前要close
23     end = time.time()
24     print("总共用时{}秒".format((end - start)))

进程池

 1 # coding=utf-8
 2 
 3 import multiprocessing
 4 import time
 5 
 6 
 7 def run(a):
 8     return a * a
 9 
10 
11 data = []
12 
13 
14 def my_callback(result):
15     data.append(result)
16 
17 
18 if __name__ == '__main__':
19     st = time.time()
20     pool = multiprocessing.Pool(6)
21 
22     # 总耗时：0.4497215747833252
23     future = pool.map_async(run, range(20000))
24     print(future.get())  # [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
25 
26     # # 总耗时：3.019148111343384
27     # for i in range(20000):
28     #     pool.apply_async(run, args=(i,), callback=my_callback)
29     # 
30     # print(data)
31 
32     pool.close()
33     pool.join()
34     print(f"总耗时：{time.time() - st}")

进程池2

多进程向同一文件写入数据

 1 def callback(result):
 2     """回调函数"""
 3     with open('a.txt', 'a+', encoding="utf-8") as f:
 4         f.write(str(result) + "\n")
 5 
 6 
 7 def run(num):
 8     return num * num
 9 
10 
11 if __name__ == '__main__':
12     pool = multiprocessing.Pool(6)
13     for i in range(1000):
14         pool.apply_async(run, args=(i, ), callback=callback)
15     #pool.map_async(run, range(1000), callback=callback)
16     pool.close()
17     pool.join()

多进程向同一文件写入数据

二线程，多线程，线程池

线程：是操作系统能够进行运算调度的最小单位，被包含在进程中。

对CPU密集型代码(比如循环计算) - 多进程效率更高

对IO密集型代码(比如文件操作，网络爬虫) - 多线程效率更高。

 1 import threading
 2 import time
 3 
 4 
 5 def long_time_task(i):
 6     print('当前子线程: {} 任务{}'.format(threading.current_thread().name, i))
 7     time.sleep(2)
 8     print("结果: {}".format(8 ** 20))
 9 
10 
11 if __name__=='__main__':
12     start = time.time()
13     print('这是主线程：{}'.format(threading.current_thread().name))
14     thread_list = []
15     for i in range(1, 3):
16         t = threading.Thread(target=long_time_task, args=(i, ))
17         thread_list.append(t)
18 
19     for t in thread_list:
20         t.start()
21 
22     for t in thread_list:
23         t.join()
24 
25     end = time.time()
26     print("总共用时{}秒".format((end - start)))

多线程

t.daemon = True  # 设为守护线程：主线程结束时子线程随之终止（需在 start() 之前设置；setDaemon() 自 Python 3.10 起已弃用）

 1 #!/usr/bin/env python
 2 import time
 3 import os
 4 import  threading
 5 from multiprocessing.pool import ThreadPool
 6 
 7 def long_time_task(i):
 8     print(os.getpid())
 9     time.sleep(i)
10     print("结果:{}".format(8**20))
11 
12 if __name__ == '__main__':
13     start = time.time()
14     pool = ThreadPool(5)
15     pool.map(long_time_task,range(9))
16     pool.close()
17     pool.join()
18     end = time.time()
19     print(f"总用时:{end-start}")

线程池

三 concurrent.futures

concurrent.futures 是 Python 3.2 引入的新模块

ThreadPoolExecutor：多线程编程
ProcessPoolExecutor：多进程编程，适合计算密集型任务
Executor：执行器，用于管理工作池
Future：管理工作计算出的结果

 1 import time
 2 import requests
 3 from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
 4 def fetch(url):
 5     r = requests.get(url)
 6     return r.json()['args']['a']
 7 
 8 
 9 if __name__ == '__main__':
10     start = time.time()
11     numbers = range(12)
12     url = 'http://httpbin.org/get?a={}'
13     # with ThreadPoolExecutor(max_workers=3) as executor:
14     #     task_list = [executor.submit(fetch, url.format(n)) for n in range(12)]
15     #     for future in as_completed(task_list):
16     #         print(future.result())
17     #map()方式
18     with ThreadPoolExecutor(max_workers=3) as executor:
19         future = executor.map(fetch, (url.format(n) for n in numbers))
20     for result in future:
21         print(result)
22     print("total_time:", time.time() - start)

线程池两种方法

 1 import numbers
 2 import time
 3 import requests
 4 from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor
 5 
 6 
 7 def fib(n):
 8     if n < 2:
 9         return 1
10     return fib(n - 1) + fib(n - 2)
11 
12 
13 def call_back(future):
14     """
15     回调(可获得返回值和错误)
16     """
17     worker_execption = future.exception()
18     if worker_execption:
19         print("异常:", worker_execption)
20     print(future.result())
21 
22 
def test(n):
    """Return ``n * 2`` for odd ``n``; raise ZeroDivisionError for even ``n``.

    The failure is deliberate: it demonstrates how a worker exception reaches
    the done-callback.
    """
    if n % 2 == 0:
        n / 0  # deliberately raise an exception
    return n * 2


if __name__ == '__main__':
    numbers = range(20)
    start = time.time()
    with ProcessPoolExecutor(max_workers=3) as executor:
        # Variant 1 - submit() + as_completed(), mapping each future to its input:
        #     work_dict = {executor.submit(fib, i): i for i in numbers}  # {future: i}
        #     print(work_dict)
        #     for future in as_completed(work_dict):
        #         num = work_dict[future]
        #         try:
        #             data = future.result()
        #         except Exception as e:
        #             print(e)
        #         else:
        #             print(f"fib({num})={data}")
        # Variant 2 - map():
        #     for num, result in zip(numbers, executor.map(fib, numbers)):
        #         print(f"fib({num})={result}")
        # Variant 3 - add_done_callback() receives both results and exceptions.
        for i in numbers:
            executor.submit(test, i).add_done_callback(call_back)
    # Leaving the with-block waits for every submitted task, so the time
    # printed here covers the whole run rather than just the submissions.
    print(f"total_time:{time.time()-start}")

进程池3种方法

以上测试都是map稍快一些，也更简洁。

四 协程异步IO(asyncio)

协程：又称微线程，是一种比线程更加轻量级的存在，最重要的是，协程不被操作系统内核管理，协程是完全由程序控制的(函数之间互相调用切换)。

对于多核CPU，利用多进程+协程的方式，能充分利用CPU，获得极高的性能。

如同一个进程可以有很多线程一样，一个线程可以有很多协程。

异步是继多线程、多进程之后第三种实现并发的方式，主要用于IO密集型任务的运行效率提升。

协程概念是从 3.4 版本增加的，但 3.4 版本采用是生成器实现，为了将协程和生成器的使用场景进行区分，使语义更加明确，在 python 3.5 中增加了 async 和 await 关键字，用于定义原生协程。

python 中的 asyncio 库提供了管理事件、协程、任务和线程的方法，以及编写并发代码的原语，即 async 和 await。

异步IO的asyncio库使用事件循环驱动的协程实现并发。用户可主动控制程序，在认为耗时IO处添加await（yield from）。在 Python 3.4 的 asyncio 库中，协程使用@asyncio.coroutine装饰，使用yield from来驱动（该装饰器已在 Python 3.11 中移除）；在python3.5中作了如下更改：

@asyncio.coroutine -> async

yield from -> await

async def：用于编写协程函数；

await：等待协程/任务的执行完成

要真正运行一个协程，asyncio 提供了 3 种主要机制：

asyncio.run() 函数用来运行最高层级的入口点 main() 函数（参考上面示例）；
await 等待一个协程（对象）；
asyncio.create_task() 函数用来并发运行作为 asyncio 任务的多个协程。
以上 3 种方式均可使协程被调度执行。

 1 # coding=utf-8
 2 
 3 import asyncio
 4 from multiprocessing.connection import wait
 5 
 6 async def coroutine_example():
 7     await asyncio.sleep(1)
 8     print('ok')
 9     return "ok"
10 
11 
12 coro = coroutine_example()
13 
14 loop = asyncio.get_event_loop()
15 task = loop.create_task(coro)
16 print('运行情况：', task)
17 
18 loop.run_until_complete(task)
19 print('再看下运行情况：', task.result())
20 loop.close()

基础实例

 1 import asyncio
 2 
 3 def my_callback(future):
 4     print('返回值：', future.result())
 5 
 6 async def coroutine_example():
 7     await asyncio.sleep(1)
 8     return 'ok'
 9 
10 coro = coroutine_example()
11 
12 loop = asyncio.get_event_loop()
13 
14 task = loop.create_task(coro)
15 task.add_done_callback(my_callback)
16 
17 loop.run_until_complete(task)
18 loop.close()

通过callback获取返回值

 1 async def coroutine_example(name):
 2     print("doing_name:", name)
 3     await asyncio.sleep(2)
 4     print('done_name:', name)
 5     return "wzy"
 6 
 7 
 8 loop = asyncio.get_event_loop()
 9 tasks = [
10     loop.create_task(coroutine_example('wzy_' + str(i))) for i in range(3)
11 ]
12 wait_core = asyncio.wait(tasks) #控制多任务
13 loop.run_until_complete(wait_core)
14 for task in tasks:
15     print("wxh_result:", task.result())
16 loop.close()

async基础实例并获取返回值

 1 import asyncio
 2 
 3 a = []
 4 
 5 def callback(future):
 6     print("111111", future.result())
 7     a.append(future.result())
 8 
 9 
10 async def coroutine_example(name):
11     print("doing_name:", name)
12     await asyncio.sleep(2)
13     print('done_name:', name)
14     return "wzy"
15 
16 
17 loop = asyncio.get_event_loop()
18 tasks = []
19 for i in range(3):
20     task = loop.create_task(coroutine_example('Zarten_' + str(i)))
21     task.add_done_callback(callback)
22     tasks.append(task)
23 wait_coro = asyncio.wait(tasks)
24 loop.run_until_complete(wait_coro)
25 loop.close()
26 print("a:", a)

通过callback获取返回值

六 websockets

pass

posted @ 2022-09-25 17:41 liumj 阅读(123) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

liumj

进程 线程 异步编程 websockets梳理

一 进程，多进程，进程池

二 线程，多线程，线程池