Process Basics

Process: the smallest unit of resource allocation in the operating system; it can generally be understood as "a program that is currently running".

1. Starting processes with the multiprocessing module

from multiprocessing import Process
import os
import time
import random


def func(name, age):
    print(f'Sending an email to {name}, age {age}')
    time.sleep(random.random())  # blocking
    print('Email sent')


if __name__ == '__main__':
    print(f'Current process {os.getpid()}, parent process {os.getppid()}')
    args_lst = [('alex', 84), ('wusir', 60), ('eva', 36)]
    p_ls = []
    for i in args_lst:
        p = Process(target=func, args=i)
        p.start()
        p_ls.append(p)
    for p in p_ls:
        p.join()  # synchronous blocking: wait here until p finishes

    print('All emails sent')
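Process can also receive keyword arguments through its kwargs parameter; a minimal sketch (reusing a simplified func, the argument values are made up for illustration):

from multiprocessing import Process


def func(name, age):
    print(f'Sending an email to {name}, age {age}')


if __name__ == '__main__':
    # args is a tuple of positional arguments, kwargs a dict of keyword arguments
    p = Process(target=func, args=('alex',), kwargs={'age': 84})
    p.start()
    p.join()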

 

2. Another way to start a process: subclassing Process

import os
from multiprocessing import Process


class MyProcess(Process):
    def __init__(self, a, b, c):
        super().__init__()
        self.a = a
        self.b = b
        self.c = c

    def run(self):
        """开启子进程时自动调用该方法"""
        print(os.getppid(), os.getpid(), self.a, self.b, self.c)


if __name__ == '__main__':
    print(os.getpid())
    p = MyProcess(1, 2, 3)
    p.start()
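Note that start() is what actually creates the child process and then calls run() inside it; calling run() directly just executes the method in the current process and creates no child. A minimal sketch of the difference:

import os
from multiprocessing import Process


class MyProcess(Process):
    def run(self):
        print(os.getppid(), os.getpid())


if __name__ == '__main__':
    MyProcess().run()    # no new process: prints the current process's ppid/pid
    MyProcess().start()  # creates a child process, which then calls run()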

 

3. Other attributes and methods of the Process class

import os
import time
from multiprocessing import Process


class MyProcess(Process):
    def __init__(self, a, b, c):
        super().__init__()
        self.a = a
        self.b = b
        self.c = c

    def run(self):
        """开启子进程时自动调用该方法"""
        time.sleep(1)
        print(os.getppid(), os.getpid(), self.a, self.b, self.c)


if __name__ == '__main__':
    print(os.getpid())
    p = MyProcess(1, 2, 3)
    p.start()

    print(p.pid, p.ident)  # pid of the child process (pid and ident are the same value)
    print(p.name)

    print(p.is_alive())
    p.terminate()  # forcibly terminate the child process (asynchronous, non-blocking)
    time.sleep(0.01)
    print(p.is_alive())
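Two more members worth knowing here are join(timeout) and exitcode; a small sketch (the exact exitcode after terminate() is platform-dependent, e.g. a negative signal number on Unix):

import time
from multiprocessing import Process


def work():
    time.sleep(10)


if __name__ == '__main__':
    p = Process(target=work)
    p.start()
    p.join(timeout=0.1)   # wait at most 0.1s instead of blocking until the child ends
    print(p.exitcode)     # None: the child is still running
    p.terminate()
    p.join()              # wait for the termination to take effect
    print(p.exitcode)     # e.g. -15 (SIGTERM) on Unix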

 

4. Daemon processes

from multiprocessing import Process
import time


def son1():

    while True:
        print('in son1')
        time.sleep(1)


def son2():
    for i in range(10):
        print('in son2')
        time.sleep(1)


if __name__ == '__main__':
    p1 = Process(target=son1)
    p1.daemon = True  # mark p1 as a daemon process
    p1.start()

    p2 = Process(target=son2)
    p2.start()

    # time.sleep(3)  # this would give the daemon roughly 3 seconds to run

    p2.join()  # keep the main process's code running until p2 finishes, so the daemon only ends then


# The main process waits for its child processes to end so that it can reclaim their resources.

# A daemon process ends when the main process's *code* finishes, not when the whole main process exits,
# because the daemon is itself a child process, and the main process can only reclaim its resources after it ends.
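A minimal sketch of that rule: if the parent's code finishes right after start(), the daemon is killed before it gets a chance to print anything.

from multiprocessing import Process
import time


def son():
    time.sleep(0.5)
    print('in son')  # normally never reached: the parent's code has already finished


if __name__ == '__main__':
    p = Process(target=son)
    p.daemon = True
    p.start()
    print('parent code done')  # the daemon is terminated at this point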

 

5. Process queues

"""
进程之间数据隔离

进程之间通信:Inter Process communication  --> IPC
    基于文件
        同一台机器上的多个进程之间通信
        基于socket的文件级别的通信来完成数据传递


    基于网络
        同一台机器或者多台机器上的多进程通信
        第三方工具(消息中间件):
            memcache
            redis
            rabbitmq
            kafka


"""
from multiprocessing import Queue, Process


def son(q):
    q.put('hello')


if __name__ == '__main__':
    q = Queue()
    Process(target=son, args=(q, )).start()
    print(q.get())  # get the value the child process put into the queue
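Besides Queue, multiprocessing also offers Pipe, a lower-level file-based IPC channel between exactly two endpoints; a minimal sketch:

from multiprocessing import Pipe, Process


def son(conn):
    conn.send('hello from the child')
    conn.close()


if __name__ == '__main__':
    parent_conn, child_conn = Pipe()  # a pair of connected Connection objects
    Process(target=son, args=(child_conn,)).start()
    print(parent_conn.recv())  # blocks until the child sends something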

 

6. The producer-consumer model

"""
生产者消费者模型:
    爬虫的时候:
        分布式操作:celery分布式框架

    本质:
        让生产数据和消费数据的效率都达到平衡并且最大化的效率

"""
from multiprocessing import Queue, Process
import random
import time


def consumer(q):  # consumer
    while True:
        food = q.get()
        if food is None:  # the None sentinel means the producers are finished
            break
        print(food)


def producer(q, name, food):  # producer
    for i in range(1, 11):
        foodi = f"{food}->{i}"

        print(f"{name} produced {food} No.{i}")
        time.sleep(random.random())
        q.put(foodi)


if __name__ == '__main__':
    q = Queue()

    c1 = Process(target=consumer, args=(q,))
    p1 = Process(target=producer, args=(q, 'alex', 'ice cream'))
    p2 = Process(target=producer, args=(q, 'wusir', 'taro balls'))

    c1.start()
    p1.start()
    p2.start()

    p1.join()
    p2.join()
    q.put(None)  # after p1 and p2 finish producing, send the consumer a None sentinel (with multiple consumers, send one None per consumer)
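An alternative to the None sentinel is JoinableQueue: the consumer acknowledges every item with task_done(), the main process waits on q.join(), and the consumer runs as a daemon so it exits together with the main process. A sketch under those assumptions:

from multiprocessing import JoinableQueue, Process


def consumer(q):
    while True:
        print(q.get())
        q.task_done()  # acknowledge that this item has been fully handled


def producer(q, food):
    for i in range(1, 11):
        q.put(f'{food}->{i}')


if __name__ == '__main__':
    q = JoinableQueue()
    c = Process(target=consumer, args=(q,))
    c.daemon = True        # the consumer dies when the main process's code ends
    c.start()
    p = Process(target=producer, args=(q, 'ice cream'))
    p.start()
    p.join()               # the producer has put everything into the queue
    q.join()               # blocks until every item has been task_done()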

 

7. Asynchronous blocking and a crawler example based on the producer-consumer model

import requests
from multiprocessing import Process, Queue


"""
异步阻塞:
    不确定子进程的执行顺序,且不知道谁的执行结果先返回
    
"""
url_ls = list()
for i in range(1, 6):
    if i == 1:
        url = 'https://www.dygod.net/html/gndy/dyzz/'
    else:
        url = 'https://www.dygod.net/html/gndy/dyzz/' + 'index_' + str(i) + '.html'

    url_ls.append(url)


def producer(i, url, q):
    ret = requests.get(url)
    # print(ret.text.encode('latin1').decode('gbk'))
    # q.put((i, ret.status_code))
    q.put((i, ret.text.encode('latin1').decode('gbk')))


def consumer(q):
    while True:
        t = q.get()
        if t is None:
            break
        with open(f"电影天堂第{t[0]}页.html", encoding='gbk', mode='w') as f:
            f.write(t[1])


if __name__ == '__main__':
    q = Queue()

    p_ls = []
    for index, url in enumerate(url_ls, 1):
        p = Process(target=producer, args=(index, url, q))
        p.start()
        p_ls.append(p)

    # for i in range(len(url_ls)):
    #     print(q.get())

    Process(target=consumer, args=(q,)).start()

    for p in p_ls:
        p.join()

    q.put(None)

 

8. Sharing data between processes with the Manager class

from multiprocessing import Process, Manager, Lock


def change_dic(dic, lock):
    with lock:
        dic['count'] -= 1


if __name__ == '__main__':

    m = Manager()   # data is isolated between processes; Manager makes a given object shared
    dic = m.dict({'count': 100})

    lock = Lock()

    p_l = []
    for i in range(100):
        p = Process(target=change_dic, args=(dic, lock))
        p.start()
        p_l.append(p)

    for p in p_l:
        p.join()

    print(dic)
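For a single shared number, multiprocessing.Value is a lighter-weight option than a Manager dict; a sketch ('i' is the C int typecode, and get_lock() returns the lock built into the Value):

from multiprocessing import Process, Value


def change(count):
    with count.get_lock():  # the Value carries its own lock
        count.value -= 1


if __name__ == '__main__':
    count = Value('i', 100)
    p_l = [Process(target=change, args=(count,)) for _ in range(100)]
    for p in p_l:
        p.start()
    for p in p_l:
        p.join()
    print(count.value)  # 0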