Python--Python并行编程实战(第2版) threading

python 并行编程

threading 线程

  • 线程由3个元素组成:程序计数器、寄存器和堆栈。
  • 同一个进程内的多个线程可以共享当前进程资源包括:数据和操作系统资源。
  • 线程有自己的状态: 就绪(ready)、运行(running)、阻塞(blocked)。
    • 创建线程 --> 就绪
    • 就绪 --> 操作系统调度线程 --> 运行
    • 运行 --> 超时 --> 就绪
    • 运行 --> 线程等待某些条件 --> 阻塞
    • 阻塞 --> 条件得到满足 --> 就绪
    • 运行 --> 运行完成,线程终止

threading.Thread 类

__init__(group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None)

  • 参数
    • group: 线程组,必须为None,为将来的实现保留的
    • target: 目标函数
    • name: 线程名
    • args: 目标函数的参数元组
    • kwargs: 目标函数的关键字参数字典
    • daemon: 是否为后台线程
      • 一般主线程退出后,子线程还是在运行,如果希望主线程退出后子线程自动退出,则daemon就可以设置为True

start()

启动线程

join()

等待线程结束

run()

业务逻辑,定义Thread子类时,可覆盖此方法

threading.current_thread 函数(旧的驼峰别名 currentThread 自 Python 3.10 起已弃用)

获取当前线程

threading.Lock 类

同一时刻只能有一个线程持有锁;但是,锁不归属于获得它的线程,其他线程也可以调用release()主动释放锁

acquire()

获得锁

release()

释放锁

locked()

判断锁是否已被获取

threading.RLock 类

重入锁,同一个线程可多次获取锁,每个acquire()都要有对应的release(),只有得到锁的线程才能释放锁

acquire()

获得锁

release()

释放锁

示例1

import datetime
import random
import threading
import time


def print_(content):
    """Print ``content`` to stdout, prefixed with the current local time."""
    stamp = datetime.datetime.now()
    print(f'{stamp} {content}')


def work():
    """Simulate a unit of work in the calling thread.

    Logs a start message, sleeps for a random 1-5 seconds, then logs a
    completion message. Both messages carry the current thread's name.
    """
    me = threading.current_thread().name
    print_(f'thread {me} start working ...')
    nap_seconds = random.randint(1, 5)
    time.sleep(nap_seconds)
    print_(f'thread {me} over.')


# Module-level mutex shared by every thread that runs work_lock().
lock = threading.Lock()


def work_lock():
    """Run the simulated work while holding the shared ``lock``.

    The whole body runs inside ``with lock:``, so the worker threads execute
    one after another and the lock is released even if the body raises.
    """
    thread_name = threading.current_thread().name

    # A plain Lock is not owned by the thread that acquired it; any other
    # thread may release it. Experiment (left disabled):
    # if lock.locked():
    #     print_(f'thread {thread_name} release the lock')
    #     lock.release()

    # With the lock held, the threads run strictly in sequence.
    with lock:
        print_(f'thread {thread_name} start working ...')
        # Where the lock is released matters: releasing right after the
        # start message would keep the start logs ordered but let the
        # "over" logs appear in random order.
        time.sleep(random.randint(1, 5))
        print_(f'thread {thread_name} over.')


# Module-level re-entrant lock shared by every thread that runs work_r_lock().
r_lock = threading.RLock()


def work_r_lock():
    """Run the simulated work while holding ``r_lock``, acquiring it twice.

    The nested ``with`` blocks show that the owning thread can re-acquire an
    RLock. Each acquisition is paired with exactly one release, even if the
    body raises.
    """
    thread_name = threading.current_thread().name

    # With the lock held, the threads run strictly in sequence.
    with r_lock:
        print_(f'thread {thread_name} start working ...')
        # Second acquisition by the same thread succeeds (re-entrant).
        with r_lock:
            print_(f'thread {thread_name} second require lock')
            print_(f'thread {thread_name} first release lock')
        # Still held once here; other threads stay blocked during the sleep.
        time.sleep(random.randint(1, 3))
        print_(f'thread {thread_name} over.')
    

def main():
    """Start ten worker threads, wait for all of them, then log 'main end'."""
    # To compare behaviours, swap the target/name for
    # work / f'worker_{i}' or work_lock / f'worker_lock_{i}'.
    workers = [
        threading.Thread(target=work_r_lock, name=f'worker_r_lock_{idx}')
        for idx in range(10)
    ]

    for thread in workers:
        thread.start()

    for thread in workers:
        thread.join()

    print_('main end')


def run_or_daemon():
    """Count through five steps, with range and pace chosen by thread name.

    A thread named 'background_process' logs 0..4 with a 3 second pause
    after each step; any other thread logs 5..9 with a 1 second pause.
    """
    name = threading.current_thread().name
    print_(f'process {name} start ...')
    if name == 'background_process':
        steps, pause = range(0, 5), 3
    else:
        steps, pause = range(5, 10), 1
    for step in steps:
        print_(f'process {name} ---> {step}')
        time.sleep(pause)
    print_(f'process {name} exist.')


def main_daemon():
    """Start one daemon and one non-daemon thread without joining either.

    Neither thread is joined, so 'main_daemon end.' is logged right after
    both have been started. The non-daemon thread is started first.
    """
    background_proc = threading.Thread(
        target=run_or_daemon,
        name='background_process',
        daemon=True,
    )
    # Left at the default (non-daemon) setting.
    no_background_proc = threading.Thread(
        target=run_or_daemon,
        name='no_background_process',
    )
    for thread in (no_background_proc, background_proc):
        thread.start()
    # Joining is intentionally skipped here:
    # background_proc.join()
    # no_background_proc.join()
    print_('main_daemon end.')


if __name__ == '__main__':
    # main()
    main_daemon()

threading.Semaphore 类

信号量,内部维护一个计数器,默认计数器的值为1。调用acquire()方法时,如果计数器的值等于零,则线程将被阻塞,直到其他线程调用release(),使得计数器的值大于零。

acquire()

计数器减1

  • 参数
    • blocking: bool 是否阻塞
    • timeout: 阻塞时长
  • 返回:bool

release()

计数器加1

示例1

import logging
import random
import threading
import time

# Log line layout: timestamp, thread name (left-aligned, min width 17),
# level name (left-aligned, min width 8), then the message.
LOG_FORMAT = '%(asctime)s %(threadName)-17s %(levelname)-8s %(message)s'
# Configure the root logger to emit INFO and above using LOG_FORMAT.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# Start the counter at 0 so a consumer blocks until a producer has
# published an item (the default of 1 would let the first consumer through
# before anything was produced).
semaphore = threading.Semaphore(0)
# Shared slot written by producer() and read by consumer().
item = 0


def consumer():
    """Block until the producer signals, then log the shared ``item``."""
    logging.info('Consumer is waiting...')
    semaphore.acquire()
    logging.info(f'Consumer notify: item is {item}')


def producer(delay=3):
    """Publish a new random ``item`` and wake exactly one waiting consumer.

    Args:
        delay: Seconds to sleep before producing. Defaults to 3.
    """
    global item
    time.sleep(delay)
    item = random.randint(0, 1000)
    logging.info(f'Producer nodify: item is {item}')
    # Release once per produced item; a second release would leave the
    # counter above zero and let a later consumer run without waiting.
    semaphore.release()


def main():
    """Run ten rounds, each with one consumer thread and one producer thread.

    In every round the consumer is started before the producer, and both
    are joined before the next round begins.
    """
    for round_no in range(10):
        pair = (
            threading.Thread(target=consumer, name=f'consumer_{round_no}'),
            threading.Thread(target=producer, name=f'producer_{round_no}'),
        )

        for thread in pair:
            thread.start()

        for thread in pair:
            thread.join()


if __name__ == '__main__':
    main()

threading.Condition 类

条件锁,内部维护了一个锁变量和一个waiter池
有个疑问:为什么一个线程acquire后没有release,但是其他线程也可以继续acquire执行?答:因为wait()在阻塞期间会释放内部锁,被唤醒后重新获取锁才返回,所以等待中的线程并不占有锁。

acquire()

获取锁,调用内部锁变量的acquire方法

release()

释放锁,调用内部锁变量的release方法

wait()

等待:释放内部锁并加入waiter池,被唤醒后重新获取锁再返回(调用前必须已持有锁)

notify(n=1)

唤醒,从waiter池取n(默认为1)个进行唤醒

notify_all()

唤醒全部,避免有的线程一直得不到唤醒

示例1

import logging
import random
import threading
import time

# Log line layout: timestamp, thread name (left-aligned, min width 17),
# level name (left-aligned, min width 8), then the message.
LOG_FORMAT = '%(asctime)s %(threadName)-17s %(levelname)-8s %(message)s'
# Configure the root logger to emit INFO and above using LOG_FORMAT.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# An explicit lock could be supplied instead:
# lock = threading.Lock()
# condition = threading.Condition(lock)
condition = threading.Condition()
# Shared buffer guarded by ``condition``.
items = []

# Why can the producer run although a waiting consumer never called
# release()? Because Condition.wait() releases the underlying lock while it
# blocks and re-acquires it before returning.


class Consumer(threading.Thread):
    """Thread that pops 20 items from ``items``, one every 2 seconds."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def consume(self):
        """Remove one item, waiting on ``condition`` while the list is empty."""
        with condition:
            # Re-check the predicate in a loop: with several consumers,
            # another thread may empty the list again between the notify
            # and this thread re-acquiring the lock, which would make
            # items.pop() raise IndexError.
            while not items:
                logging.info('no items to consume')
                condition.wait()
            item = items.pop()
            logging.info(f'Consume item {item}')

            condition.notify()

    def run(self):
        """Call consume() 20 times with a 2 second pause before each call."""
        for _ in range(20):
            # time.sleep(random.randint(1, 3))
            time.sleep(2)
            self.consume()


class Producer(threading.Thread):
    """Thread that appends 20 items to ``items``, one every 0.5 seconds."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def produce(self, index):
        """Append ``index`` to ``items``, waiting while the list holds 10 items.

        Args:
            index: The value to append.
        """
        with condition:
            # Re-check the predicate in a loop so a wake-up that does not
            # correspond to free space cannot overfill the list.
            while len(items) >= 10:
                logging.info(f'items produced {len(items)}. Stopped')
                condition.wait()

            items.append(index)
            logging.info(f'Produce item {index}, now total is {len(items)}')
            condition.notify()

    def run(self):
        """Call produce(i) for i in 0..19 with a 0.5 second pause before each."""
        for i in range(20):
            # time.sleep(random.randint(1, 3))
            time.sleep(0.5)
            self.produce(i)


def main():
    """Start three consumers and one producer, then join them in that order.

    NOTE(review): each Consumer runs 20 consume() calls while the single
    Producer appends only 20 items in total, so the joins on the consumers
    may never return.
    """
    consumers = [Consumer(), Consumer(), Consumer()]
    everyone = consumers + [Producer()]

    for thread in everyone:
        thread.start()

    for thread in everyone:
        thread.join()


if __name__ == '__main__':
    main()

threading.Event 类

事件,内部维护了一个Condition和一个flag

wait()

等待事件发生

set()

触发事件,flag设置为True,所有wait的线程都会激活

clear()

恢复flag为False,一般调用了set()之后需要调用clear(),否则wait()将不会被阻塞

示例1

import logging
import random
import threading
import time

# Log line layout: timestamp, thread name (left-aligned, min width 17),
# level name (left-aligned, min width 8), then the message.
LOG_FORMAT = '%(asctime)s %(threadName)-17s %(levelname)-8s %(message)s'
# Configure the root logger to emit INFO and above using LOG_FORMAT.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

# Shared event: Producer sets and clears it, Consumer threads wait on it.
event = threading.Event()
# Shared list the Producer appends to; the consumers in this example only
# log and never pop from it.
items = []


class Consumer(threading.Thread):
    """Thread that repeatedly waits for the shared ``event`` and logs."""

    def __init__(self, *args, **kwargs):
        threading.Thread.__init__(self, *args, **kwargs)

    def _await_and_handle(self):
        """Wait for one event signal, pause 1 second, then log a message."""
        logging.info('start wait...')
        event.wait()
        logging.info('end wait...')
        # time.sleep(random.randint(1, 3))
        time.sleep(1)
        # Popping from ``items`` is disabled; a fixed value is logged.
        # item = items.pop()
        logging.info(f'Consume item {1}')

    def run(self):
        """Handle event signals forever; this method never returns."""
        while True:
            self._await_and_handle()


class Producer(threading.Thread):
    """Thread that appends 20 items and pulses the shared ``event`` each time."""

    def __init__(self, *args, **kwargs):
        threading.Thread.__init__(self, *args, **kwargs)

    def run(self):
        """Every 3 seconds append the next number, log it, and pulse the event."""
        produced = 0
        while produced < 20:
            # time.sleep(random.randint(1, 3))
            time.sleep(3)
            items.append(produced)
            logging.info(f'Produce item {produced}, now total is {len(items)}')
            # Set then immediately clear, so later wait() calls block again.
            event.set()
            event.clear()
            produced += 1


def main():
    """Start three consumers and one producer, then join them in that order.

    NOTE(review): Consumer.run() loops forever, so the first join on a
    consumer does not return.
    """
    consumers = [Consumer(), Consumer(), Consumer()]
    everyone = consumers + [Producer()]

    for thread in everyone:
        thread.start()

    for thread in everyone:
        thread.join()


if __name__ == '__main__':
    main()

threading.Barrier 类

屏障,内部维护了一个Condition,计数count,条件数量parties

init

  • 参数
    • parties: 条件数量,当调用wait的数量达到了此值,就会释放一次

wait()

等待,计数count会加1,当wait的数量达到了指定的parties,就会释放,同时count会相应的减1。

示例

import datetime
import random
import threading
import time

# Number of runner threads; also the number of parties the barrier waits for.
num_runners = 3
# Barrier that every runner waits on after reaching the finish line.
finish_line = threading.Barrier(num_runners)
# Runner names; each run() call pops one from the end of this list.
runners = ['Huey', 'Dewey', 'Louie']


def print_(content):
    """Write ``content`` to stdout after a current-local-time prefix."""
    print('{} {}'.format(datetime.datetime.now(), content))


def run():
    """Simulate one runner: take a name, 'run' for 1-3 seconds, hit the barrier.

    Logs when the runner reaches the finish line, then waits on
    ``finish_line`` and logs the value returned by ``wait()``.
    """
    name = runners.pop()
    race_seconds = random.randint(1, 3)
    time.sleep(race_seconds)
    print_(f'{name} reached the finish line.')
    position = finish_line.wait()
    print_(f'{name} index: {position}')


def main():
    """Run the race: start ``num_runners`` threads and wait for all of them."""
    print_('start race...')
    threads = [threading.Thread(target=run) for _ in range(num_runners)]

    for thread in threads:
        thread.start()

    for thread in threads:
        thread.join()

    print_('end race.')


if __name__ == '__main__':
    main()

使用queue.Queue实现线程之间数据共享

queue.Queue

线程安全的队列

  • put(): 把数据放入队列。如果队列满了,就等待。
  • get(): 从队列中拿出数据。如果队列空了,就等待。
  • task_done(): 每次get()并处理完一个元素后调用,表示该任务已完成;Queue.join()会一直阻塞,直到所有放入队列的元素都被task_done()标记完成(与put()是否阻塞无关)
  • put_nowait(): 把数据放入队列。如果队列满了,就报异常。
  • get_nowait(): 从队列中拿出数据。如果队列空了,就报异常。

示例1

import datetime
import threading
import time
from queue import Queue


def print_(content):
    """Emit ``content`` on stdout, preceded by the current local timestamp."""
    now = datetime.datetime.now()
    line = f'{now} {content}'
    print(line)


class Producer(threading.Thread):
    """Thread that puts the numbers 0..19 on a queue, one per second."""

    def __init__(self, queue):
        super().__init__()
        # Queue this producer writes to.
        self.queue = queue

    def run(self):
        """Put each number on the queue, log it, then sleep one second."""
        for number in range(20):
            self.queue.put(number)
            print_(f'Producer {self.name} produce item {number}')
            time.sleep(1)


class Consumer(threading.Thread):
    """Thread that takes items off a queue forever and marks each one done."""

    def __init__(self, queue):
        super().__init__()
        # Queue this consumer reads from.
        self.queue = queue

    def run(self):
        """Loop forever: get an item, log it, call task_done(). Never returns."""
        source = self.queue
        while True:
            print_(f'Consumer {self.name} getting...')
            taken = source.get()
            print_(f'Consumer {self.name} consume item {taken}')
            source.task_done()


if __name__ == '__main__':
    # One producer and three consumers share a single thread-safe queue.
    queue = Queue()
    p_1 = Producer(queue)
    c_1 = Consumer(queue)
    c_2 = Consumer(queue)
    c_3 = Consumer(queue)

    consumers = (c_1, c_2, c_3)
    for consumer_thread in consumers:
        # Consumer.run() loops forever, so joining a consumer would hang.
        # Daemon threads do not keep the interpreter alive at exit.
        consumer_thread.daemon = True

    p_1.start()
    for consumer_thread in consumers:
        consumer_thread.start()

    # Wait until every item has been produced ...
    p_1.join()
    # ... and until every item has been marked done with task_done().
    queue.join()


posted @ 2022-03-02 16:41  liDB  阅读(372)  评论(0编辑  收藏  举报