Python--Python并行编程实战(第2版) threading
python 并行编程
threading 线程
- 线程由3个元素组成:程序计数器、寄存器和堆栈。
- 同一个进程内的多个线程可以共享当前进程资源包括:数据和操作系统资源。
- 线程有自己的状态: 就绪(ready)、运行(running)、阻塞(blocked)。
- 创建线程 --> 就绪
- 就绪 --> 操作系统调度线程 --> 运行
- 运行 --> 超时 --> 就绪
- 运行 --> 线程等待某些条件 --> 阻塞
- 阻塞 --> 条件得到满足 --> 就绪
- 运行 --> 运行完成,线程终止
threading.Thread 类
__init__(group=None, target=None, name=None, args=(), kwargs={}, *, daemon=None)
- 参数
- group: 线程组,必须为None,为将来的实现保留的
- target: 目标函数
- name: 线程名
- args: 目标函数的参数元组
- kwargs: 目标函数的关键字参数字典
- daemon: 是否为后台线程
- 一般主线程退出后,子线程还是在运行,如果希望主线程退出后子线程自动退出,则daemon就可以设置为True
start()
启动线程
join()
等待线程结束
run()
业务逻辑,定义Thread子类时,可覆盖此方法
threading.current_thread 函数(旧名 threading.currentThread 自 Python 3.10 起已弃用)
获取当前线程
threading.Lock 类
同一时刻只能有一个线程持有锁;但锁并不归属于获得它的线程,其他线程也可以主动调用release()释放锁
acquire()
获得锁
release()
释放锁
locked()
判断锁是否已被获取
threading.RLock 类
重入锁,同一个线程可多次获取锁,每个acquire()都要有对应的release(),只有得到锁的线程才能释放锁
acquire()
获得锁
release()
释放锁
示例1
import datetime
import random
import threading
import time
def print_(content):
    """Print ``content`` prefixed with the current local date and time."""
    timestamp = datetime.datetime.now()
    print(f'{timestamp} {content}')
def work():
    """Simulate a unit of work on the calling thread.

    Logs a start message, sleeps for a random whole number of seconds
    between 1 and 5 (inclusive), then logs a completion message.
    """
    me = threading.current_thread().name
    print_(f'thread {me} start working ...')
    delay = random.randint(1, 5)
    time.sleep(delay)
    print_(f'thread {me} over.')
# Mutex shared by every thread running work_lock().
lock = threading.Lock()


def work_lock():
    """Run the simulated work while holding the module-level ``lock``.

    The whole body runs under the lock, so only one worker thread at a
    time prints its start message, sleeps 1-5 seconds and prints its
    completion message.
    """
    thread_name = threading.current_thread().name
    # Experiment: a plain Lock can be released by a thread that did not
    # acquire it. Enabling the snippet below lets a newly started worker
    # release the lock currently held by another worker.
    # if lock.locked():
    #     print_(f'thread {thread_name} release the lock')
    #     lock.release()

    # ``with`` releases the lock even if the body raises; a bare
    # acquire()/release() pair would leave the other workers blocked.
    with lock:
        print_(f'thread {thread_name} start working ...')
        # Where the lock is released matters: releasing right after the
        # start message would keep the start messages serialized while
        # the "over" messages would appear in random order.
        time.sleep(random.randint(1, 5))
        print_(f'thread {thread_name} over.')
# Re-entrant lock shared by every thread running work_r_lock().
r_lock = threading.RLock()


def work_r_lock():
    """Run the simulated work while holding ``r_lock`` re-entrantly.

    The calling thread acquires the lock twice (nested) to show that an
    RLock may be taken again by its holder; each acquisition is paired
    with exactly one release. The outer hold spans the whole body, so only
    one worker thread at a time runs it.
    """
    thread_name = threading.current_thread().name
    # ``with`` pairs every acquire with a release even when the body
    # raises, so a failing worker cannot leave the lock held.
    with r_lock:
        print_(f'thread {thread_name} start working ...')
        # Second acquisition by the same thread succeeds immediately.
        with r_lock:
            print_(f'thread {thread_name} second require lock')
            print_(f'thread {thread_name} first release lock')
        # Still holding the outer acquisition while "working".
        time.sleep(random.randint(1, 3))
        print_(f'thread {thread_name} over.')
def main():
    """Start ten worker threads, wait for all of them, then log the end.

    The active experiment uses ``work_r_lock``. To try the other variants,
    build the threads with ``target=work, name=f'worker_{i}'`` or
    ``target=work_lock, name=f'worker_lock_{i}'`` instead.
    """
    pool = [
        threading.Thread(target=work_r_lock, name=f'worker_r_lock_{idx}')
        for idx in range(10)
    ]
    for thread in pool:
        thread.start()
    for thread in pool:
        thread.join()
    print_('main end')
def run_or_daemon():
    """Count through five steps, with pacing chosen by the thread's name.

    A thread named ``'background_process'`` counts 0-4 and sleeps three
    seconds after each step; any other thread counts 5-9 and sleeps one
    second after each step. Start and end messages are logged around the
    loop.
    """
    name = threading.current_thread().name
    print_(f'process {name} start ...')
    is_background = name == 'background_process'
    steps = range(0, 5) if is_background else range(5, 10)
    pause = 3 if is_background else 1
    for step in steps:
        print_(f'process {name} ---> {step}')
        time.sleep(pause)
    print_(f'process {name} exist.')
def main_daemon():
    """Start one daemon and one non-daemon thread without joining them.

    Both threads run ``run_or_daemon``. Only the thread named
    ``'background_process'`` is marked as a daemon; the other keeps the
    default daemon setting. The function logs its end message right after
    starting the threads. Joining either thread (left disabled here) would
    make this function wait for it.
    """
    background_proc = threading.Thread(
        target=run_or_daemon, name='background_process', daemon=True
    )
    no_background_proc = threading.Thread(
        target=run_or_daemon, name='no_background_process'
    )
    # The non-daemon thread is started first, then the daemon thread.
    for thread in (no_background_proc, background_proc):
        thread.start()
    print_('main_daemon end.')
# Script entry point: runs the daemon-thread demo. Enable main() instead
# to run the worker/lock demos.
if __name__ == '__main__':
    # main()
    main_daemon()
threading.Semaphore 类
信号量,内部维护一个计数器,默认计数器的值为1。调用acquire()时如果计数器的值等于零,线程将被阻塞,直到其他线程调用release()使计数器的值大于零。
acquire()
计数器减1
- 参数
- blocking: bool 是否阻塞
- timeout: 阻塞时长
- 返回:bool
release()
计数器加1
示例1
import logging
import random
import threading
import time
# Log line layout: timestamp, thread name (left-aligned, width 17),
# level name (left-aligned, width 8), then the message.
LOG_FORMAT = '%(asctime)s %(threadName)-17s %(levelname)-8s %(message)s'
# Configure the root logger to emit INFO and above using that layout.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
# Start the counter at 0 so consumer() really blocks until producer() has
# published an item. With the default initial value of 1 the first
# acquire() would succeed immediately and the consumer would log a stale item.
semaphore = threading.Semaphore(0)
# Most recently produced value, written by producer() and read by consumer().
item = 0


def consumer():
    """Block until the producer signals, then log the current ``item``."""
    logging.info('Consumer is waiting...')
    semaphore.acquire()
    logging.info(f'Consumer notify: item is {item}')


def producer():
    """Publish a random item after a three second delay and signal once.

    Stores a random integer between 0 and 1000 (inclusive) in the
    module-level ``item`` and releases the semaphore so one waiting
    consumer can proceed.
    """
    global item
    time.sleep(3)
    item = random.randint(0, 1000)
    logging.info(f'Producer nodify: item is {item}')
    # Exactly one release per produced item: a second release would let a
    # later consumer pass without waiting for a new item.
    semaphore.release()
def main():
    """Run ten consumer/producer rounds, one fresh thread pair per round.

    In each round the consumer thread is started before the producer
    thread, and both are joined before the next round begins.
    """
    for round_no in range(10):
        pair = (
            threading.Thread(target=consumer, name=f'consumer_{round_no}'),
            threading.Thread(target=producer, name=f'producer_{round_no}'),
        )
        for thread in pair:
            thread.start()
        for thread in pair:
            thread.join()
# Script entry point for the semaphore demo.
if __name__ == '__main__':
    main()
threading.Condition 类
条件锁,内部维护了一个锁变量和一个waiter池
有个疑问:为什么一个线程acquire后没有release,但是其他线程也可以继续acquire执行?答:wait()在阻塞期间会释放内部锁,被唤醒后重新获取锁才返回,所以其他线程可以在此期间acquire。
acquire()
获取锁,调用内部锁变量的acquire方法
release()
释放锁,调用内部锁变量的release方法
wait()
等待,加入waiter池
notify(n=1)
唤醒,从waiter池取n(默认为1)个进行唤醒
notify_all()
唤醒全部,避免有的线程一直得不到唤醒
示例1
import logging
import random
import threading
import time
# Log line layout: timestamp, thread name (left-aligned, width 17),
# level name (left-aligned, width 8), then the message.
LOG_FORMAT = '%(asctime)s %(threadName)-17s %(levelname)-8s %(message)s'
# Configure the root logger to emit INFO and above using that layout.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
# Alternative: pass an explicit lock to the Condition. Without one, the
# Condition creates its own underlying lock.
# lock = threading.Lock()
# condition = threading.Condition(lock)
condition = threading.Condition()
# Shared buffer that the Producer appends to and the Consumers pop from.
items = []
# The string below records an open question: why can produce() proceed even
# though consume() has not called release()? Answer: Condition.wait()
# releases the underlying lock while the thread is blocked and re-acquires
# it before returning, so other threads can acquire the condition meanwhile.
"""
有个疑问:为什么consume没有release,但是produce也可以继续执行??
"""
class Consumer(threading.Thread):
    """Thread that removes items from the shared ``items`` list.

    Access to ``items`` is coordinated through the module-level
    ``condition``.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``threading.Thread``."""
        super().__init__(*args, **kwargs)

    def consume(self):
        """Pop one item from ``items``, waiting while the list is empty.

        After removing an item, one thread waiting on ``condition`` is
        notified.
        """
        # ``with`` releases the condition's lock even if the body raises.
        with condition:
            # Re-check the predicate in a loop rather than with a single
            # ``if``: several consumers share this condition and consumers
            # notify it too, so a woken consumer may find that the list is
            # still (or again) empty. Popping then would raise IndexError.
            while not items:
                logging.info('no items to consume')
                condition.wait()
            item = items.pop()
            logging.info(f'Consume item {item}')
            condition.notify()

    def run(self):
        """Consume 20 items, sleeping two seconds before each attempt."""
        for _ in range(20):
            # Alternative pacing: time.sleep(random.randint(1, 3))
            time.sleep(2)
            self.consume()
class Producer(threading.Thread):
    """Thread that appends items to the shared ``items`` list.

    Access to ``items`` is coordinated through the module-level
    ``condition``; production pauses while the list holds ten items.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``threading.Thread``."""
        super().__init__(*args, **kwargs)

    def produce(self, index):
        """Append ``index`` to ``items``, waiting while the list is full.

        After appending, one thread waiting on ``condition`` is notified.
        """
        # ``with`` releases the condition's lock even if the body raises.
        with condition:
            # Re-check the predicate after every wake-up: being notified
            # does not by itself guarantee that there is room in the list.
            while len(items) == 10:
                logging.info(f'items produced {len(items)}. Stopped')
                condition.wait()
            items.append(index)
            logging.info(f'Produce item {index}, now total is {len(items)}')
            condition.notify()

    def run(self):
        """Produce the items 0-19, sleeping half a second before each."""
        for i in range(20):
            # Alternative pacing: time.sleep(random.randint(1, 3))
            time.sleep(0.5)
            self.produce(i)
def main():
    """Start three consumers and one producer and wait for all of them.

    NOTE(review): each Consumer attempts 20 consumes (60 in total) while
    the single Producer only makes 20 items, so the consumer joins are not
    expected to return once the items run out - confirm whether that is
    intended for this demo.
    """
    # Consumers are created before the producer, then everything is
    # started and joined in that same order.
    consumers = [Consumer() for _ in range(3)]
    threads = consumers + [Producer()]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
# Script entry point for the Condition producer/consumer demo.
if __name__ == '__main__':
    main()
threading.Event 类
事件,内部维护了一个Condition和一个flag
wait()
等待事件发生
set()
触发事件,flag设置为True,所有wait的线程都会激活
clear()
恢复flag为False,一般调用了set()之后需要调用clear(),否则wait()将不会被阻塞
示例1
import logging
import random
import threading
import time
# Log line layout: timestamp, thread name (left-aligned, width 17),
# level name (left-aligned, width 8), then the message.
LOG_FORMAT = '%(asctime)s %(threadName)-17s %(levelname)-8s %(message)s'
# Configure the root logger to emit INFO and above using that layout.
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)
# Event that the Producer sets (and immediately clears) to wake the Consumers.
event = threading.Event()
# List the Producer appends to; the Consumers in this demo do not pop from it.
items = []
class Consumer(threading.Thread):
    """Thread that repeatedly waits on the shared ``event`` and logs."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``threading.Thread``."""
        threading.Thread.__init__(self, *args, **kwargs)

    def run(self):
        """Loop forever: wait for the event, pause one second, log.

        The item is not actually taken from ``items`` (that step is
        disabled: ``item = items.pop()``); a fixed message is logged
        instead.
        """
        while True:
            logging.info('start wait...')
            event.wait()
            logging.info('end wait...')
            # Alternative pacing: time.sleep(random.randint(1, 3))
            pause_seconds = 1
            time.sleep(pause_seconds)
            logging.info(f'Consume item {1}')
class Producer(threading.Thread):
    """Thread that appends to ``items`` and pulses the shared ``event``."""

    def __init__(self, *args, **kwargs):
        """Forward all arguments unchanged to ``threading.Thread``."""
        threading.Thread.__init__(self, *args, **kwargs)

    def run(self):
        """Produce the items 0-19, one every three seconds.

        After each append the event is set and then cleared again right
        away, so later ``wait()`` calls block until the next item.
        """
        for seq in range(20):
            # Alternative pacing: time.sleep(random.randint(1, 3))
            time.sleep(3)
            items.append(seq)
            total = len(items)
            logging.info(f'Produce item {seq}, now total is {total}')
            event.set()
            event.clear()
def main():
    """Start three consumers and one producer and wait for all of them.

    NOTE(review): Consumer.run loops forever, so the consumer joins are
    not expected to return - confirm whether that is intended for this
    demo.
    """
    # Consumers are created before the producer, then everything is
    # started and joined in that same order.
    consumers = [Consumer() for _ in range(3)]
    threads = consumers + [Producer()]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()
# Script entry point for the Event producer/consumer demo.
if __name__ == '__main__':
    main()
threading.Barrier 类
屏障,内部维护了一个Condition、计数count和条件数量parties
init
- 参数
- parties: 条件数量,当调用wait的数量达到了此值,就会释放一次
wait()
等待,计数count会加1,当wait的数量达到了指定的parties,就会释放,同时count会相应的减1。
示例
import datetime
import random
import threading
import time
# Number of racing threads; also used as the barrier's party count.
num_runners = 3
# Barrier shared by all runners; run() calls wait() on it at the finish.
finish_line = threading.Barrier(num_runners)
# Runner names; each racing thread pops one name for itself.
runners = ['Huey', 'Dewey', 'Louie']
def print_(content):
    """Print ``content`` prefixed with the current local date and time."""
    timestamp = datetime.datetime.now()
    print(f'{timestamp} {content}')
def run():
    """Race as one runner: take a name, "run", then wait at the barrier.

    Pops a name from ``runners``, sleeps a random 1-3 seconds, logs the
    arrival, waits on ``finish_line`` and finally logs the value that
    ``wait()`` returned.
    """
    who = runners.pop()
    time.sleep(random.randint(1, 3))
    print_(f'{who} reached the finish line.')
    arrival_index = finish_line.wait()
    print_(f'{who} index: {arrival_index}')
def main():
    """Run the race: one thread per runner, all started, then all joined."""
    print_('start race...')
    racers = [threading.Thread(target=run) for _ in range(num_runners)]
    for racer in racers:
        racer.start()
    for racer in racers:
        racer.join()
    print_('end race.')
# Script entry point for the Barrier demo.
if __name__ == '__main__':
    main()
使用queue.Queue实现线程之间数据共享
queue.Queue
线程安全的队列
- put(): 把数据放入队列。如果队列满了,就等待。
- get(): 从队列中拿出数据。如果队列空了,就等待。
- task_done(): 每次get并处理完一个数据后调用,表示该任务已完成;当所有已put的数据都被task_done后,阻塞在queue.join()上的线程才会继续执行(它不影响put的阻塞)
- put_nowait(): 把数据放入队列。如果队列满了,就报异常。
- get_nowait(): 从队列中拿出数据。如果队列空了,就报异常。
示例1
import datetime
import threading
import time
from queue import Queue
def print_(content):
    """Print ``content`` prefixed with the current local date and time."""
    timestamp = datetime.datetime.now()
    print(f'{timestamp} {content}')
class Producer(threading.Thread):
    """Thread that puts the numbers 0-19 onto a queue, one per second."""

    def __init__(self, queue):
        """Remember the queue that ``run`` will put items onto."""
        super().__init__()
        self.queue = queue

    def run(self):
        """Put each number on the queue, log it, then sleep one second."""
        for number in range(20):
            self.queue.put(number)
            print_(f'Producer {self.name} produce item {number}')
            time.sleep(1)
class Consumer(threading.Thread):
    """Thread that takes items from a queue in an endless loop."""

    def __init__(self, queue):
        """Remember the queue that ``run`` will take items from."""
        super().__init__()
        self.queue = queue

    def run(self):
        """Loop forever: get an item, log it, mark it done on the queue."""
        source = self.queue
        while True:
            print_(f'Consumer {self.name} getting...')
            item = source.get()
            print_(f'Consumer {self.name} consume item {item}')
            source.task_done()
# Script entry point for the Queue demo: one producer feeding three consumers.
if __name__ == '__main__':
    queue = Queue()
    p_1 = Producer(queue)
    c_1 = Consumer(queue)
    c_2 = Consumer(queue)
    c_3 = Consumer(queue)
    # Consumer.run never returns, so joining the consumers would block the
    # script forever. Run them as daemon threads and wait on the queue
    # instead; daemon threads do not keep the interpreter alive.
    for consumer in (c_1, c_2, c_3):
        consumer.daemon = True
    p_1.start()
    c_1.start()
    c_2.start()
    c_3.start()
    # Wait until every item has been put on the queue ...
    p_1.join()
    # ... and until task_done() has been called for each of them.
    queue.join()