这是一个基于 threading 可停止线程实现的、容量有限且并行度有限的 Python 任务管理器
这是一个可停止线程的有限容量有限并行度的任务管理器
基于:GitHub - AlitaIcon/StopableThreadJob: 可停止线程任务管理器
Quick Start
基础调用与效果
import time
import datetime
from loguru import logger
from StopableThreadJob.job_manager import JobManager
if __name__ == '__main__':
    # Demo: add, start, remove and re-add jobs, printing the pool state along
    # the way.
    # NOTE(review): the original indentation was lost; the extent of the two
    # `for` loops below (whether the trailing sleep / print_current_job calls
    # belong inside them) is reconstructed -- confirm against the upstream demo.

    def slow_func(name):
        # Slow workload: log a timestamp once per second, five times.
        for _ in range(5):
            logger.info(f"{name} -- {datetime.datetime.now()}")
            time.sleep(1)

    job_manager = JobManager()

    # Remove a job that was never added.
    job_manager.remove_job('2')

    for pid in range(6):
        logger.info(f"添加任务: {pid}")
        job_manager.add_job(target=slow_func, args=(pid,), job_id=f'{pid}')
    time.sleep(1)
    job_manager.start_job()

    # Remove a job that has been added and is running.
    job_manager.remove_job('1')
    # Remove a job that has been added but is not running yet.
    job_manager.remove_job('4')
    time.sleep(5)
    # Remove a job that has already finished.
    job_manager.remove_job('0')
    job_manager.print_current_job()
    print(job_manager.job_store)

    # Re-add some of the ids used above and run them again.
    for i in [0, 1, 2, 4]:
        logger.info(f"添加任务: {i}")
        job_manager.add_job(target=slow_func, args=(i,), job_id=f'{i}')
    job_manager.print_current_job()
    job_manager.start_job()
    time.sleep(6)
    print(job_manager.job_store)
    job_manager.print_current_job()
    time.sleep(30)
文件 job_manager.py
import ctypes
import threading
from loguru import logger
class TerminableThread(threading.Thread):
    """
    A thread that can be stopped by forcing an exception in its execution
    context.
    """

    def terminate(self, exception_cls, repeat_sec=2.0):
        """Request that this thread stop by raising ``exception_cls`` inside it.

        The request is carried out by a separate ``ThreadKiller`` thread, so
        this method does not wait for the thread to actually exit.

        :param exception_cls: exception class to raise inside this thread
        :param repeat_sec: passed to the killer as the time it waits for this
            thread to exit before raising the exception again
        :return: ``True`` if the thread is not running (nothing to stop),
            ``False`` if a killer thread was started
        """
        if not self.is_alive():
            return True
        ThreadKiller(self, exception_cls, repeat_sec=repeat_sec).start()
        return False
class ThreadKiller(threading.Thread):
    """
    Separate daemon thread that stops a target thread by repeatedly raising
    an exception inside it.
    """

    def __init__(self, target_thread, exception_cls, repeat_sec=2.0):
        """
        :param target_thread: the thread to stop
        :param exception_cls: exception class to raise inside the target
        :param repeat_sec: seconds to wait for the target to exit before
            raising the exception again
        """
        super().__init__()
        self.target_thread = target_thread
        self.exception_cls = exception_cls
        self.repeat_sec = repeat_sec
        self.daemon = True

    def run(self):
        """Keep raising the exception until the target thread has exited.

        The exception is raised repeatedly in case the target catches it;
        the hope is that it eventually propagates far enough to end the
        thread.
        """
        while self.target_thread.is_alive():
            # The C API takes the thread id as an unsigned long.
            ctypes.pythonapi.PyThreadState_SetAsyncExc(
                ctypes.c_ulong(self.target_thread.ident),
                ctypes.py_object(self.exception_cls),
            )
            self.target_thread.join(self.repeat_sec)
# Custom exception type:
class StopRunningCommand(Exception):
    """Raised inside a job thread to tell it to stop running."""
class JobManagerID:
    """
    Pool of job IDs; initialises the list of job IDs as the integers
    ``0 .. pool_size - 1``.
    """

    def __init__(self, pool_size=5):
        """
        :param pool_size: number of IDs in the pool
        """
        self.pid_list = list(range(pool_size))

    def list_move(self):
        """Rotate ``pid_list`` left by one: the first item moves to the end.

        An empty pool is left unchanged instead of raising ``IndexError``.
        """
        self.pid_list = self.pid_list[1:] + self.pid_list[:1]
# Main class used to register, start and stop jobs.
class JobManager:
    """Run named jobs on terminable threads with a bounded degree of parallelism.

    Jobs are stored in ``job_store`` under a caller-chosen id. A semaphore
    limits how many job targets execute at the same time; started jobs beyond
    that limit wait for a free slot. A job removes its own entry from
    ``job_store`` when it ends.
    """

    def __init__(self, semaphore=2):
        """
        :param semaphore: number of jobs in the pool that may run in parallel
        """
        self.job_store = {}
        self.job_lock = threading.RLock()
        self.semaphore = threading.Semaphore(semaphore)

    def add_job(self, job_id, target, *args, **kwargs):
        """Register a new, not yet started job under ``job_id``.

        An existing entry with the same id is replaced in ``job_store``; the
        replaced thread is not terminated here.

        :param job_id: key of the job in ``job_store``
        :param target: callable executed by the job
        :param args: extra positional arguments for the thread constructor
        :param kwargs: extra keyword arguments for the thread constructor
            (e.g. ``args=(...,)`` holding the arguments for ``target``)
        :return: the thread created for the job
        """
        def inner_job(*job_args, **job_kwargs):
            acquired = False
            try:
                # Wait for a free execution slot. The flag records whether the
                # slot was really obtained: a job stopped while still queued
                # must not release a slot it never held (best effort, since
                # the stop exception can arrive at any point).
                acquired = self.semaphore.acquire()
                ret = target(*job_args, **job_kwargs)
                print(f"{job_id} is finished.")
                return ret
            except StopRunningCommand:
                print(f"{job_id} has been stopped.")
            except Exception:
                print(f"{job_id} is finished.")
                raise
            finally:
                try:
                    # Remove the job from job_store once it has ended, but
                    # only if the entry still refers to this thread: the id
                    # may have been re-registered with a newer job meanwhile.
                    # Holding the lock keeps the dict stable for methods that
                    # iterate over it.
                    with self.job_lock:
                        if self.job_store.get(job_id) is threading.current_thread():
                            del self.job_store[job_id]
                finally:
                    if acquired:
                        self.semaphore.release()

        with self.job_lock:
            t = TerminableThread(*args, target=inner_job, **kwargs)
            t.daemon = True
            self.job_store[job_id] = t
            return t

    def remove_job(self, job_id):
        """Remove the job with the given id; unknown ids are ignored.

        A live job is asked to stop via ``StopRunningCommand`` and removes its
        own entry when it ends. A job that is not alive cannot be interrupted,
        so its entry is simply dropped, which also prevents a later
        ``start_job`` from running it.
        """
        with self.job_lock:
            thread = self.job_store.get(job_id)
            if thread is None:
                return
            if not thread.is_alive():
                del self.job_store[job_id]
                return
            thread.terminate(StopRunningCommand)

    def start_job(self):
        """Start every job in the pool that has not been started yet.

        Runs under ``job_lock`` so that jobs finishing quickly cannot change
        ``job_store`` while it is being iterated.
        """
        with self.job_lock:
            for t in list(self.job_store.values()):
                # ident is None until a thread has been started; a thread can
                # only be started once.
                if t.ident is None:
                    t.start()

    def start_job_id(self, pid):
        """Start the job with the given id if it has not been started yet.

        :raises KeyError: if no job with that id is in the pool
        """
        with self.job_lock:
            t = self.job_store[pid]
            if t.ident is None:
                t.start()

    def job_start(self, pid):
        """Return the current state of the job with the given id.

        :return: ``True`` while the job's thread is alive
        :raises KeyError: if no job with that id is in the pool
        """
        return self.job_store[pid].is_alive()

    def print_current_job(self):
        """Log the state of every job in the pool (``True`` means alive)."""
        with self.job_lock:
            info = {jid: t.is_alive() for jid, t in self.job_store.items()}
        logger.info(info)
为实现任务异步运行且可并行的效果,
定义如下方法函数:
import datetime
import time

from StopableThreadJob.job_manager import *
# Shared ID pool: pool_size sets the capacity of the job pool.
job_manager_list = JobManagerID(pool_size=5)
# Shared manager: semaphore sets the degree of parallelism, i.e. how many
# jobs in the pool may compute at the same time.
job_manager = JobManager(semaphore=4)
def job():
    """Submit one slow demo job into the next slot of the bounded job pool.

    The ID at the front of ``job_manager_list.pid_list`` is used for the new
    job. If a job with that ID is still in the pool it is removed first, then
    the new job is added and started, and the ID list is rotated so the next
    call uses the following ID.
    """
    def slow_func(name):
        # Demo workload: log a timestamp once per second, ten times.
        for _ in range(10):
            logger.info(f"{name} -- {datetime.datetime.now()}")
            time.sleep(1)

    pid = job_manager_list.pid_list[0]
    if pid in job_manager.job_store:
        job_manager.remove_job(pid)
    job_manager.add_job(target=slow_func, args=(pid,), job_id=pid)
    job_manager.start_job_id(pid)
    job_manager_list.list_move()
- pool_size 用于设置任务池容量的大小
- semaphore 用于设置并行度,即任务池中可同时计算的任务数
实现有限的任务池以及有限的并行度的计算模块。
当任务池满的时候新的任务会将旧的任务挤出任务池。
所有的任务都在任务池中排队,根据并行度决定同时计算的数目。