Python multiprocessing 多进程共用 Queue,特定情况下 join 不明阻塞问题

如下示例,

import multiprocessing
import random


def worker(base_num, queue):
    """Put 1000 random numbers onto ``queue`` and report completion.

    Each number is ``base_num`` plus a random integer offset drawn from
    0..99 (both ends inclusive), so the values from one call fall in
    ``[base_num, base_num + 99]``.

    Args:
        base_num: Value added to every random offset.
        queue: Any object exposing ``put(item)``.
    """
    produced = 0
    while produced < 1000:
        offset = random.randint(0, 99)
        queue.put(base_num + offset)
        produced += 1

    # Announce which process finished producing.
    proc_name = multiprocessing.current_process().name
    print('{} end'.format(proc_name))


if __name__ == '__main__':
    # Deliberate reproduction of the hang: every child process is joined
    # BEFORE anything is read from the shared queue.
    q = multiprocessing.Queue()
    proc_list = []

    # for i in range(1, 7):  # good: 6 producers finished normally in the author's test
    for i in range(1, 8):  # bad: with 7 producers join() hung in the author's test
        proc = multiprocessing.Process(target=worker, args=(1000 * i, q))
        proc_list.append(proc)

    # Start all producers first so they run concurrently.
    for proc in proc_list:
        proc.start()

    for proc in proc_list:
        print('{} join start'.format(proc.name))
        # NOTE: nothing has been read from q at this point. The multiprocessing
        # programming guidelines ("Joining processes that use queues") warn
        # that a child which has put items on a Queue waits for them to be
        # flushed to the underlying pipe before it terminates, so this join()
        # can block indefinitely. The items must be consumed with q.get()
        # before the producers are joined.
        proc.join()
        print('{} join end'.format(proc.name))

    # Print whatever the queue holds; only reached if every join() returned.
    while not q.empty():
        print(q.get(), end=', ')

在我当前电脑上测试,当 range(1, 7) 时,程序正常结束, 输出为,

$ python3 multiprocess_with_queue__join_stuck.py 
Process-1 join start
Process-2 end
Process-1 end
Process-4 end
Process-3 end
Process-5 end
Process-6 end
Process-1 join end
Process-2 join start
Process-2 join end
Process-3 join start
Process-3 join end
Process-4 join start
Process-4 join end
Process-5 join start
Process-5 join end
Process-6 join start
Process-6 join end
1007, 1076, 1022, 1016, 1045, 1029, 1066, 1064, 1009, 1061, 1052, 1097, 1021, 1067, 1026, 1002, 1034, 1054, 1014, 1006, 1064, 1083, 1021, 1082, 1043, 1071, 1053, 1016, 1056, 1048, 1063, 1089, 1088, 1084, 1004, 1055, 1011, 1080, 1031, 1056, 1027, 1050, 1072, 1021, 2005, 2041, 2036, 2017, 2002, 2022, 2094, 2072, 2011, 2006, 2073, 2058, 2049, 2073, 2072, 2012, 2047, 2097, 2009, 2018, 2044, 2064, 2082, 2028, 2046, 2012, 2085, 2085, 2030, 2047, 2024, 2081, 2075, 2057, 2042, 2036, 2064, 2098, 2072, 2075, 2066, 2095, 2061, 2026, 2003, 2082, 2046, 2031, 2076, 2060, 2020, 2066, 2009, 2005, 2069, 2023, 2030, 2062, 2063, 2007, 2086, 2012, 2086, 2049, 2075, 2077, 2098, 2080, 2088, 2016, 2011, 2060, 2026, 2030, 2074, 2074, 2099, 2078, 2035, 2016, 2095, 2065, 2088, 2055, 2007, 2006, 2058, 2080, 2025, 2086, 2061, 2086, 2023, 2002, 2063, 2022, 2031, 2038, 2015, 2045, 2080, 2067, 2021, 2099, 2025, 2040, 2092, 3038, 2046, 2043, 2058, 2091, 2007, 2069, 2095, 2066, 2007, 2083, 2046, 2009, 2012, 2001, 2071, 2075, 2002, 2099, 3029, 3050, 1058, 1017, 1092, 1084, 1057, 1024, 1047, 6072, 6041, 6031, 6012, 1019, 5095, 5027, 6040, 6053, 6026, 6098, 4058, 4015, 1053, 1010, 1042, 1003, 3065, 4061, 2035, 2003, 3019, 3070, 1000, 1053, 1016, 1061, 3023, 3031, 3032, 3010, 3057, 1025, 1008, 1085, 1082, 3046, 3077, 3047, 3061, 3015, 3071, 2076, 2077, 2037, 2068, 6088, 6013, 2011, 2064, 2021, 2096, 2041, 2031, 2091, 2099, 2063, 2022, 2032, 2072, 6048, 6010, 6068, 6063, 3095, 5048, 5030, 5054, 5036, 5048, 5024, 3064, 1031, 3001, 2030, 2003, 2031, 3051, 2047, 2096, 2038, 2056, 2026, 6078, 6012, 6092, 6042, 3095, 3048

当 range(1, 8) 时,发生不明 join 阻塞,

$ python3 multiprocess_with_queue__join_stuck.py 
Process-1 join start
Process-1 end
Process-2 end
Process-6 end
Process-7 end
Process-4 end
Process-5 end
Process-3 end
Process-1 join end
Process-2 join start

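从文档可见,Queue 的实现依赖于系统的 pipe,而 pipe 的缓冲区默认比较小(KB 级)。子进程向 Queue 放入数据后,要等后台的 feeder 线程把已缓冲的数据全部写入 pipe 才能退出;如果放入的消息太多,pipe 已被写满而主进程又还没有 get,子进程就无法结束,主进程的 join 也就一直阻塞。所以,Queue 一有内容,就要尽快取出来:必须先把数据取完,再 join 生产者进程。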

 

而要在进程间共享内容,可以使用 multiprocessing.Manager,如下例,

import multiprocessing
import random


# def worker(base_num, queue):
#     for _ in range(1000):
#         queue.put(base_num + random.randint(0, 99))
#     print('{} end'.format(multiprocessing.current_process().name))

def worker(base_num, list_obj):
    """Append 1000 random numbers to ``list_obj`` and report completion.

    Each number is ``base_num`` plus a random integer offset drawn from
    0..99 (both ends inclusive).

    Args:
        base_num: Value added to every random offset.
        list_obj: Any object exposing ``append(item)``.
    """
    remaining = 1000
    while remaining:
        # One append per value, exactly as many calls as values produced.
        value = base_num + random.randint(0, 99)
        list_obj.append(value)
        remaining -= 1

    # Announce which process finished producing.
    finished = multiprocessing.current_process().name
    print('{} end'.format(finished))


if __name__ == '__main__':
    # Collect results in a Manager-backed list instead of a
    # multiprocessing.Queue. The ``with`` block shuts the manager's server
    # process down once the results have been read.
    with multiprocessing.Manager() as mgr:
        list_obj = mgr.list()

        # Ten producers: more than the seven that hung in the Queue-based
        # version, yet every join() below returns.
        proc_list = [
            multiprocessing.Process(
                target=worker,
                args=(1000 * i, list_obj),
                name='process-{}'.format(i),
            )
            for i in range(1, 11)
        ]

        # Start all producers first so they run concurrently.
        for proc in proc_list:
            proc.start()

        for proc in proc_list:
            print('{} join start'.format(proc.name))
            proc.join()
            print('{} join end'.format(proc.name))

        # Read the proxy while the manager is still running; iterate over
        # list_obj here to inspect the individual values.
        print('len is: ', len(list_obj))

 

 (完)

 

posted @ 2022-07-22 18:43  Anonymous596  阅读(439)  评论(0编辑  收藏  举报