记录一次线上异步线程死锁问题
实时场景:
接口性能:114台线上机器 TPS峰值 3.7W TP99 50ms内
单机性能: TPS 340 TP99 50ms内
核心线程: 30
故障:黑名单打标开始TPS会由原来的3.7w 降为2.2w
过程:查看日志有机器线程池打满状态,下掉机器,TPS恢复正常。通过jstack查看线程状态,发现线程死锁,消除异步线程嵌套带来的死锁问题。
原因:由于TPS较高、核心线程数不高,同时异步任务(ExecutorCompletionService)嵌套且共用同一个线程池:父任务占着工作线程等待子任务的结果,子任务却在队列中等不到空闲线程,形成线程饥饿死锁,进而导致线程池打满;线程池未打满的机器也会存在线程阻塞问题(线程池线程数小,任务嵌套)。
本地场景复现
场景1:
1.使用线程池
2.使用异步线程
3.线程池核心线程1个
4.异步线程父线程套子线程 共2个线程
5.父线程睡2秒
6.猜想结果 死锁 阻塞
/**
 * Reproduces a thread-starvation deadlock caused by nesting tasks on a single-thread pool.
 *
 * <p>The pool has exactly one worker (core = max = 1) and a one-slot queue. The parent task
 * occupies the only worker, sleeps for two seconds, submits a child task to the same pool and
 * then blocks on the child's {@link Future}. The child can only wait in the queue because the
 * sole worker is the parent itself, so neither task ever completes. As a result this method
 * is expected to hang until the calling thread is interrupted.
 *
 * @throws ExecutionException if the parent task terminates with an exception
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public static void Test1() throws ExecutionException, InterruptedException {
    ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(
            1, 1, 0, TimeUnit.SECONDS,
            new ArrayBlockingQueue<Runnable>(1),
            Executors.defaultThreadFactory(),
            new ThreadPoolExecutor.CallerRunsPolicy());
    try {
        CompletionService<String> fatherService = new ExecutorCompletionService<>(threadPoolExecutor);
        Future<String> fatherSubmit = fatherService.submit(() -> {
            System.out.println("进入父线程");
            try {
                Thread.sleep(2000);
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Keep the interrupt visible so the blocking get() below aborts instead of hanging.
                Thread.currentThread().interrupt();
            }

            // Nested submission to the SAME pool: the only worker is busy running this task,
            // so the child is merely queued and can never start.
            CompletionService<String> sonService = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit = sonService.submit(() -> {
                System.out.println("进入子线程");
                return "子线程响应结果";
            });

            // Blocks forever: the child needs the worker that is waiting here.
            String sonStr = sonSubmit.get();
            System.out.println("sonStr = " + sonStr);
            System.out.println("---父线程结束---");
            return "父线程应结果";
        });

        String fatherStr = fatherSubmit.get();
        System.out.println("fatherStr = " + fatherStr);
    } finally {
        // Only reached if the wait above is aborted; interrupts the stuck worker so the
        // non-daemon pool thread does not outlive this method.
        threadPoolExecutor.shutdownNow();
    }
}
结论:线程死锁
原因:线程池只有一个线程,异步线程互相嵌套,父线程启动,占用一个线程,子线程分配不到线程进而阻塞,由于子线程阻塞导致父线程也阻塞,导致线程死锁。
场景2:
1.使用线程池
2.使用异步线程
3.线程池核心线程2个
4.异步线程父线程套3子线程 共4个线程
5.父线程睡3秒 子线程睡3秒
6.猜想所有线程执行完需9秒
/**
 * Runs one parent task that spawns three child tasks on the same pool
 * (core = 2, max = 9, queue capacity = 1) and waits for all of them.
 *
 * <p>The parent sleeps three seconds before submitting the children, and each child sleeps
 * three seconds. Following {@link ThreadPoolExecutor}'s documented queuing rules, child 1
 * starts on the second core thread, child 2 is placed in the one-slot queue, and child 3
 * cannot be queued, so an extra non-core thread is created for it. Children 1 and 3 therefore
 * overlap and child 2 runs afterwards, giving roughly 3 + 3 + 3 = 9 seconds in total.
 *
 * @throws ExecutionException if the parent task terminates with an exception
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public static void test2() throws ExecutionException, InterruptedException {
    ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(
            2, 9, 0, TimeUnit.SECONDS,
            new ArrayBlockingQueue<Runnable>(1),
            Executors.defaultThreadFactory(),
            new ThreadPoolExecutor.CallerRunsPolicy());
    try {
        CompletionService<String> fatherService = new ExecutorCompletionService<>(threadPoolExecutor);
        Future<String> fatherSubmit = fatherService.submit(() -> {
            System.out.println("进入父线程");
            try {
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Keep the interrupt visible so the blocking get() calls below abort.
                Thread.currentThread().interrupt();
            }

            CompletionService<String> sonService1 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit1 = sonService1.submit(() -> {
                System.out.println("进入子线程1");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程1响应结果";
            });

            CompletionService<String> sonService2 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit2 = sonService2.submit(() -> {
                System.out.println("进入子线程2");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程2响应结果";
            });

            CompletionService<String> sonService3 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit3 = sonService3.submit(() -> {
                System.out.println("进入子线程3");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程3响应结果";
            });

            // The parent keeps its worker thread occupied while waiting for every child.
            String son1Str = sonSubmit1.get();
            System.out.println("son1Str = " + son1Str);
            String son2Str = sonSubmit2.get();
            System.out.println("son2Str = " + son2Str);
            String son3Str = sonSubmit3.get();
            System.out.println("son3Str = " + son3Str);
            System.out.println("---父线程结束---");
            return "父线程应结果";
        });

        String fatherStr = fatherSubmit.get();
        System.out.println("fatherStr = " + fatherStr);
    } finally {
        // Core workers are non-daemon threads; without shutdown they keep the JVM alive.
        threadPoolExecutor.shutdownNow();
    }
}
结论:所有线程执行完9S
原因:父线程占用一个核心线程,先睡3秒后才提交三个子线程,因此父线程的3秒与子线程是串行关系。提交时,子线程1分得剩下的一个核心线程;子线程2进入容量为1的队列等待;子线程3入队失败(队列已满),由于最大线程数为9,线程池会新建一个非核心线程来执行它。子线程1和子线程3并行执行3秒,其中一个结束后子线程2再执行3秒,
所以整个方法test2执行完毕需要 3+3+3=9s。
场景3(对应 test3):
1.使用线程池
2.使用异步线程
3.线程池核心线程3个
4.异步线程父线程套3子线程 共4个线程
5.父线程睡3秒 子线程睡3秒
6.猜想所有线程执行完需9秒
/**
 * Runs one parent task that spawns three child tasks on the same pool
 * (core = 3, max = 9, queue capacity = 1) and waits for all of them.
 *
 * <p>The parent sleeps three seconds before submitting the children, and each child sleeps
 * three seconds. Following {@link ThreadPoolExecutor}'s documented queuing rules, children 1
 * and 2 start on the two remaining core threads while child 3 is placed in the one-slot queue
 * and only starts once a worker becomes free. That gives roughly 3 + 3 + 3 = 9 seconds in
 * total.
 *
 * @throws ExecutionException if the parent task terminates with an exception
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public static void test3() throws ExecutionException, InterruptedException {
    ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(
            3, 9, 0, TimeUnit.SECONDS,
            new ArrayBlockingQueue<Runnable>(1),
            Executors.defaultThreadFactory(),
            new ThreadPoolExecutor.CallerRunsPolicy());
    try {
        CompletionService<String> fatherService = new ExecutorCompletionService<>(threadPoolExecutor);
        Future<String> fatherSubmit = fatherService.submit(() -> {
            System.out.println("进入父线程");
            try {
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Keep the interrupt visible so the blocking get() calls below abort.
                Thread.currentThread().interrupt();
            }

            CompletionService<String> sonService1 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit1 = sonService1.submit(() -> {
                System.out.println("进入子线程1");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程1响应结果";
            });

            CompletionService<String> sonService2 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit2 = sonService2.submit(() -> {
                System.out.println("进入子线程2");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程2响应结果";
            });

            CompletionService<String> sonService3 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit3 = sonService3.submit(() -> {
                System.out.println("进入子线程3");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程3响应结果";
            });

            // The parent keeps its worker thread occupied while waiting for every child.
            String son1Str = sonSubmit1.get();
            System.out.println("son1Str = " + son1Str);
            String son2Str = sonSubmit2.get();
            System.out.println("son2Str = " + son2Str);
            String son3Str = sonSubmit3.get();
            System.out.println("son3Str = " + son3Str);
            System.out.println("---父线程结束---");
            return "父线程应结果";
        });

        String fatherStr = fatherSubmit.get();
        System.out.println("fatherStr = " + fatherStr);
    } finally {
        // Core workers are non-daemon threads; without shutdown they keep the JVM alive.
        threadPoolExecutor.shutdownNow();
    }
}
结论:所有线程执行完9S
原因:父线程占用一个核心线程,两个子线程各分得一个核心线程,第三个子线程进入队列等待;当一个子线程释放核心线程后,第三个子线程开始执行,
所以前两个子线程(并行)和第三个子线程相当于串行,然后又与父线程串行,所以共需 3+3+3=9s。
场景4(对应 test4):
1.使用线程池
2.使用异步线程
3.线程池核心线程9个
4.异步线程父线程套3子线程 共4个线程
5.父线程睡3秒 子线程睡3秒
6.猜想所有线程执行完需6秒
/**
 * Runs one parent task that spawns three child tasks on the same pool
 * (core = 9, max = 9, queue capacity = 1) and waits for all of them.
 *
 * <p>The parent sleeps three seconds before submitting the children, and each child sleeps
 * three seconds. With nine core threads and only four tasks, every child gets its own worker
 * immediately, so the children overlap and the whole run takes roughly 3 + 3 = 6 seconds.
 *
 * @throws ExecutionException if the parent task terminates with an exception
 * @throws InterruptedException if the calling thread is interrupted while waiting
 */
public static void test4() throws ExecutionException, InterruptedException {
    ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(
            9, 9, 0, TimeUnit.SECONDS,
            new ArrayBlockingQueue<Runnable>(1),
            Executors.defaultThreadFactory(),
            new ThreadPoolExecutor.CallerRunsPolicy());
    try {
        CompletionService<String> fatherService = new ExecutorCompletionService<>(threadPoolExecutor);
        Future<String> fatherSubmit = fatherService.submit(() -> {
            System.out.println("进入父线程");
            try {
                Thread.sleep(3000);
            } catch (InterruptedException e) {
                e.printStackTrace();
                // Keep the interrupt visible so the blocking get() calls below abort.
                Thread.currentThread().interrupt();
            }

            CompletionService<String> sonService1 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit1 = sonService1.submit(() -> {
                System.out.println("进入子线程1");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程1响应结果";
            });

            CompletionService<String> sonService2 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit2 = sonService2.submit(() -> {
                System.out.println("进入子线程2");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程2响应结果";
            });

            CompletionService<String> sonService3 = new ExecutorCompletionService<>(threadPoolExecutor);
            Future<String> sonSubmit3 = sonService3.submit(() -> {
                System.out.println("进入子线程3");
                try {
                    Thread.sleep(3000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                    Thread.currentThread().interrupt();
                }
                return "子线程3响应结果";
            });

            // The parent keeps its worker thread occupied while waiting for every child.
            String son1Str = sonSubmit1.get();
            System.out.println("son1Str = " + son1Str);
            String son2Str = sonSubmit2.get();
            System.out.println("son2Str = " + son2Str);
            String son3Str = sonSubmit3.get();
            System.out.println("son3Str = " + son3Str);
            System.out.println("---父线程结束---");
            return "父线程应结果";
        });

        String fatherStr = fatherSubmit.get();
        System.out.println("fatherStr = " + fatherStr);
    } finally {
        // Core workers are non-daemon threads; without shutdown they keep the JVM alive.
        threadPoolExecutor.shutdownNow();
    }
}
结论:所有线程执行完6S
原因:父线程占用一个核心线程,三个子线程各分得一个核心线程并行执行;三个子线程与父线程是串行关系(父线程睡完3秒后才提交子线程),所以共需 3+3=6s。