JAVA程序CPU占用过高问题排查
top 命令查看CPU、内存等使用情况
[root@server110 ~]# top
top - 14:24:55 up 23 days, 20:34, 2 users, load average: 0.38, 0.15, 0.05
Tasks: 103 total, 1 running, 102 sleeping, 0 stopped, 0 zombie
Cpu(s): 53.2%us, 4.8%sy, 0.0%ni, 40.7%id, 0.5%wa, 0.0%hi, 0.2%si, 0.7%st
Mem: 8093100k total, 7971832k used, 121268k free, 260456k buffers
Swap: 8093692k total, 641916k used, 7451776k free, 1069476k cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
11034 root 20 0 4875m 1.4g 14m S 115.4 17.7 10:35.31 java
3985 root 20 0 0 0 0 S 0.7 0.0 0:22.25 flush-253:2
8628 root 20 0 5055m 896m 7048 S 0.7 11.3 68:57.14 java
3326 root 20 0 4659m 798m 6804 S 0.3 10.1 146:57.13 java
12947 root 20 0 15020 1312 1008 R 0.3 0.0 0:00.21 top
1 root 20 0 19228 544 396 S 0.0 0.0 0:00.76 init
2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd
定位问题线程
可以根据进程pid查看其下所有的线程,其中tid跟pid一样的为主线程
nid,tid区别
Full thread dump Java HotSpot(TM) 64-Bit Server VM (23.25-b01 mixed mode):
"Attach Listener" daemon prio=10 tid=0x00000000029eb800 nid=0x5cac waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
Locked ownable synchronizers:
- None
tid, Java memory address of its internal Thread control structure.
nid, native thread id. 每一个nid对应于linux下的一个tid, 即lwp (light weight process, or thread).
thread dump 中记录的每个线程的nid是与Linux线程一一对应的,只是十进制与16进制的区别
操作记录
[root@server110 /]# jps
27493 huishi-server.jar
2599 shanghai.schedule.jar
4392 eureka-server.jar
1354 trade.basic.api.jar
3326 urule-server.jar
27566 Jps
27278 huishi-schedule.jar
25935 huishi-api.jar
[root@server110 /]# jstack 27493 >huishi-server.txt
[root@server110 /]# top
top - 18:20:13 up 152 days, 29 min, 5 users, load average: 0.27, 0.66, 0.44
Tasks: 114 total, 1 running, 113 sleeping, 0 stopped, 0 zombie
Cpu(s): 18.2%us, 1.8%sy, 0.0%ni, 79.4%id, 0.0%wa, 0.0%hi, 0.2%si, 0.3%st
Mem: 8093100k total, 7811572k used, 281528k free, 273772k buffers
Swap: 8093692k total, 1157928k used, 6935764k free, 1448060k cached
PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
27493 root 20 0 4998m 1.4g 14m S 37.5 18.7 6:08.10 java
27107 root 20 0 102m 4652 3544 S 1.0 0.1 0:01.19 sshd
27142 root 20 0 98.6m 764 660 S 1.0 0.0 0:01.05 tail
3326 root 20 0 4671m 384m 6656 S 0.7 4.9 983:10.34 java
4 root 20 0 0 0 0 S 0.3 0.0 1:27.72 ksoftirqd/0
1354 root 20 0 4646m 604m 6756 S 0.3 7.6 248:06.48 java
2599 root 20 0 4647m 349m 6480 S 0.3 4.4 316:24.90 java
9526 root 20 0 145m 7828 988 S 0.3 0.1 226:05.22 redis-server
27605 root 20 0 4739m 1.1g 13m S 0.3 13.8 1:38.09 java
27782 root 20 0 15020 1328 1004 R 0.3 0.0 0:00.05 top
1 root 20 0 19228 524 356 S 0.0 0.0 0:01.13 init
2 root 20 0 0 0 0 S 0.0 0.0 0:00.00 kthreadd
3 root RT 0 0 0 0 S 0.0 0.0 0:01.52 migration/0
[root@server110 /]# ps -mq 27493 -o THREAD,tid,time
USER %CPU PRI SCNT WCHAN USER SYSTEM TID TIME
root 65.5 - - - - - - 00:06:21
root 0.0 19 - futex_ - - 27493 00:00:00
root 29.4 19 - futex_ - - 27494 00:02:51
root 0.7 19 - futex_ - - 27495 00:00:04
root 0.7 19 - futex_ - - 27496 00:00:04
root 0.4 19 - futex_ - - 27497 00:00:02
root 0.0 19 - futex_ - - 27498 00:00:00
root 0.0 19 - futex_ - - 27499 00:00:00
root 0.0 19 - futex_ - - 27500 00:00:00
root 25.0 19 - futex_ - - 27501 00:02:26
root 3.1 19 - futex_ - - 27502 00:00:18
[root@server110 /]# printf '%x\n' 27494
6b66
[root@server110 /]# grep 6b66 huishi-server5.txt -A20
"DestroyJavaVM" #75 prio=5 os_prio=0 tid=0x00007f2294008800 nid=0x6b66 waiting on condition [0x0000000000000000]
java.lang.Thread.State: RUNNABLE
"http-nio-8083-AsyncTimeout" #72 daemon prio=5 os_prio=0 tid=0x00007f222f376800 nid=0x6bf3 waiting on condition [0x00007f221f9bc000]
java.lang.Thread.State: TIMED_WAITING (sleeping)
at java.lang.Thread.sleep(Native Method)
at org.apache.coyote.AbstractProtocol$AsyncTimeout.run(AbstractProtocol.java:1200)
at java.lang.Thread.run(Thread.java:745)
"http-nio-8083-Acceptor-0" #71 daemon prio=5 os_prio=0 tid=0x00007f22957f8800 nid=0x6bf2 runnable [0x00007f221fabd000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.ServerSocketChannelImpl.accept0(Native Method)
at sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:422)
at sun.nio.ch.ServerSocketChannelImpl.accept(ServerSocketChannelImpl.java:250)
- locked <0x0000000090753350> (a java.lang.Object)
at org.apache.tomcat.util.net.NioEndpoint$Acceptor.run(NioEndpoint.java:453)
at java.lang.Thread.run(Thread.java:745)
"http-nio-8083-ClientPoller-1" #70 daemon prio=5 os_prio=0 tid=0x00007f2235866000 nid=0x6bf1 runnable [0x00007f221fbbe000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
定位问题进程,使用top命令查找最耗资源的进程pid
定位问题线程,根据pid,找出其中最耗资源的线程tid
定位文本日志,再将tid转换成16进制,再查找相关的日志文本信息
ps命令查看线程
查看ps命令跟线程相关的参数
[root@server110 /]# man ps |grep thread
To get info about threads:
threads) and LWP (thread ID) columns will be added. See
H Show threads as if they were processes
-L Show threads, possibly with LWP and NLWP columns
-T Show threads, possibly with SPID column
m Show threads after processes
-m Show threads after processes
the page tables, kernel stack, struct thread_info, and struct
l is multi-threaded (using CLONE_THREAD, like NPTL pthreads do)
lwp LWP lwp (light weight process, or thread) ID of the lwp being
nlwp NLWP number of lwps (threads) in the process. (alias thcount).
on individual threads. Use the m option or the -m option
thcgr THCGR display control groups to which the thread belongs.
thcount THCNT see nlwp. (alias nlwp). number of kernel threads owned by
process is multi-threaded and ps is not displaying
threads.
-L参数
[root@server110 /]# ps -efL |grep huishi-server
UID PID PPID LWP C NLWP STIME TTY TIME CMD
root 32195 1 32195 0 62 10:47 ? 00:00:00 /usr/java/jdk1.8.0_91/bin/java -jar /home/huishi-server-dev/huishi-server.jar --server.port=8083
root 32195 1 32196 9 62 10:47 ? 00:02:58 /usr/java/jdk1.8.0_91/bin/java -jar /home/huishi-server-dev/huishi-server.jar --server.port=8083
root 32195 1 32197 0 62 10:47 ? 00:00:03 /usr/java/jdk1.8.0_91/bin/java -jar /home/huishi-server-dev/huishi-server.jar --server.port=8083
。。。。。
root 32458 31940 32458 0 1 11:18 pts/2 00:00:00 grep huishi-server
-L
参数显示进程,并尽量显示其LWP
(线程ID)和NLWP
(线程的个数)。上面命令查询结果的第二列为PID,第三列为PPID,第四列为LWP,第六列为NLWP。
ps
属于linux下相对比较全能且强大的命令,其还可以查看具体某个线程运行在哪个CPU上,如下:
[root@server110 /]# ps -eo ruser,pid,ppid,lwp,psr,args -L | grep huishi-server
root 32195 1 32195 1 /usr/java/jdk1.8.0_91/bin/java -jar /home/huishi-server-dev/huishi-server.jar --server.port=8083
root 32195 1 32196 1 /usr/java/jdk1.8.0_91/bin/java -jar /home/huishi-server-dev/huishi-server.jar --server.port=8083
root 32195 1 32197 0 /usr/java/jdk1.8.0_91/bin/java -jar /home/huishi-server-dev/huishi-server.jar --server.port=8083
root 32195 1 32198 0 /usr/java/jdk1.8.0_91/bin/java -jar /home/huishi-server-dev/huishi-server.jar --server.port=8083
。。。。。。
root 32474 31940 32474 1 grep huishi-server
每一列依次为:用户ID,进程ID,父进程ID,线程ID,运行该线程的CPU的序号,命令行参数(包括命令本身)
-T参数
$ ps -T 9802
PID SPID TTY STAT TIME COMMAND
9802 9802 pts/1 Sl 0:00 ./a.out
9802 9803 pts/1 Sl 0:00 ./a.out
9802 9804 pts/1 Sl 0:00 ./a.out
9802 9805 pts/1 Sl 0:00 ./a.out
其中SPID
即为TID
。可以看到当前进程的PID
是9802
,共包含4
个线程,其TID
依次为:9802
,9803
,9804
和9805
,其中PID
和SPID
相同的线程即为主线程。
-m参数
[root@server110 /]# ps -mq 32195 -o tid,time,comm,ruser,pid,ppid,lwp,psr,args,THREAD
TID TIME COMMAND RUSER PID PPID LWP PSR COMMAND USER %CPU PRI SCNT WCHAN USER SYSTEM
- 00:08:00 java root 32195 1 - - /usr/java/jdk1.8.0_91/bin/j root 4.1 - - - - -
32195 00:00:00 - root - - 32195 1 - root 0.0 19 - futex_ - -
32196 00:02:58 - root - - 32196 1 - root 1.5 19 - futex_ - -
32197 00:00:04 - root - - 32197 0 - root 0.0 19 - futex_ - -
32198 00:00:04 - root - - 32198 1 - root 0.0 19 - futex_ - -
32199 00:00:05 - root - - 32199 1 - root 0.0 19 - futex_ - -
32200 00:00:00 - root - - 32200 1 - root 0.0 19 - futex_ - -
32201 00:00:00 - root - - 32201 1 - root 0.0 19 - futex_ - -
32202 00:00:00 - root - - 32202 0 - root 0.0 19 - futex_ - -
32203 00:02:46 - root - - 32203 1 - root 1.4 19 - futex_ - -
32204 00:00:24 - root - - 32204 0 - root 0.2 19 - futex_ - -
32205 00:00:00 - root - - 32205 1 - root 0.0 19 - futex_ - -
32206 00:00:10 - root - - 32206 1 - root 0.0 19 - futex_ - -
32208 00:00:00 - root - - 32208 0 - root 0.0 19 - futex_ - -
32211 00:00:00 - root - - 32211 1 - root 0.0 19 - futex_ - -
32212 00:00:00 - root - - 32212 1 - root 0.0 19 - futex_ - -
32214 00:00:00 - root - - 32214 0 - root 0.0 19 - futex_ - -
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 【杭电多校比赛记录】2025“钉耙编程”中国大学生算法设计春季联赛(1)
2013-04-26 一个HttpHandler的小设计