CPU 利用率过高定位分析
概述
最近接手的一个项目,服务端的 cpu 利用率一直爆表。不做任何压测,利用率也保持在 200% 左右
分析
从 top 里面可以看出几个问题
1:负载几乎为 0,可以排除线程过多的问题
2:usr 的 cpu 利用率几乎是 sy 的三倍,初步怀疑代码层面有问题
3:IOwait 几乎没有,可以排除 IO 阻塞的问题
执行 vmstat 1 10
1:r+b 的队列不长
2:bi+bo 几乎为 0
3:in+cs 也不高
4:wa 几乎为 0
执行 mpstat -P ALL 1
1:逻辑 cpu 的利用率分布均匀
2:逻辑 cpu 的 usr 利用率明显高于 sys,下一步需要去定位线程日志
jstack 6964 | grep 1b5d -A 200
"DubboSaveRegistryCache-thread-1" #45 daemon prio=5 os_prio=0 tid=0x00007f3c24fcc000 nid=0x6fba runnable [0x00007f3c216c1000]
java.lang.Thread.State: RUNNABLE
at java.lang.String.substring(String.java:1933)
at java.io.File.getName(File.java:456)
at java.io.UnixFileSystem.getBooleanAttributes(UnixFileSystem.java:243)
at java.io.File.exists(File.java:819)
at com.alibaba.dubbo.registry.support.AbstractRegistry.doSaveProperties(AbstractRegistry.java:184)
at com.alibaba.dubbo.registry.support.AbstractRegistry$SaveProperties.run(AbstractRegistry.java:150)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"ZkClient-EventThread-42-10.10.1.16:2181,10.10.1.17:2181,10.10.1.18:2181" #42 daemon prio=5 os_prio=0 tid=0x00007f3c24fff000 nid=0x6fae waiting on condition [0x00007f3c219c4000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c38ff950> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.I0Itec.zkclient.ZkEventThread.run(ZkEventThread.java:67)
"DubboRegistryFailedRetryTimer-thread-1" #41 daemon prio=5 os_prio=0 tid=0x00007f3c24ffc000 nid=0x6fad waiting on condition [0x00007f3c21ac5000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c38fdc50> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"DubboSaveRegistryCache-thread-1" #38 daemon prio=5 os_prio=0 tid=0x00007f3c24ff9000 nid=0x6fa3 waiting on condition [0x00007f3c21fc8000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c38ffd88> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"localhost-startStop-1-EventThread" #37 daemon prio=5 os_prio=0 tid=0x00007f3c24ff8000 nid=0x6fa2 waiting on condition [0x00007f3c220c9000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c39003b0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:494)
"localhost-startStop-1-SendThread(10.200.2.1:2181)" #36 daemon prio=5 os_prio=0 tid=0x00007f3c24fdc000 nid=0x6fa1 runnable [0x00007f3c221ca000]
java.lang.Thread.State: RUNNABLE
at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:79)
at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
- locked <0x00000000c390f150> (a sun.nio.ch.Util$2)
- locked <0x00000000c390f140> (a java.util.Collections$UnmodifiableSet)
- locked <0x00000000c390ecd8> (a sun.nio.ch.EPollSelectorImpl)
at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
at org.apache.zookeeper.ClientCnxnSocketNIO.doTransport(ClientCnxnSocketNIO.java:349)
at org.apache.zookeeper.ClientCnxn$SendThread.run(ClientCnxn.java:1081)
"ZkClient-EventThread-35-10.200.2.1:2181" #35 daemon prio=5 os_prio=0 tid=0x00007f3c24fda800 nid=0x6fa0 waiting on condition [0x00007f3c222cb000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c390d1d0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
at org.I0Itec.zkclient.ZkEventThread.run(ZkEventThread.java:67)
"DubboRegistryFailedRetryTimer-thread-1" #34 daemon prio=5 os_prio=0 tid=0x00007f3c24fd9800 nid=0x6f9f waiting on condition [0x00007f3c223cc000]
java.lang.Thread.State: TIMED_WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c3909788> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"dubbo-remoting-client-heartbeat-thread-2" #33 daemon prio=5 os_prio=0 tid=0x00007f3c24fc1800 nid=0x6f9e waiting on condition [0x00007f3c224cd000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c38fa2b0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1088)
at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
"DubboClientReconnectTimer-thread-2" #31 daemon prio=5 os_prio=0 tid=0x00007f3c24fb1800 nid=0x6f9c waiting on condition [0x00007f3c226cf000]
java.lang.Thread.State: WAITING (parking)
at sun.misc.Unsafe.park(Native Method)
- parking to wait for <0x00000000c38ced20> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
at java.util.concurrent.ScheduledThreadPoolExecutor