运维自动导出业务容器Java堆栈错误日志脚本
运维自动导出业务容器Java堆栈日志脚本
引
针对线上 Docker 业务容器启动失败的场景,快速导出业务容器的 Java 堆栈日志,帮助开发快速定位、解决问题。注意:线上业务容器的基础镜像需要带有 JDK,才能在容器内执行导出堆栈所需的命令,比如 jstack。
抓取效果
# 若服务器不存在此服务,则:
[root@h119 ~]# sh scripts/export_java_log.sh
>>>>>>>>>>>>>>>> 线上业务Java堆栈日志导出工具 <<<<<<<<<<<<<<<<
>>>>>>>>>>>>>>>>>>>>>> 注意:打印10000行 <<<<<<<<<<<<<<<<<<<<
-------------------- 不带pro的为预生产服务 -------------------
1) ehu-life 3) ehu-job 5) 退出
2) ehu-life-cms 4) ehu-life-cms-pro
请输入功能编号来选择容器>>> 1
业务容器【ehu-life】不存在或未启动,请手动检查服务状态!
# 若服务器存在此服务,则:
[root@h119 ~]# sh scripts/export_java_log.sh
>>>>>>>>>>>>>>>> 线上业务Java堆栈日志导出工具 <<<<<<<<<<<<<<<<
>>>>>>>>>>>>>>>>>>>>>> 注意:打印10000行 <<<<<<<<<<<<<<<<<<<<
-------------------- 不带pro的为预生产服务 -------------------
1) ehu-life 3) ehu-job 5) 退出
2) ehu-life-cms 4) ehu-life-cms-pro
请输入功能编号来选择容器>>> 4
正在导出日志,请稍等...
导出成功!
目标文件: /docker-log/h119-life-cms-pro-2022-08-10_15:41-java.log
# 查看导出的日志文件:
[root@h119 ~]# ll /docker-log/
-rw-r--r-- 1 dev dev 152K Aug 9 15:04 h119-life-cms-pro-2022-08-10_15:41-java.log
# 查看日志内容:
[root@h119 ~]# tail -20 /docker-log/h119-life-cms-pro-2022-08-10_15\:41-java.log
- waiting on <0x00000006c7def300> (a java.lang.ref.Reference$Lock)
at java.lang.Object.wait(Object.java:502)
at java.lang.ref.Reference.tryHandlePending(Reference.java:191)
- locked <0x00000006c7def300> (a java.lang.ref.Reference$Lock)
at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:153)
"VM Thread" os_prio=0 tid=0x00007f3c3413e800 nid=0xc runnable
"GC task thread#0 (ParallelGC)" os_prio=0 tid=0x00007f3c3401e800 nid=0x8 runnable
"GC task thread#1 (ParallelGC)" os_prio=0 tid=0x00007f3c34020800 nid=0x9 runnable
"GC task thread#2 (ParallelGC)" os_prio=0 tid=0x00007f3c34022000 nid=0xa runnable
"GC task thread#3 (ParallelGC)" os_prio=0 tid=0x00007f3c34024000 nid=0xb runnable
"VM Periodic Task Thread" os_prio=0 tid=0x00007f3c34194800 nid=0x14 waiting on condition
JNI global references: 2648
Dockerfile案例
线上业务容器的基础镜像需要带有 JDK;alpine 镜像不支持用 jstack 导出堆栈日志,可改用 Oracle JDK。
# Example image for a business container from which Java thread dumps can be
# exported: it puts a full JDK's bin directory on PATH.
FROM reg.xxx.com/base/spring-boot-orjdk8:v1
MAINTAINER ZhangZhenPeng
# Time zone: point /etc/localtime and /etc/timezone at Asia/Shanghai
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
RUN echo 'Asia/Shanghai' >/etc/timezone
# Java: remove the jdk1.8.0_131 directory (presumably provided by the base
# image - confirm) and add the Oracle JDK 8u171 tarball; ADD unpacks a local
# tar archive into the destination directory
RUN rm -rf /opt/soft/java/jdk/jdk1.8.0_131
ADD oracle-jdk-8u171-linux-x64.tar.gz /opt/soft/java/jdk/
# Environment variable: both the echo approach and ENV work; ENV is recommended
#RUN echo 'export PATH=$PATH:/opt/soft/java/jdk/jdk1.8.0_171/bin' && source /etc/profile
ENV PATH=$PATH:/opt/soft/java/jdk/jdk1.8.0_171/bin
# Bundle Arthas by copying /opt/arthas out of the hengyunabc/arthas image
COPY --from=hengyunabc/arthas:latest /opt/arthas /opt/arthas
export_java_log.sh
Java
堆栈日志自动导出脚本
#!/bin/bash
# Exports the Java thread dump (jstack output) of online business containers.
# Containers: ehu-life, ehu-life-cms, ehu-job

# Host directory that receives the exported log files.
DIR=/docker-log
# mkdir -p succeeds when the directory already exists and creates missing
# parents. Abort early if it cannot be created, because every later export
# would fail anyway.
if ! mkdir -p -- "$DIR"; then
  echo "无法创建日志目录: $DIR" >&2
  exit 1
fi
#######################################
# Export the Java thread dump of the "ehu-life" container to the host.
# Runs jstack against PID 1 inside the container, stores the output in a
# file in the container's root directory, copies that file to /docker-log
# on the host and removes the in-container copy.
# Globals:   none
# Arguments: none
# Outputs:   progress and result messages on stdout
# Returns:   never returns to the caller; exits 0 on success and 1 when the
#            container is not running or the export fails
#######################################
ehu_life() {
  local server="ehu-life"
  local dir=/docker-log
  # PID handed to jstack; PID 1 is taken to be the Java process.
  local java_pid='1'
  local stamp host log_file exported=0
  stamp=$(date '+%F_%H:%M')
  host=$(hostname)
  log_file="${host}-life-${stamp}-java.log"

  # Compare whole container names: a substring or port match would also
  # accept unrelated containers (for example ehu-life-cms).
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$server"; then
    echo "业务容器【$server】不存在或未启动,请手动检查服务状态!"
    exit 1
  fi

  echo "正在导出日志,请稍等..."
  # No -it: nothing here needs a TTY or stdin. The dump is written to an
  # absolute path so that docker cp finds it whatever the container's working
  # directory is; pipefail makes a failing jstack fail the whole command.
  if docker exec "$server" bash -c \
    'set -o pipefail; jstack "$1" | grep -A 10000 -- "$1" > "$2"' \
    _ "$java_pid" "/$log_file" &>/dev/null \
    && docker cp "$server:/$log_file" "$dir" &>/dev/null; then
    exported=1
  fi
  # Best-effort cleanup so dump files do not pile up inside the container.
  docker exec "$server" rm -f -- "/$log_file" &>/dev/null

  if ((exported)); then
    echo "导出成功!"
    echo "目标文件: $dir/$log_file"
    exit 0
  fi
  echo "导出失败!"
  exit 1
}
#######################################
# Export the Java thread dump of the "ehu-life-cms" container to the host.
# Runs jstack against PID 1 inside the container, stores the output in a
# file in the container's root directory, copies that file to /docker-log
# on the host (docker cp -a, archive mode) and removes the in-container copy.
# Globals:   none
# Arguments: none
# Outputs:   progress and result messages on stdout
# Returns:   never returns to the caller; exits 0 on success and 1 when the
#            container is not running or the export fails
#######################################
ehu_life_cms() {
  local server="ehu-life-cms"
  local dir=/docker-log
  # PID handed to jstack; PID 1 is taken to be the Java process.
  local java_pid='1'
  local stamp host log_file exported=0
  stamp=$(date '+%F_%H:%M')
  host=$(hostname)
  log_file="${host}-life-cms-${stamp}-java.log"

  # Compare whole container names: a substring or port match would also
  # accept unrelated containers (for example ehu-life-cms-pro).
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$server"; then
    echo "业务容器【$server】不存在或未启动,请手动检查服务状态!"
    exit 1
  fi

  echo "正在导出日志,请稍等..."
  # No -it: nothing here needs a TTY or stdin. The dump is written to an
  # absolute path so that docker cp finds it whatever the container's working
  # directory is; pipefail makes a failing jstack fail the whole command.
  if docker exec "$server" bash -c \
    'set -o pipefail; jstack "$1" | grep -A 10000 -- "$1" > "$2"' \
    _ "$java_pid" "/$log_file" &>/dev/null \
    && docker cp -a "$server:/$log_file" "$dir" &>/dev/null; then
    exported=1
  fi
  # Best-effort cleanup so dump files do not pile up inside the container.
  docker exec "$server" rm -f -- "/$log_file" &>/dev/null

  if ((exported)); then
    echo "导出成功!"
    echo "目标文件: $dir/$log_file"
    exit 0
  fi
  echo "导出失败!"
  exit 1
}
#######################################
# Export the Java thread dump of the "ehu-job" container to the host.
# Runs jstack against PID 1 inside the container, stores the output in a
# file in the container's root directory, copies that file to /docker-log
# on the host and removes the in-container copy.
# Globals:   none
# Arguments: none
# Outputs:   progress and result messages on stdout
# Returns:   never returns to the caller; exits 0 on success and 1 when the
#            container is not running or the export fails
#######################################
ehu_job() {
  local server="ehu-job"
  local dir=/docker-log
  # PID handed to jstack; PID 1 is taken to be the Java process.
  local java_pid='1'
  local stamp host log_file exported=0
  stamp=$(date '+%F_%H:%M')
  host=$(hostname)
  log_file="${host}-job-${stamp}-java.log"

  # Compare whole container names: a substring match would also accept any
  # container whose name merely contains "ehu-job".
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$server"; then
    echo "业务容器【$server】不存在或未启动,请手动检查服务状态!"
    exit 1
  fi

  echo "正在导出日志,请稍等..."
  # No -it: nothing here needs a TTY or stdin. The dump is written to an
  # absolute path so that docker cp finds it whatever the container's working
  # directory is; pipefail makes a failing jstack fail the whole command.
  if docker exec "$server" bash -c \
    'set -o pipefail; jstack "$1" | grep -A 10000 -- "$1" > "$2"' \
    _ "$java_pid" "/$log_file" &>/dev/null \
    && docker cp "$server:/$log_file" "$dir" &>/dev/null; then
    exported=1
  fi
  # Best-effort cleanup so dump files do not pile up inside the container.
  docker exec "$server" rm -f -- "/$log_file" &>/dev/null

  if ((exported)); then
    echo "导出成功!"
    echo "目标文件: $dir/$log_file"
    exit 0
  fi
  echo "导出失败!"
  exit 1
}
#######################################
# Export the Java thread dump of the "ehu-life-cms-pro" container to the host.
# Runs jstack against PID 1 inside the container, stores the output in a
# file in the container's root directory, copies that file to /docker-log
# on the host (docker cp -a, archive mode) and removes the in-container copy.
# Globals:   none
# Arguments: none
# Outputs:   progress and result messages on stdout
# Returns:   never returns to the caller; exits 0 on success and 1 when the
#            container is not running or the export fails
#######################################
ehu_life_cms_pro() {
  local server="ehu-life-cms-pro"
  local dir=/docker-log
  # PID handed to jstack; PID 1 is taken to be the Java process.
  local java_pid='1'
  local stamp host log_file exported=0
  stamp=$(date '+%F_%H:%M')
  host=$(hostname)
  log_file="${host}-life-cms-pro-${stamp}-java.log"

  # Compare whole container names: a substring or port match would also
  # accept unrelated containers.
  if ! docker ps --format '{{.Names}}' | grep -Fxq -- "$server"; then
    echo "业务容器【$server】不存在或未启动,请手动检查服务状态!"
    exit 1
  fi

  echo "正在导出日志,请稍等..."
  # No -it: nothing here needs a TTY or stdin. The dump is written to an
  # absolute path so that docker cp finds it whatever the container's working
  # directory is; pipefail makes a failing jstack fail the whole command.
  if docker exec "$server" bash -c \
    'set -o pipefail; jstack "$1" | grep -A 10000 -- "$1" > "$2"' \
    _ "$java_pid" "/$log_file" &>/dev/null \
    && docker cp -a "$server:/$log_file" "$dir" &>/dev/null; then
    exported=1
  fi
  # Best-effort cleanup so dump files do not pile up inside the container.
  docker exec "$server" rm -f -- "/$log_file" &>/dev/null

  if ((exported)); then
    echo "导出成功!"
    echo "目标文件: $dir/$log_file"
    exit 0
  fi
  echo "导出失败!"
  exit 1
}
#######################################
# Interactive menu: clears the screen, prints the banner, then lets the
# operator pick a container by number and calls the matching export
# function (container name with '-' replaced by '_').
# Globals:   PS3 (written), choice (written)
# Arguments: none
# Outputs:   banner on stdout; the select menu and prompt are printed by bash
# Returns:   exits the script when the quit entry is chosen; an unrecognised
#            number simply shows the prompt again
#######################################
Export() {
  local -a menu_items=("ehu-life" "ehu-life-cms" "ehu-job" "ehu-life-cms-pro" "退出")

  clear
  printf '%s\n' \
    ">>>>>>>>>>>>>>>> 线上业务Java堆栈日志导出工具 <<<<<<<<<<<<<<<<" \
    ">>>>>>>>>>>>>>>>>>>>>> 注意:打印10000行 <<<<<<<<<<<<<<<<<<<<" \
    "-------------------- 不带pro的为预生产服务 -------------------"

  PS3='请输入功能编号来选择容器>>> '
  select choice in "${menu_items[@]}"; do
    case "$choice" in
      "退出")
        exit
        ;;
      ehu-life | ehu-life-cms | ehu-job | ehu-life-cms-pro)
        # Handler name is the container name with dashes turned into
        # underscores, e.g. ehu-life-cms -> ehu_life_cms.
        "${choice//-/_}"
        ;;
    esac
  done
}
# Script entry point: show the interactive container menu.
Export