Loading

通过Nagios监控weblogic服务

1.前言

     前段时间搭建了一套Nagios监控服务,心血来潮想自己写一个脚本,拓展Nagios插件来监控公司的weblogic服务。

环境:weblogic10.3.3.0 、 CentOS6.5

1.首先介绍一些用命令行监控weblogic的方法

    对于Weblogic运行状况的获得,我们是通过命令行的方式实现的,即调用Weblogic的weblogic.Admin类。既然要调用类,就要把/home/weblogic/Oracle/Middleware/wlserver_10.3/server/lib/weblogic.jar包配置到环境变量中。我是通过修改原来配置JDK时设置的环境变量CLASSPATH来实现的:编辑/etc/profile文件,
export CLASSPATH=.:/usr/java/jdk1.6.0_21/lib/dt.jar:/usr/java/jdk1.6.0_21/lib/tools.jar:/home/weblogic/Oracle/Middleware/wlserver_10.3/server/lib/weblogic.jar


1.1 证实weblogic server 是否正常

java weblogic.Admin -url t3://127.0.0.1:6088 -username weblogic1 -password weblogic1 PING count byte

返回结果:


1.2 查看weblogic server的版本

java weblogic.Admin -url t3://127.0.0.1:6088 -username weblogic1 -password weblogic1 VERSION

返回结果:

还有一种方法:java weblogic.version | grep -m 1 "."

weblogic.version | grep -m 1
1.3 获得server的运行状态

java weblogic.Admin -url ${URL} -username ${USER_NAME} -password ${PASS_WORD} get -pretty -mbean "${DOMAIN_NAME}:Location=${SERVER_NAME},Name=${SERVER_NAME},Type=ServerRuntime"

我的机器上的执行命令:

java weblogic.Admin -url t3://localhost:6088 -username weblogic1 -password weblogic1 GET -pretty -mbean "hb_circfnl_dom:Location=AdminServer,Name=AdminServer,Type=ServerRuntime"

返回结果:

1.4获得JDBC Pool运行状态

$ java weblogic.Admin -url ${URL} -username ${USER_NAME} -password ${PASS_WORD} GET -pretty -mbean "${DOMAIN_NAME}:Location=${SERVER_NAME},Name=${POOL_NAME},ServerRuntime=${SERVER_NAME},Type=JDBCConnectionPoolRuntime"

我的执行命令:

java weblogic.Admin -url t3://localhost:6088 -username weblogic1 -password weblogic1 GET -pretty -mbean "hb_circfnl_dom:Location=AdminServer,Name=circDataSource,ServerRuntime=AdminServer,Type=JDBCConnectionPoolRuntime"

返回结果:

2.利用1.3和1.4的方法根据Nagios Plugin API规则编写自己的shell实现脚本。具体的shell脚本如下:

check_weblogic.sh

#! /bin/bash

#check_weblogic.sh --jdbcpool url username password domainname servername poolname
#check_weblogic.sh --server url username password domainname servername

# -H host -p port -v "--type=<[jdbcpool|server]> --username username --password password --domain domainname --server servername [--pool poolname]"

# Plugin identity: file name, install directory and revision number.
PROGNAME=$(basename -- "$0")
# dirname returns "." when $0 carries no directory part (e.g. the script is
# started as "bash check_weblogic.sh"), so the helper library is still looked
# up in a real directory instead of under the script's own name.
PROGPATH=$(dirname -- "$0")
REVISION=$(echo '$Revision: 1749 $' | sed -e 's/[^0-9.]//g')

# utils.sh is expected to sit next to the plugin and to provide the STATE_*
# exit codes and print_revision, which this script uses but does not define.
if [ -r "$PROGPATH/utils.sh" ]
then
    . "$PROGPATH/utils.sh"
fi

# Fallbacks for when utils.sh could not be loaded. Without them every
# "exit $STATE_..." / "return $STATE_..." would run with an empty argument and
# reuse the status of the previous command, typically 0 (reported as OK).
: "${STATE_OK:=0}" "${STATE_WARNING:=1}" "${STATE_CRITICAL:=2}" "${STATE_UNKNOWN:=3}"
if ! type print_revision >/dev/null 2>&1
then
    print_revision() {
        echo "$1 v$2"
    }
fi

# Print the command-line synopsis on stdout.
# Globals: PROGNAME (read)
# The value of -v is shown inside single quotes because the script stores only
# the single word following -v and splits it into the WebLogic options later;
# passing those options unquoted would make them unknown top-level arguments.
print_usage() {
    echo "Usage:"
    echo "  $PROGNAME -H host -p port -v '--type [jdbcpool|server] --username username --password password --domain domainname --server servername [--pool poolname]'"
    echo "  $PROGNAME --help"
    echo "  $PROGNAME --version"
}

# Print the full help text on stdout: revision line, blank line, usage
# synopsis, blank line, one-line description, blank line.
# Globals: PROGNAME, REVISION (read)
# Calls:   print_revision, print_usage (both defined outside this function)
print_help() {
    print_revision $PROGNAME $REVISION
    printf '\n'
    print_usage
    printf '\n%s\n\n' "Check Weblogic status"
}

# Parse the WebLogic-specific options (the words given after -v).
# Arguments: option words; each option is accepted as "--name value" and as
#            "--name=value" (the form shown in the header comment of the script).
# Globals:   TYPE, USER_NAME, PASS_WORD, DOMAIN_NAME, SERVER_NAME, POOL_NAME
#            (written); STATE_UNKNOWN (read, 3 is used when it is unset)
# Exits:     prints the usage and exits with STATE_UNKNOWN on an unknown
#            option or when the last option has no value.
parse_wls_para() {
    local opt val
    # As in the original loop, parsing stops at the first empty word.
    while [ -n "${1:-}" ]
    do
        case "$1" in
        --type=*|--username=*|--password=*|--domain=*|--server=*|--pool=*)
            # Split at the first "=" only, so the value may itself contain "=".
            opt="${1%%=*}"
            val="${1#*=}"
            ;;
        --type|--username|--password|--domain|--server|--pool)
            if [ $# -lt 2 ]
            then
                # Option given as the very last word: there is no value to read.
                print_usage
                exit "${STATE_UNKNOWN:-3}"
            fi
            opt="$1"
            val="$2"
            shift
            ;;
        *)
            print_usage
            exit "${STATE_UNKNOWN:-3}"
            ;;
        esac

        case "$opt" in
        --type)     TYPE="$val" ;;
        --username) USER_NAME="$val" ;;
        --password) PASS_WORD="$val" ;;
        --domain)   DOMAIN_NAME="$val" ;;
        --server)   SERVER_NAME="$val" ;;
        --pool)     POOL_NAME="$val" ;;
        esac
        shift
    done
}

# Check a WebLogic server through "java weblogic.Admin ... get -pretty -mbean".
# Globals read: HOST_NAME, SERVER_PORT, USER_NAME, PASS_WORD, DOMAIN_NAME,
#               SERVER_NAME, SERVER_TYPE, STATE_OK/WARNING/CRITICAL/UNKNOWN
#               (the usual 0/1/2/3 are used when a STATE_* variable is unset)
# Globals set:  tmpfile (kept global because do_exit also refers to it)
# Outputs:      one status line on stdout
# Returns:      STATE_OK, STATE_WARNING, STATE_CRITICAL or STATE_UNKNOWN
check_wls_server() {
    local URL="t3://${HOST_NAME}:${SERVER_PORT}"
    local SERVER_INFO="${DOMAIN_NAME}:${SERVER_NAME}"
    # Picks the HEALTH_* token that follows "State:" inside the HealthState
    # value, wherever it appears among the comma-separated fields.
    local STATE_RE='State:([A-Z_]+)'
    local N ERR_INFO NAME VALUE
    local HEALTH_STATE="" HEALTH_STATE_INFO="" RUN_STATE=""

    tmpfile=$(mktemp -t nagios.XXXXXX)
    if [ -z "$tmpfile" ]
    then
        echo "UNKNOWN - unable to create a temporary file"
        return "${STATE_UNKNOWN:-3}"
    fi

    # Every value is quoted so that credentials or names containing blanks or
    # glob characters reach java as single, unmodified arguments.
    java weblogic.Admin -url "${URL}" -username "${USER_NAME}" -password "${PASS_WORD}" get -pretty \
        -mbean "${DOMAIN_NAME}:Location=${SERVER_NAME},Name=${SERVER_NAME},Type=${SERVER_TYPE}" \
        >"${tmpfile}" 2>&1

    # A line starting with "-" is treated as the marker of a successful reply;
    # anything else is reported as an error message.
    N=$(grep -c '^-' "${tmpfile}")
    if [ "${N:-0}" -lt 1 ]
    then
        # Join the output into one line. "%s" keeps the text from being
        # interpreted as a printf format when it contains "%".
        ERR_INFO=$(awk '{ printf "%s", $0 }' "${tmpfile}")
        echo "CRITICAL - ${ERR_INFO}"
        rm -f -- "${tmpfile}"
        return "${STATE_CRITICAL:-2}"
    fi

    # Each attribute line is read as "<Name>: <value>".
    while read -r NAME VALUE
    do
        case "${NAME}" in
        HealthState:)
            HEALTH_STATE_INFO=${VALUE}
            ;;
        State:)
            RUN_STATE=${VALUE}
            ;;
        esac
    done < "${tmpfile}"
    rm -f -- "${tmpfile}"

    # The original piped the value into "read", which runs in a pipeline
    # subshell, so the parsed state was lost and no health problem was ever
    # reported. The match is done in the current shell instead.
    if [[ "${HEALTH_STATE_INFO}" =~ ${STATE_RE} ]]
    then
        HEALTH_STATE=${BASH_REMATCH[1]}
    fi

    if [[ "${RUN_STATE}" != "RUNNING" ]]
    then
        echo "CRITICAL - ${SERVER_INFO} State is ${RUN_STATE}"
        return "${STATE_CRITICAL:-2}"
    fi

    # Known values: HEALTH_OK HEALTH_WARN HEALTH_CRITICAL HEALTH_FAILED.
    # Any other (or missing) value falls through to OK, as before.
    case "${HEALTH_STATE}" in
    HEALTH_OK)
        ;;
    HEALTH_WARN)
        echo "WARN - ${SERVER_INFO} HealthState is ${HEALTH_STATE_INFO}"
        return "${STATE_WARNING:-1}"
        ;;
    HEALTH_CRITICAL)
        echo "CRITICAL - ${SERVER_INFO} HealthState is ${HEALTH_STATE_INFO}"
        return "${STATE_CRITICAL:-2}"
        ;;
    HEALTH_FAILED)
        echo "FAILED - ${SERVER_INFO} HealthState is ${HEALTH_STATE_INFO}"
        return "${STATE_CRITICAL:-2}"
        ;;
    esac

    echo "OK - ${SERVER_INFO} State is ${RUN_STATE},HealthState is ${HEALTH_STATE_INFO}"
    return "${STATE_OK:-0}"
}

# Check a WebLogic JDBC connection pool through
# "java weblogic.Admin ... GET -pretty -mbean".
# Globals read: HOST_NAME, SERVER_PORT, USER_NAME, PASS_WORD, DOMAIN_NAME,
#               SERVER_NAME, POOL_NAME, JDBC_TYPE,
#               STATE_OK/WARNING/CRITICAL/UNKNOWN (the usual 0/1/2/3 are used
#               when a STATE_* variable is unset)
# Globals set:  tmpfile (kept global because do_exit also refers to it)
# Outputs:      one status line on stdout
# Returns:      STATE_OK, STATE_WARNING, STATE_CRITICAL or STATE_UNKNOWN
check_wls_jdbcpool() {
    local URL="t3://${HOST_NAME}:${SERVER_PORT}"
    local POOL_INFO="${DOMAIN_NAME}:${SERVER_NAME}:${POOL_NAME}"
    local N ERR_INFO NAME VALUE
    local POOL_STATE="" WAIT_CNT="" RUN_STATE=""

    tmpfile=$(mktemp -t nagios.XXXXXX)
    if [ -z "$tmpfile" ]
    then
        echo "UNKNOWN - unable to create a temporary file"
        return "${STATE_UNKNOWN:-3}"
    fi

    # Every value is quoted so that credentials or names containing blanks or
    # glob characters reach java as single, unmodified arguments.
    java weblogic.Admin -url "${URL}" -username "${USER_NAME}" -password "${PASS_WORD}" GET -pretty \
        -mbean "${DOMAIN_NAME}:Location=${SERVER_NAME},Name=${POOL_NAME},ServerRuntime=${SERVER_NAME},Type=${JDBC_TYPE}" \
        >"${tmpfile}" 2>&1

    # A line starting with "-" is treated as the marker of a successful reply;
    # anything else is reported as an error message.
    N=$(grep -c '^-' "${tmpfile}")
    if [ "${N:-0}" -lt 1 ]
    then
        # Join the output into one line. "%s" keeps the text from being
        # interpreted as a printf format when it contains "%".
        ERR_INFO=$(awk '{ printf "%s", $0 }' "${tmpfile}")
        echo "CRITICAL - ${ERR_INFO}"
        rm -f -- "${tmpfile}"
        return "${STATE_CRITICAL:-2}"
    fi

    # Each attribute line is read as "<Name>: <value>".
    while read -r NAME VALUE
    do
        case "${NAME}" in
        PoolState:)
            POOL_STATE=${VALUE}
            ;;
        WaitingForConnectionCurrentCount:)
            WAIT_CNT=${VALUE}
            ;;
        State:)
            RUN_STATE=${VALUE}
            ;;
        esac
    done < "${tmpfile}"
    rm -f -- "${tmpfile}"

    if [[ "${POOL_STATE}" != "true" ]]
    then
        echo "CRITICAL - ${POOL_INFO} PoolState is ${POOL_STATE}"
        return "${STATE_CRITICAL:-2}"
    fi

    if [[ "${RUN_STATE}" != "Running" ]]
    then
        echo "CRITICAL - ${POOL_INFO} State is ${RUN_STATE}"
        return "${STATE_CRITICAL:-2}"
    fi

    # Callers waiting for a connection only downgrade the result to WARNING.
    if [[ "${WAIT_CNT}" -gt "0" ]]
    then
        echo "WARNING - ${POOL_INFO} WaitingForConnectionCurrentCount is ${WAIT_CNT}"
        return "${STATE_WARNING:-1}"
    fi

    echo "OK - ${POOL_INFO} State is ${RUN_STATE},PoolState is ${POOL_STATE},WaitingForConnectionCurrentCount is ${WAIT_CNT}"
    return "${STATE_OK:-0}"
}

# Remove the temporary file named by the global "tmpfile", if there is one.
# Globals: tmpfile (read)
# The path is quoted so that a name containing blanks or glob characters is
# tested and removed as one file; an unset or empty tmpfile is a no-op.
do_exit() {
    if [ -n "${tmpfile:-}" ] && [ -f "$tmpfile" ]
    then
        rm -f -- "$tmpfile"
    fi
}


# Pre-flight checks: the plugin needs JAVA_HOME and a CLASSPATH that mentions
# weblogic.jar; otherwise it prints a hint and exits with STATE_UNKNOWN.
[ -n "$JAVA_HOME" ] || {
    echo "Please set JAVA_HOME!"
    exit $STATE_UNKNOWN
}

if [ -z "$CLASSPATH" ]
then
    echo "Please set CLASSPATH!"
    exit $STATE_UNKNOWN
elif ! echo $CLASSPATH | grep -q "weblogic.jar"
then
    echo "Please add weblogic.jar to CLASSPATH!"
    exit $STATE_UNKNOWN
fi

# Put the JDK's bin directory first so that "java" resolves to it.
export PATH=$JAVA_HOME/bin:$PATH

# MBean type names used in the -mbean queries of the two checks.
SERVER_TYPE="ServerRuntime"
JDBC_TYPE="JDBCConnectionPoolRuntime"

# Informational switches: handled before any other argument parsing, looking
# at the first argument only. Both the long and the short spelling are served
# by the same arm.
cmd="$1"

case "$cmd" in
--help|-h)
    print_help
    exit $STATE_OK
    ;;
--version|-V)
    print_revision $PROGNAME $REVISION
    exit $STATE_OK
    ;;
esac


#set -- `getopt -q H:p:v: "$@"`

#echo "$@"

# Parse the top-level arguments: -H host, -p port, -v "<weblogic options>".
# The loop ends at the first empty argument or at "--".
# NOTE(review): an unknown argument only prints the usage and parsing goes on;
# the exit that would abort here is disabled in the original. Confirm whether
# that is intentional.
until [ -z "$1" ]
do
    case "$1" in
    -H) HOST_NAME="$2";   shift ;;   # host
    -p) SERVER_PORT="$2"; shift ;;   # port
    -v) WL_ARGS="$2";     shift ;;   # WebLogic options, one quoted word
    --) shift; break ;;
    *)  print_usage ;;
    esac
    shift
done

# Split the -v string into option words and hand them to parse_wls_para.
# Word splitting is intended here; pathname expansion is switched off around
# the call so that a value such as a password containing "*" or "?" is not
# replaced by matching file names.
set -f
parse_wls_para $WL_ARGS
set +f

# Run the requested check. The check functions run inside a command
# substitution, so their status is captured right after the assignment.
case "${TYPE}" in
server)
    CHK_INFO=$(check_wls_server)
    EXIT_STATE=$?
    ;;
jdbcpool)
    CHK_INFO=$(check_wls_jdbcpool)
    EXIT_STATE=$?
    ;;
*)
    print_usage
    exit "${STATE_UNKNOWN:-3}"
    ;;
esac

# Print the status line verbatim: quoting keeps its spacing intact and stops
# glob characters in it from being expanded.
printf '%s\n' "$CHK_INFO"
do_exit
exit "$EXIT_STATE"

2.1将脚本上传到Nagios软件的libexec目录下,并创建一个软链接文件check_weblogic

使用的方法如下,
    check_weblogic.sh -H host -p port -v '--type [jdbcpool|server] --username username --password password --domain domainname --server servername [--pool poolname]'

2.2编辑被监控主机的nrpe.cfg文件,增加如下内容,

#check weblogic [check_wls]
command[check_wls_mydomain_myserver]=sudo su - weblogic -c "/usr/local/nagios/libexec/check_weblogic -H localhost -p 6088 -v '--type server --username weblogic1 --password weblogic1 --domain hb_circfnl_dom --server AdminServer'"
command[check_wls_jdbcpool_mypool]=sudo su - weblogic -c "/usr/local/nagios/libexec/check_weblogic -H localhost -p 6088 -v '--type jdbcpool --username weblogic1 --password weblogic1 --domain hb_circfnl_dom --server AdminServer --pool circDataSource'"

编辑  /etc/sudoers文件

       #vim  /etc/sudoers

             nagios ALL=(ALL) NOPASSWD:/usr/local/nagios/libexec/check_tablespace.sh

同时得注释掉以下行,表示不需要控制终端

            #Defaults    requiretty

否则会出现:NRPE: Unable to read output 错误

#########################################################################################

PS:这一步我搞了很久,不是提示Unable to read output 错误,就是提示:“Please set JAVA_HOME!”,事后分析具体原因写在这,免得走弯路。前面是在root环境下执行的,但是监控主机是和nagios用户交流的,所以必须以nagios用户运行命令;而监控的命令调用的是weblogic用户的方法,所以得切换到weblogic用户下执行命令。本来想只用sudo命令,发现不行,还得结合su命令。如下图所示:

2.3编辑监控主机的/usr/local/nagios/etc/objects/services.cfg文件(该文件是自己新增的)并且被监控的这台主机也应该在host.cfg中定义好,增加如下内容

define service{
        use                     local-service
        host_name               weblogic
        service_description     Weblogic Server
        check_command           check_nrpe!check_wls_mydomain_myserver
        }
define service{
        use                     local-service
        host_name               weblogic
        service_description     Weblogic JDBCPool
        check_command           check_nrpe!check_wls_jdbcpool_mypool
        }

 3.检查是否成功

3.1重启被监控主机的nrpe

service xinetd restart

3.2重启nagios服务

service nagios restart

3.3进入监控界面

 

 参考资料:

http://skymax.blog.51cto.com/365901/101603

http://blog.csdn.net/forest_hou/article/details/5468239

http://blog.itpub.net/228190/viewspace-586512/

 

 

 

   

posted @ 2015-12-31 18:26  头痛不头痛  阅读(1026)  评论(0编辑  收藏  举报