检查数据库连接性
输入:数据库名字 输出:连接数据库测试信息
#!/bin/ksh
#
# Description : Check Status of Oracle Instance (All databases | a specific one ) , Listener, Oracle Net connection
#
# Usage       : <this script> WAIT_TIME
#   WAIT_TIME   Seconds to wait after starting each sqlplus connection test
#               before its output is inspected (required, numeric).
#
# For every database in db_list a sqlplus session running chk_conn.sql is
# started in the background. After WAIT_TIME seconds its output is searched
# for "<db_name>-" (case-insensitive); if that text is absent the database is
# reported as unreachable.
# NOTE(review): presumably chk_conn.sql prints a "<db_name>-..." marker on a
# successful connection - confirm against the SQL script.
#
# Files written:
#   $LOGFILE  daily log of all connection results (kept for 5 days)
#   $ERRLOG   body of the alert mail sent to every address in $EMAIL_LST
#   $OVOLOG   one "critical ..." line when at least one database failed,
#             otherwise empty; its content is also appended to $OVOSMY
#
# Date of Modification            Modification
# ===========================     ===============================
# 12-AUG-2010                     Sing
#
#set -x

PATH=/usr/local/bin:/usr/bin:$PATH:.

# Print a usage message on stderr and stop.
usage() {
  echo "Usage: $0 WAIT_TIME   (seconds to wait for each connection test)" >&2
  exit 2
}

# Print the sqlplus credentials (user/password) to use for database $1.
# The match is case-insensitive so that it works with the upper-case names
# used in db_list.
# NOTE(review): credentials are hard-coded here and passed on the sqlplus
# command line, where they are visible in the process list; consider a
# protected credential store instead.
login_for_db() {
  case "$1" in
    [aA] | [bB]) echo "oper/oper321" ;;
    [cC] | [dD]) echo "sysman/oracle123" ;;
    *) echo "oper/oper123" ;;
  esac
}

# Without a valid wait time, sleep fails and every database would be
# reported as down immediately, so refuse to run instead.
WAIT_TIME=$1
case "$WAIT_TIME" in
  '' | *[!0-9.]*) usage ;;
esac

MACRO_ENV=$HOME/utility/macro/macro.env
if [ ! -r "$MACRO_ENV" ]; then
  echo "$0: cannot read $MACRO_ENV" >&2
  exit 1
fi
. "$MACRO_ENV"

# These are used below but never assigned in this script; presumably they
# come from macro.env or the calling environment. Stop early if one is
# missing rather than writing to "/" or running find on an empty path.
: "${ORACLE_HOME:?must be set}"
: "${REPORT_DIR:?must be set}"
: "${TMP_DIR:?must be set}"
: "${ADMIN_DIR:?must be set}"
: "${SCRIPT_DIR:?must be set}"

OVO_DIR=/tmp
LOGFILE=$REPORT_DIR/chk_conn_subsystems.log_$(date '+%y%m%d')
MAILLOG=$TMP_DIR/chk_conn_subsystems.log_$(date '+%y%m%d%H%M')
ERRLOG=$TMP_DIR/chk_conn_subsystems.err
tmpfile=$TMP_DIR/chk_conn_subsystems.tmp1
OVOLOG=$OVO_DIR/chk_conn_subsystems.log
OVOSMY=$OVO_DIR/chk_conn_subsystems_smy.log
db_list="A B C D"
err_dblst=""
sname=yidbmon
EMAIL_LST=$ADMIN_DIR/email.lst

rm -f "$OVOLOG"
touch "$tmpfile" "$OVOLOG"
# Start from an empty per-run log so that leftovers of an aborted run in the
# same minute are not logged or mailed again.
: > "$MAILLOG"

# db_list is a plain space-separated list; word splitting is intended.
for db_name in ${db_list}; do
  LOGIN_ID=$(login_for_db "$db_name")
  # Per-database sqlplus output; left in place after the run so that a
  # failed connection can be diagnosed.
  db_out=$tmpfile.$db_name

  "$ORACLE_HOME/bin/sqlplus" -s "${LOGIN_ID}@${db_name}" "@$SCRIPT_DIR/chk_conn.sql" > "$db_out" 2>&1 &
  sqlplus_pid=$!
  sleep "$WAIT_TIME"

  dt1=$(date '+%y/%m/%d %H:%M:%S')
  dt2=$(date '+%y%m%d%H%M%S')

  if grep -i "${db_name}-" "$db_out" > /dev/null; then
    echo "Connect to database successfully:  ${db_name} ..${dt1}" >> "$MAILLOG"
  else
    echo "Unable to connect to database:  ${db_name} ..${dt1}" >> "$MAILLOG"
    if [ -z "$err_dblst" ]; then
      err_dblst="${db_name}(${dt2})"
    else
      err_dblst="${err_dblst} ${db_name}(${dt2})"
    fi
  fi

  # A session that is still running after WAIT_TIME has already been judged
  # above; terminate it so hung sqlplus processes do not pile up across runs.
  # The error is discarded because the process has normally exited already.
  kill "$sqlplus_pid" 2> /dev/null
done
printf '\n\n' >> "$MAILLOG"

if [ -n "$err_dblst" ]; then
  cat > "$ERRLOG" << EOF
Possible Causes:
1. Scheduled shutdown.
2. Maybe the connection needs more than ${WAIT_TIME} seconds to complete.
3. Database or listener is not up and running.
Actions:
1. Please ignore this message if it is a scheduled shutdown.
2. Otherwise run program /macro/chk_db.sh to check database services on server side.


EOF
  # Append the failure lines once, before the mail loop, so that every
  # recipient receives the same message without repeated lines.
  grep "Unable to connect to database:" "$MAILLOG" >> "$ERRLOG"

  if [ -r "$EMAIL_LST" ]; then
    # The list is split on whitespace: one recipient per word.
    for iname in $(cat "$EMAIL_LST"); do
      /bin/mailx -s "Failed to connect to database " "$iname" < "$ERRLOG"
    done
  else
    echo "$0: cannot read recipient list $EMAIL_LST" >&2
  fi

  echo "critical Unable to connect to db :  ${err_dblst}  Check Email or log ${OVOLOG} on server ${sname}" > "$OVOLOG"
fi

cat "$MAILLOG" >> "$LOGFILE"
cat "$OVOLOG" >> "$OVOSMY"
# NOTE(review): world-writable on purpose? Presumably another account has to
# read or replace this file - confirm before tightening the mode.
chmod 777 "$OVOLOG"
rm -f "$MAILLOG" "$tmpfile"
# Drop daily logs older than five days.
find "$REPORT_DIR" -type f -name 'chk_conn_subsystems.log_*' -mtime +5 -exec rm -f {} \;
可能原因:
1. RMAN 异常中断,导致数据库 session 中的 RMAN 进程异常残留,无法正常清理,强制 kill session 也无效。
Investigation showed that the issue was caused by the third-party SBT library used for RMAN backups: the RMAN sessions in the database were not cleared after the backup finished.
fixed:
重启备份软件,或者重启主机
2. VM 底层的杀毒软件导致连接过慢。
fixed:
联系OA 查询
3.内存消耗异常
查看os log 会有告警信息出来。
fixed:
解决内存问题,或者升级oracle
4. 数据库连接偶发性过慢,原因未明。
5. 监控脚本本身异常,导致系统误报。
大量未清理(已删除但仍被进程占用)的文件导致脚本无法连接数据库或写入日志,lsof 检查结果如下:
[root@yidbmon1 ~]# /usr/sbin/lsof |grep home
bash 6623 oracle cwd DIR 253,8 12288 131223 /home/oracle/utility/macro
tnslsnr 6940 oracle cwd DIR 253,8 4096 131073 /home/oracle
bash 9990 oracle cwd DIR 253,8 4096 131099 /home/oracle/11
bash 9990 oracle 254r REG 253,8 3493 143496 /home/oracle/11/osw_sort_11pr.sh
bash 9992 oracle cwd DIR 253,8 4096 131099 /home/oracle/11
tee 9993 oracle cwd DIR 253,8 4096 131099 /home/oracle/11
sh 11637 oracle cwd DIR 253,8 4096 131099 /home/oracle/11
sh 11637 oracle 1w REG 253,8 1994113024 147308 /home/oracle/11/nohup.out (deleted)
sh 11637 oracle 2w REG 253,8 1994113024 147308 /home/oracle/11/nohup.out (deleted)
sh 11637 oracle 255r REG 253,8 3493 143496 /home/oracle/11/osw_sort_11pr.sh
sleep 29934 oracle 2w REG 253,8 1994113024 147308 /home/oracle/11/nohup.out (deleted)
sleep 29957 oracle cwd DIR 253,8 4096 131099 /home/oracle/11
sleep 29988 oracle cwd DIR 253,8 4096 131099 /home/oracle/1
6.监听程序异常,原因未明。
fixed:
以管理员身份重启监听。