监控mysql主从同步
1,昨天看到shell一道面试题,需求如下:
监控MySQL主从同步是否异常,如果异常,则发送短信或者邮件给管理员。提示:如果没主从同步环境,可以用下面文本放到文件里读取来模拟:
阶段1:开发一个守护进程脚本每30秒实现检测一次。
阶段2:如果同步出现如下错误号(1158,1159,1008,1007,1062),则跳过错误。
阶段3:请使用数组技术实现上述脚本(获取主从判断及错误号部分)
[root@oldboy~]# mysql -uroot -p'oldboy' -S /data/3307/mysql.sock -e "show slavestatus\G;" *************************** 1. row *************************** Slave_IO_State:Waiting for master to send event Master_Host:10.0.0.179 #当前的mysql master服务器主机 Master_User: rep Master_Port: 3306 Connect_Retry: 60 Master_Log_File:mysql-bin.000013 Read_Master_Log_Pos: 502547 Relay_Log_File:relay-bin.000013 Relay_Log_Pos:251 Relay_Master_Log_File:mysql-bin.000013 Slave_IO_Running:Yes Slave_SQL_Running: Yes Replicate_Do_DB: Replicate_Ignore_DB: mysql Replicate_Do_Table: Replicate_Ignore_Table: Replicate_Wild_Do_Table: Replicate_Wild_Ignore_Table: Last_Errno: 0 Last_Error: Skip_Counter: 0 Exec_Master_Log_Pos: 502547 Relay_Log_Space:502986 Until_Condition:None Until_Log_File: Until_Log_Pos: 0 Master_SSL_Allowed: No Master_SSL_CA_File: Master_SSL_CA_Path: Master_SSL_Cert: Master_SSL_Cipher: Master_SSL_Key: Seconds_Behind_Master: 0 #和主库比同步延迟的秒数,这个参数很重要 Master_SSL_Verify_Server_Cert: No Last_IO_Errno: 0 Last_IO_Error: Last_SQL_Errno: 0 Last_SQL_Error:
2,好久没写过脚本,所以就写了一个,如果各位看观发现有问题,还请指教,谢谢!
#!/bin/bash #author:linsir #qq:384108391 #检查mysql同步情况 #定义初始变量 mysql_sync_status=/tmp/mysql_sync_status.txt error_log=/tmp/mysql_sync_error.txt user=root passwd=linlin CMD="/usr/local/mysql2/bin/mysql -S /tmp/3307.sock -u$user -p$passwd -e" mail_address='384108391@qq.com' time=`date +%F-%T` ip=`ifconfig eth0|awk -F '[ :]+' '{print $4}'|awk "{if(NR==2) print}"` errorcode=(1158 1159 1008 1007 1062) #mysql主从同步会出现错误代码1158 1159 1008 1007 1062,这些错误代码不会对主从同步影响非常大。所以如果遇到这个错误代码,就到下个需要执行的sql即可 #检查mysql status中的Last_Errno的值,正常一般为0. Continue_errorcode() { $CMD "show slave status\G" > $mysql_sync_status 2>/dev/null code=`awk '/Last_Errno/ {print $2}' $mysql_sync_status` if [ "$code" != "0" ];then for ccode in ${errorcode[*]} do if [ "$code" = "$ccode" ];then $CMD "stop slave;set global sql_slave_skip_counter = 1;start slave;" 2>/dev/null $CMD "show slave status\G" > $mysql_sync_status 2>/dev/null flag=1 && break fi done else flag=1 fi } Check_ok() { IO=`awk '/Slave_IO_Running/ {print $2}' $mysql_sync_status` SQL=`awk '/Slave_SQL_Running:/ {print $2}' $mysql_sync_status` # echo $IO # echo $SQL if [ "$IO" = "No" -o "$SQL" = "No" ];then Error_format "$time" "$ip" "mysql同步失败,IO或SQL线程down" fi } Delay_time() { t=`awk '/Seconds_Behind_Master/ {print $2}' $mysql_sync_status` if [ "$t" = "NULL" ] ;then Error_format "$time" "$ip" "mysql同步出错,Seconds_Behind_Master is NUll" else if [ $t -gt 0 ];then Error_format "$time" "$ip" "mysql同步超时,超时时间为$t" fi fi } Error_format() { echo " Mysql master-slave synchronization state ---------------------------------------------------------- time: $1 host: $2 question: $3 ---------------------------------------------------------- " >> $error_log } main() { while true do cat /dev/null >$error_log Continue_errorcode if [ "$flag" != "1" ];then Error_format "$time" "$ip" "mysql同步出错,错误代码:$code" fi Check_ok Delay_time [ `cat $error_log|wc -l` -ne 0 ] && mail -s 'mysql同步异常,请及时处理' -a $mysql_sync_status $mail_address < $error_log sleep 30 done } main