Clannaddada

导航

zabbix监控脑裂

zabbix监控脑裂

对脑裂的监控应在备用服务器上进行,通过添加zabbix自定义监控项来实现。

监控出现异常的两种状态:

  • 正常情况下master主机nginx和keepalived为开启,backup主机keepalived为开启nginx为关闭,当master主机发生异常时backup主机通过脚本抢夺vip
  • 当出现脑裂时主备的两台主机都会有vip

所以只需要监控备主机的网卡上是否有vip,就可以达到用zabbix监控脑裂的目的。

备机上出现VIP有两种情况:

  • 发生了脑裂
  • 正常的主备切换

监控只是监控发生脑裂的可能性,不能保证一定是发生了脑裂,因为正常的主备切换VIP也是会到备上的。

在backup主机安装zabbix的客户端,在192.168.118.137主机安装zabbix服务端用于使用web网页管理监控

zabbix服务端部署参考zabbix介绍与部署

Keepalived配置里,同一VRRP实例两端的virtual_router_id参数如果配置不一致,也会导致脑裂问题发生。

环境介绍

主机 ip
master 192.168.118.129
backup 192.168.118.128
zabbix 192.168.118.137

backup部署zabbix客户端

//创建用户
[root@backup ~]# useradd -rMs /sbin/nologin zabbix

//安装编译工具和依赖包
[root@backup ~]# dnf -y install make gcc gcc-c++ pcre-devel openssl openssl-devel

//下载软件包
[root@backup ~]# wget https://cdn.zabbix.com/zabbix/sources/stable/6.2/zabbix-6.2.2.tar.gz

//解压,编译安装zabbix_agentd
[root@backup ~]# tar -xf zabbix-6.2.2.tar.gz
[root@backup ~]# cd zabbix-6.2.2/
[root@backup zabbix-6.2.2]# ./configure --enable-agent
[root@backup zabbix-6.2.2]# make install

//修改配置文件
[root@backup ~]# vim /usr/local/etc/zabbix_agentd.conf
…………
Server=192.168.118.137		//服务端主机ip
…………
ServerActive=192.168.118.137		//agent主动模式
…………
Hostname=backup		//zabbix系统内主机名
…………

//启动zabbix_agentd
[root@backup ~]# zabbix_agentd
[root@backup ~]# ss -antl
State   Recv-Q   Send-Q     Local Address:Port      Peer Address:Port  Process  
LISTEN  0        128              0.0.0.0:22             0.0.0.0:*              
LISTEN  0        128              0.0.0.0:10050          0.0.0.0:*              
LISTEN  0        128                 [::]:22                [::]:* 

backup上编写脚本

[root@backup ~]# cd /scripts/
[root@backup scripts]# vim check_keepalived.sh
[root@backup scripts]# cat check_keepalived.sh 
#!/bin/bash
#
# Zabbix UserParameter helper: report whether this (backup) node currently
# holds the keepalived virtual IP.
#
# Usage:  check_keepalived.sh [INTERFACE] [VIP]
# Output: "1" if VIP is configured on INTERFACE, otherwise "0".
# With no arguments it checks 192.168.118.250 on ens33.

#######################################
# Print "1" if the VIP is present on the interface, else "0".
# Arguments: $1 - interface name (default: ens33)
#            $2 - virtual IP address (default: 192.168.118.250)
# Outputs:   "1" or "0" on stdout
#######################################
check_vip() {
  local iface=${1:-ens33}
  local vip=${2:-192.168.118.250}

  # -F: match the address literally (dots are not regex wildcards).
  # -w: whole-token match, so e.g. 192.168.118.25 does not match .250.
  # stderr is discarded so an ip(8) error message (e.g. missing interface)
  # cannot end up in the value returned to Zabbix.
  if ip -4 addr show dev "$iface" 2>/dev/null | grep -qwF -- "$vip"; then
    echo "1"
  else
    echo "0"
  fi
}

check_vip "$@"

#在最后一行添加
[root@backup ~]# vim /usr/local/etc/zabbix_agentd.conf
UserParameter=check_keepalived,/bin/bash /scripts/check_keepalived.sh

#更改配置文件后重启zabbix服务
[root@backup ~]# pkill zabbix
[root@backup ~]# zabbix_agentd

zabbix服务端测试脚本

[root@zabbix ~]# zabbix_get -s 192.168.118.128 -k check_keepalived
0

监控backup服务器

添加监控主机

image

添加监控项

image

image

image

查看监控数据

image

添加触发器

image

image

image

若需要添加邮件报警请参考zabbix监控流程

测试

Keepalived配置里,同一VRRP实例两端的virtual_router_id参数如果配置不一致,也会导致脑裂问题发生。

让主备产生脑裂

[root@backup ~]# vim /etc/keepalived/keepalived.conf
[root@backup ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived

global_defs {
    router_id lb02
}

vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    virtual_router_id 50     # 正常情况下此处id主备一致,改为不一致就可以产生脑裂
    priority 90
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 123456
    }
    virtual_ipaddress {
        192.168.118.250
    }
    notify_master "/scripts/notify.sh master 192.168.118.250"
    notify_backup "/scripts/notify.sh backup 192.168.118.250"
}
virtual_server 192.168.118.250 80 {
    delay_loop 6
    lb_algo rr
    lb_kind NAT
    persistence_timeout 50
    protocol TCP

    real_server 192.168.118.129 80 {
        weight 1
        TCP_CHECK {
            connect_port 80
            connect_timeout 3
            nb_get_retry 3
            delay_before_retry 3
        }
    }

    real_server 192.168.118.128 80 {
        weight 1
        TCP_CHECK {
            connect_port 80 
            connect_timeout 3
            nb_get_retry 3
            delay_before_retry 3
        }
    }
}

//重启服务
[root@backup ~]# systemctl restart keepalived

查看VIP

master

[root@master ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 00:0c:29:b5:9a:13 brd ff:ff:ff:ff:ff:ff
    inet 192.168.118.129/24 brd 192.168.118.255 scope global dynamic noprefixroute ens33
       valid_lft 1066sec preferred_lft 1066sec
    inet 192.168.118.250/32 scope global ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:feb5:9a13/64 scope link noprefixroute 
       valid_lft forever preferred_lft forever

backup

[root@backup ~]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: ens33: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc fq_codel state UP group default qlen 1000
    link/ether 00:0c:29:16:02:99 brd ff:ff:ff:ff:ff:ff
    inet 192.168.118.128/24 brd 192.168.118.255 scope global dynamic noprefixroute ens33
       valid_lft 1054sec preferred_lft 1054sec
    inet 192.168.118.250/32 scope global ens33
       valid_lft forever preferred_lft forever
    inet6 fe80::20c:29ff:fe16:299/64 scope link noprefixroute 
       valid_lft forever preferred_lft forever

同时存在vip

image

posted on 2022-10-09 23:08  linux-ada  阅读(84)  评论(0)  编辑  收藏  举报