Loading

源码搭建Zabbix4.0.23LTS监控系统

实验环境

centos 7.5

主机名

IP地址

配置

应用

controlnode

172.16.1.120/24

4/8G/60G

java-1.8.0-openjdk

zabbix server 4.0.23 LTS

mysql 5.7.31

nginx 1.19.2

php 5.6.36

JavaGateway 4.0.23

ZabbixAgent 4.0.23

grafana6.7.4

docker(Prometheus)

slavenode1

172.16.1.121/24

2/2G/60G

ZabbixAgent 4.0.23

zabbix proxy 4.0.23

mysql 5.7.30(master)

nginx

jdk1.8.0_45

tomcat 8.5.37

slavenode2

172.16.1.122/24

2/2G/60G

ZabbixAgent 4.0.23

mysql 5.7.30(slave)

docker(nginx,apache,cadvisor)

slavenode3

172.16.1.123/24

1/1G/60G

ZabbixAgent 4.0.23

zabbix 官网:https://www.zabbix.com/

zabbix4.0 LTS官方帮助手册:https://www.zabbix.com/documentation/4.0/zh/manual

wps1 

 

wps2 

 

wps3 

 

wps4 

一、 部署Zabbix服务端

172.16.1.120节点上操作

为保证顺利部署,先关闭selinux,关闭防火墙

# setenforce 0

# systemctl stop firewalld

# systemctl disable firewalld

更换主机epel源为阿里云的源

# wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo

# yum clean all

1. 安装MySQL

部署MySQL5.7版本

# yum -y install yum-utils

# rpm -ivh https://dev.mysql.com/get/mysql80-community-release-el7-1.noarch.rpm

# yum-config-manager --disable mysql80-community

# yum-config-manager --enable mysql57-community

# yum install mysql-community-server mysql-community-devel -y

# rpm -qa | grep mysql

mysql-community-common-5.7.31-1.el7.x86_64

mysql-community-libs-5.7.31-1.el7.x86_64

mysql-community-client-5.7.31-1.el7.x86_64

mysql-community-server-5.7.31-1.el7.x86_64

mysql-community-devel-5.7.31-1.el7.x86_64

mysql80-community-release-el7-1.noarch

mysql-community-libs-compat-5.7.31-1.el7.x86_64

修改mysql配置文件

# cat /etc/my.cnf

[client]

port = 3306

default-character-set = utf8

socket = /var/lib/mysql/mysql.sock

[mysql]

no-auto-rehash

[mysqld]

user = mysql

port = 3306

basedir = /usr

datadir = /var/lib/mysql

socket = /var/lib/mysql/mysql.sock

bind-address = 0.0.0.0

pid-file = /var/run/mysqld/mysqld.pid

character-set-server = utf8

collation-server = utf8_general_ci

log-error = /var/log/mysqld.log

slow_query_log = ON

long_query_time = 2

slow_query_log_file = /var/lib/mysql/mysql-slow.log

max_connections = 10240

open_files_limit = 65535

innodb_buffer_pool_size = 3G

innodb_flush_log_at_trx_commit = 2

innodb_log_file_size = 256M

transaction_isolation = READ-COMMITTED

default-storage-engine = innodb

innodb_file_per_table = on

symbolic-links = 0

explicit_defaults_for_timestamp = 1

skip-name-resolve

lower_case_table_names = 1

server-id = 1

[mysqldump]

quick

max_allowed_packet = 32M

 

# systemctl start mysqld

# systemctl status mysqld

# systemctl enable mysqld

# netstat -tunlp | grep mysql

tcp        0      0 0.0.0.0:3306            0.0.0.0:*               LISTEN      3620/mysqld

重新设置root账号密码

# grep 'temporary password' /var/log/mysqld.log

# mysql -uroot -p

mysql> ALTER USER 'root'@'localhost' IDENTIFIED BY 'zabbix@LC2020';

mysql> create database zabbix character set utf8 collate utf8_bin;

mysql> grant all privileges on zabbix.* to zabbix@"%" identified by "zabbix@LC2021";

mysql> flush privileges;

mysql> quit;

2. 部署Nginx

源码编译安装

# wget http://nginx.org/download/nginx-1.19.2.tar.gz

# yum install gcc pcre-devel openssl-devel -y

# useradd -M -s /sbin/nologin nginx

# tar -xzf nginx-1.19.2.tar.gz

# cd nginx-1.19.2/

# ./configure --prefix=/usr/local/nginx --user=nginx --group=nginx --with-http_ssl_module --with-http_stub_status_module

# make && make install

 

修改配置文件修改pid位置

# vim /usr/local/nginx/conf/nginx.conf

pid        /var/run/nginx.pid;

 

配置systemd管理服务

# vim /usr/lib/systemd/system/nginx.service

[Unit]

Description=The NGINX HTTP and reverse proxy server

After=syslog.target network.target remote-fs.target nss-lookup.target

 

[Service]

Type=forking

PIDFile=/var/run/nginx.pid

ExecStartPre=/usr/local/nginx/sbin/nginx -t

ExecStart=/usr/local/nginx/sbin/nginx

ExecReload=/usr/local/nginx/sbin/nginx -s reload

ExecStop=/bin/kill -s QUIT $MAINPID

PrivateTmp=true

 

[Install]

WantedBy=multi-user.target

 

修改Nginx配置文件:

# vim /usr/local/nginx/conf/nginx.conf

server {

        listen       80;

        server_name  localhost;

 

        location / {

            root   html;

            index  index.php index.html index.htm;

        }

 

        location ~ \.php$ {

            root           html;

            fastcgi_pass   127.0.0.1:9000;

            fastcgi_index  index.php;

            fastcgi_param  SCRIPT_FILENAME  $document_root$fastcgi_script_name;

            include        fastcgi_params;

        }

    }

 

启动并加入开机启动:

# systemctl daemon-reload

# systemctl start nginx

# systemctl enable nginx

# netstat -tunlp | grep nginx

tcp        0      0 0.0.0.0:80              0.0.0.0:*               LISTEN      1272/nginx: master

3. 部署PHP

安装依赖包:

# yum install -y gcc gcc-c++ make gd-devel libxml2-devel \

libcurl-devel libjpeg-devel libpng-devel openssl-devel \

libxslt-devel

 

源码编译安装PHP:

# wget http://docs.php.net/distributions/php-5.6.36.tar.gz

# tar zxf php-5.6.36.tar.gz

# cd php-5.6.36/

# ./configure --prefix=/usr/local/php \

--with-fpm-user=nginx \

--with-fpm-group=nginx \

--with-config-file-path=/usr/local/php/etc \

--enable-fpm --enable-opcache \

--with-mysql --with-mysqli \

--enable-session --with-zlib --with-curl --with-gd \

--with-jpeg-dir --with-png-dir --with-freetype-dir \

--enable-mbstring --enable-xmlwriter --enable-xmlreader \

--enable-xml --enable-sockets --enable-bcmath --with-gettext --with-ldap

# make -j 8 && make install

 

拷贝配置文件

# cp php.ini-production /usr/local/php/etc/php.ini

# cp sapi/fpm/php-fpm.conf /usr/local/php/etc/php-fpm.conf

# cp sapi/fpm/php-fpm.service /usr/lib/systemd/system/

 

配置systemd管理服务

# vim /usr/lib/systemd/system/php-fpm.service

[Unit]

Description=The PHP FastCGI Process Manager

After=syslog.target network.target

 

[Service]

Type=simple

PIDFile=/usr/local/php/var/run/php-fpm.pid

ExecStart=/usr/local/php/sbin/php-fpm --nodaemonize --fpm-config /usr/local/php/etc/php-fpm.conf

ExecReload=/bin/kill -USR2 $MAINPID

 

[Install]

WantedBy=multi-user.target

 

修改配置参数

# vim /usr/local/php/etc/php.ini

max_execution_time = 300

memory_limit = 128M

post_max_size = 16M

upload_max_filesize = 2M

max_input_time = 300

always_populate_raw_post_data = -1

date.timezone = Asia/Shanghai

 

启动并加入开机启动:

# systemctl daemon-reload

# systemctl start php-fpm

# systemctl enable php-fpm

# netstat -tunlp | grep php-fpm

tcp        0      0 127.0.0.1:9000          0.0.0.0:*               LISTEN      99393/php-fpm: mast

4. 部署Zabbix Server

安装依赖包

# yum install libxml2-devel libcurl-devel libevent-devel net-snmp-devel mysql-community-devel java-1.8.0-openjdk java-1.8.0-openjdk-devel -y

 

源码编译安装zabbix:

# wget https://cdn.zabbix.com/zabbix/sources/stable/4.0/zabbix-4.0.23.tar.gz

# tar -zxf zabbix-4.0.23.tar.gz

# useradd -M zabbix -s /sbin/nologin

# cd zabbix-4.0.23/

# ./configure --prefix=/usr/local/zabbix --enable-server --enable-agent --enable-java --with-mysql --enable-ipv6 --with-net-snmp --with-libcurl --with-libxml2

# make -j 8 && make install

 

修改配置文件

# vim /usr/local/zabbix/etc/zabbix_server.conf

DBHost=localhost

DBName=zabbix

DBUser=zabbix

DBPassword=zabbix@LC2021

DBSocket=/var/lib/mysql/mysql.sock

DBPort=3306

Timeout=30

 

配置systemd管理服务

# vim /usr/lib/systemd/system/zabbix-server.service

[Unit]

Description=Zabbix Server

After=syslog.target

After=network.target

 

[Service]

Environment="CONFFILE=/usr/local/zabbix/etc/zabbix_server.conf"

EnvironmentFile=-/etc/sysconfig/zabbix-server

Type=forking

Restart=on-failure

PIDFile=/tmp/zabbix_server.pid

KillMode=control-group

ExecStart=/usr/local/zabbix/sbin/zabbix_server -c $CONFFILE

ExecStop=/bin/kill -SIGTERM $MAINPID

RestartSec=10s

TimeoutSec=0

 

[Install]

WantedBy=multi-user.target

 

导入表结构:

# mysql -uzabbix -p'zabbix@LC2021' zabbix < database/mysql/schema.sql

# mysql -uzabbix -p'zabbix@LC2021' zabbix < database/mysql/images.sql

# mysql -uzabbix -p'zabbix@LC2021' zabbix < database/mysql/data.sql

 

启动并加入开机启动

# systemctl daemon-reload

# systemctl start zabbix-server

# systemctl enable zabbix-server

# netstat -tunlp | grep zabbix_server

tcp        0      0 0.0.0.0:10051           0.0.0.0:*               LISTEN      1880/zabbix_server 

tcp6       0      0 :::10051                :::*                    LISTEN      1880/zabbix_server

 

启动zabbix agent 用来监控zabbix server主机

配置systemd管理服务

# cat /usr/lib/systemd/system/zabbix-agent.service

[Unit]

Description=Zabbix Agent

After=syslog.target

After=network.target

 

[Service]

Environment="CONFFILE=/usr/local/zabbix/etc/zabbix_agentd.conf"

EnvironmentFile=-/etc/sysconfig/zabbix-agentd

Type=forking

Restart=on-failure

PIDFile=/tmp/zabbix_agentd.pid

KillMode=control-group

ExecStart=/usr/local/zabbix/sbin/zabbix_agentd -c $CONFFILE

ExecStop=/bin/kill -SIGTERM $MAINPID

RestartSec=10s

TimeoutSec=0

 

[Install]

WantedBy=multi-user.target

 

启动并加入开机启动

# systemctl daemon-reload

# systemctl start zabbix-agent

# systemctl enable zabbix-agent

# netstat -tunlp | grep zabbix_agentd

tcp        0      0 0.0.0.0:10050           0.0.0.0:*               LISTEN      1944/zabbix_agentd 

tcp6       0      0 :::10050                :::*                    LISTEN      1944/zabbix_agentd

5. 部署Zabbix Web

Zabbix前端使用PHP写的,所以必须运行在PHP支持的Web服务器上。只需要简单的从frontends/php路径下复制PHP文件到Web服务器的网站根目录,就可以访问页面了。

 

拷贝Zabbix前端程序到网站根目录:

# rm -rf /usr/local/nginx/html/*

# cp -a frontends/php/* /usr/local/nginx/html/

# chown -R nginx.nginx /usr/local/nginx/html/

 

浏览器中输入 http://172.16.1.120/ 进行安装

wps5 

 

 

wps6 

 

wps7 

 

 

 

 

 

 

 

wps8 

 

wps9 

 

 

 

 

 

 

 

修改 zabbix server 的字符编码,防止界面乱码

# cd /usr/local/nginx/html/assets/fonts/

# mv DejaVuSans.ttf DejaVuSans.ttf.bak

# 上传windows 中的字体样式 simkai.ttf

# ls

DejaVuSans.ttf.bak  simkai.ttf

# mv simkai.ttf DejaVuSans.ttf

# 不需要重启zabbix server 即可解决乱码的问题

二、监控Linux服务器

1. 部署Zabbix Agent

172.16.1.121 节点上操作

安装

# rpm -ivh https://mirrors.tuna.tsinghua.edu.cn/zabbix/zabbix/4.0/rhel/7/x86_64/zabbix-agent-4.0.23-1.el7.x86_64.rpm

 

添加配置文件:

# cat /etc/zabbix/zabbix_agentd.conf

PidFile=/var/run/zabbix/zabbix_agentd.pid

LogFile=/var/log/zabbix/zabbix_agentd.log

LogFileSize=0

DebugLevel=3

EnableRemoteCommands=0

Server=<Zabbix服务器IP>

ListenPort=10050

ListenIP=<当前服务器IP>

StartAgents=3

ServerActive=<Zabbix服务器IP>

Hostname=<当前服务器IP>

HostnameItem=system.hostname

HostMetadata=linux

HostMetadataItem=system.uname

RefreshActiveChecks=120

BufferSend=5

BufferSize=100

MaxLinesPerSecond=20

Timeout=30

AllowRoot=0

User=zabbix

Include=/etc/zabbix/zabbix_agentd.d/*.conf

<Zabbix服务器IP>172.16.1.120

<当前服务器IP>172.16.1.121

 

Hostname=<当前服务器IP>

主动模式下必须和zabbix server监控主机的主机名相同,手动配置

HostnameItem=system.hostname

如果Hostname没有配置取该值,通过系统变量自动取值

HostMetadata=linux

主动注册时的主机元数据,手动配置

HostMetadataItem=system.uname

如果HostMetadata没有配置取该值,通过系统变量自动取值

 

启动并加入开机启动:

# systemctl start zabbix-agent

# systemctl enable zabbix-agent

# netstat -tunlp | grep zabbix_agentd

tcp        0      0 0.0.0.0:10050           0.0.0.0:*               LISTEN      1581/zabbix_agentd

2. 连通性测试

172.16.1.120节点上执行以下命令判断是否可以正常连接到Zabbix agent服务器

# /usr/local/zabbix/bin/zabbix_get -s 172.16.1.121 -p 10050 -k "system.uptime"

5091

返回数字说明连通正常否则可能是被监控机防火墙阻断,无法连通,可以尝试172.16.1.121节点上添加放行规则

iptables -I INPUT -s 172.16.1.120 -p tcp --dport 10050 -j ACCEPT

3. Web页面添加主机

3.1 添加主机

wps10 

 

wps11 

3.2 链接模板

wps12 

选择链接的模板为Template OS Linux

wps13 

 

等待几分钟如图说明已监控上

wps14 

 

 

 

 

 

 

3.3 查看图形

wps15 

 

4. 添加邮件告警

wps16 

wps17 

4.1 配置告警媒介

wps18 

 

wps19 

4.2 配置动作(触发器)

wps20 

配置触发条件

wps21 

 

 

 

 

 

配置告警内容,配置告警内容通过email告警媒介发送给用户或用户群组:

wps22 

# 默认标题:

Problem:{EVENT.NAME}

# 消息内容:

告警主机:{HOSTNAME1}

告警时间:{EVENT.DATE} {EVENT.TIME}

告警等级:{TRIGGER.SEVERITY}

告警信息:{TRIGGER.NAME}

告警项目:{TRIGGER.KEY1}

问题详情:{ITEM.NAME}:{ITEM.VALUE}

当前状态:{TRIGGER.STATUS}:{ITEM.VALUE1}

事件ID:{EVENT.ID}

 

配置告警恢复操作的告警消息内容和通知所有参与者:

wps23 

 

 

4.3 配置告警接收人

配置管理员账号(Admin)接收邮件

wps24 

 

可添加多个收件人

wps25 

 

wps26 

 

 

wps27 

配置完成

4.4 告警测试

1在172.16.1.121 节点上安装 nginx 服务作为测试站点

# yum install nginx -y

# systemctl start nginx

 

2 web 站点监控

wps28 

 

wps29 

 

 

wps30 

 

wps31 

 

wps32 

说明:web 站点监控可用于在不同的主机上监控同一个站点的访问速度和响应速度。不可以使用web场景中的监控项做触发器使用,否则无法触发报警,因为该监控项的最后一个值不会发生改变。

 

 

 

 

 

3 nginx 服务 80 端口监控

(1) 创建应用集

wps33 

 

wps34 

 

(2) 创建监控项

wps35 

 

wps36 

net.tcp.listen[ip,port] 监控项说明

wps37 

(3)创建触发器

wps38 

 

wps39 

 

表达式说明:

wps40 

 

查看触发器状态(正常,说明拿到的值是1):

wps41 

 

 

(4)创建图形

wps42 

 

wps43 

 

查看创建的图形

wps44 

 

4 邮件报警

(1)停止172.16.1.121 节点上的nginx 服务

# systemctl stop nginx

(3) 查看触发器状态

wps45 

 

wps46 

 

(4) 查看problem报警邮件

wps47 

 

(5) 启动172.16.1.121 节点上的nginx 服务,查看Resolved告警邮件

# systemctl start nginx

wps48 

 

5.自定义脚本告警

wps49 

172.16.1.120 节点上进行操作

zabbix_server.conf配置文件中配置存放自定义告警脚本的目录,如果该目录不存在,自己创建即可

# vim /usr/local/zabbix/etc/zabbix_server.conf

AlertScriptsPath=/usr/local/zabbix/share/zabbix/alertscripts

# systemctl restart zabbix-server.service

# cd /usr/local/zabbix/share/zabbix/alertscripts/

5.1 自定义email 脚本

1 编写脚本

# cat sendmail.sh

#!/bin/bash

source /etc/profile

 

# 配置邮件服务器

# yum install mailx -y

# vim /etc/mail.rc

# set from=hyjy2504164765@163.com smtp=smtp.163.com

# set smtp-auth-user=hyjy2504164765@163.com smtp-auth-password=linux123

# set smtp-auth=login

# 测试,163邮箱服务器需要开启 pop3 smtp 服务,并设置客户端授权密码

# echo "this is test mail." | mail -s "monitor test" 2504164765@qq.com

 

# 告警脚本

# cat /usr/local/zabbix/share/zabbix/alertscripts/sendmail.sh

# Zabbix alert script body: mail one alert to one recipient.
# Positional arguments: $1 recipient address, $2 mail subject, $3 mail body.
to=$1
subject=$2
body=$3

# Use a private temp file per invocation. A fixed /tmp/mail.tmp is shared by
# concurrent alerts (they overwrite each other's body) and cannot be written
# by another user once it has been created.
FILE=$(mktemp /tmp/mail.XXXXXX) || exit 1
trap 'rm -f -- "$FILE"' EXIT

printf '%s\n' "$body" > "$FILE"

# Convert CRLF line endings; without this the body arrives as a .bin
# attachment. Best effort: output and errors are discarded so the mail is
# still sent even if the conversion fails.
dos2unix -k "$FILE" &>/dev/null

mail -s "$subject" "$to" < "$FILE"

 

# chmod +x sendmail.sh

# yum install dos2unix -y

# chown -R zabbix.zabbix /tmp/mail.tmp

# ./sendmail.sh 2504164765@qq.com "subject:test" "this is test."

# systemctl restart zabbix-server.service

 

 

 

 

 

 

2 管理>报警媒介类型

wps50 

 

3 配置>(触发器)动作(添加报警发送的媒介)

wps51 

 

4 管理>用户—>Admin(配置告警接收人)

wps52 

5.2 自定义钉钉告警脚本

wps53 

在钉钉群聊中创建钉钉机器人,这里省略步骤。

1 编写脚本

# cat dingding.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Zabbix alert script that forwards an alert text to a DingTalk robot.

Usage: dingding.py <to> <subject> <text>

Only <text> is sent to the robot; <to> and <subject> are read from the
command line but are not part of the message.
"""
import requests
import json
import sys
import os

headers = {'Content-Type': 'application/json;charset=utf-8'}
# Replace with the webhook of your own DingTalk robot.
# NOTE(review): the access token is a secret; avoid publishing it.
api_url = "https://oapi.dingtalk.com/robot/send?access_token=cacd9b61ea412be2ecc1b7060c23d71ac75f34ae5596f9025dc8b4c8c10f594c"


def msg(text):
    """POST `text` as a DingTalk "text" message and print the response body."""
    json_text = {
        "msgtype": "text",
        "text": {
            "content": text
        },
        "at": {
            "atMobiles": [
                "186..."
            ],
            "isAtAll": False
        }
    }
    # A timeout keeps the alert script from hanging forever if the endpoint
    # is unreachable.
    response = requests.post(api_url, json.dumps(json_text),
                             headers=headers, timeout=10)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(response.content)


if __name__ == '__main__':
    if len(sys.argv) < 4:
        sys.stderr.write("Usage: %s <to> <subject> <text>\n" % sys.argv[0])
        sys.exit(1)
    to = sys.argv[1]
    subject = sys.argv[2]
    text = sys.argv[3]
    # Kept inside the guard so importing this module does not send a message
    # (and does not fail on the undefined name `text`).
    msg(text)

 

# yum install python-requests -y

# chmod +x dingding.py

# ./dingding.py ding ding "告警test"

# systemctl restart zabbix-server.service

 

2 管理>报警媒介类型

wps54 

 

3 配置>(触发器)动作(添加报警发送的媒介)

wps55 

 

4 管理—>用户—>Admin(配置告警接收人)

wps56 

 

5.3 自定义告警小结

1  /usr/local/zabbix/share/zabbix/alertscripts/ 目录下创建自定义告警脚本,并赋予可执行权限。

 

2 管理—>报警媒介类型

wps57 

 

3 配置>(触发器)动作

wps58 

 

4 管理>用户—>Admin(配置告警接收人)

wps59 

 

5 告警测试

停掉172.16.1.121 节点上的nginx 服务,告警邮件正常发送

wps60 

相反,启动 172.16.1.121 节点上的nginx 服务后,恢复告警发送。

三、企业监控案例

1.监控nginx 虚拟主机的连接状态

172.16.1.121 节点上进行操作

(1) nginx.conf 配置文件的虚拟主机中添加nginx状态配置

# vim /etc/nginx/nginx.conf

 

location /nginx_status {

          stub_status on;

          access_log off;

          allow 127.0.0.1;

          deny all;

        }

 

# systemctl restart nginx

 

(2) zabbix agent 中添加自定义监控项

# cat /etc/zabbix/zabbix_agentd.d/nginx_status.sh

#!/bin/bash

source /etc/profile

#active   当前活动的客户端连接数,包括waiting连接数

#accepts  已接受的客户端连接数

#handled  已处理的连接总数

#requests 客户端请求的总数

#reading  正在读取请求头的当前连接数

#writing  将响应写回客户端的当前连接数

#waiting  等待请求空闲客户端的当前连接数

 

result="/usr/bin/curl -s http://127.0.0.1:80/nginx_status"

case $1 in

    active)

        $result |awk '/Active/ {print $NF}'

    ;;

    accepts)

        $result |awk 'NR==3 {print $1}'

    ;;

    handled)

        $result |awk 'NR==3 {print $2}'

    ;;

    requests)

        $result |awk 'NR==3 {print $3}'

    ;;

    reading)

        $result |awk '/Reading/ {print $2}'

    ;;

    writing)

        $result |awk '/Writing/ {print $4}'

    ;;

    waiting)

         $result |awk '/Waiting/ {print $6}'

    ;;

          *)

    echo "USAGE:$0 {active|reading|writing|waiting|accepts|handled|requests}"

esac

 

# cat /etc/zabbix/zabbix_agentd.d/nginx_status.conf

UserParameter=nginx.status[*],/etc/zabbix/zabbix_agentd.d/nginx_status.sh $1

# systemctl restart zabbix-agent

 

(3)导入 "Template Nginx Status.xml" 监控模板

wps61 

(4) 172.16.1.121 监控主机链接 "Template Nginx Status" 模板

wps62 

 

(5) 查看监控项(状态都为"已启动",颜色为绿色)

wps63 

2.监控服务器TCP连接状态

172.16.1.121 节点上进行操作

(1) zabbix agent 中添加自定义监控项

# /usr/sbin/ss -antp | awk '{arry[$1]++}END{print arry["LISTEN"]}'

5

 

# cat /etc/zabbix/zabbix_agentd.d/tcp_status.conf

UserParameter=tcp.status[*],netstat -ant | grep -c $1

# UserParameter=tcp.status[*],/usr/sbin/ss -antp | awk '{arry[$$1]++}END{print arry["'$1'"]}'

 

# systemctl restart zabbix-agent.service

 

#  172.16.1.120 节点上进行测试

# /usr/local/zabbix/bin/zabbix_get -s 172.16.1.121 -k tcp.status[LISTEN]

5

 

(2)导入"Template Tcp Status.xml"模板

wps64 

 

(3) 172.16.1.121 监控主机链接 " Template Tcp Status" 模板

wps65 

 

(4) 查看监控项(状态都为"已启动",颜色为绿色)

wps66 

3.监控tomcat(jmx)

wps67 

(1) 安装tomcat

172.16.1.121 节点上进行操作

1) 安装jdk

# tar -xzf jdk-8u45-linux-x64.tar.gz

# mv jdk1.8.0_45/ /usr/local/jdk/

# sed -i '$a export JAVA_HOME=/usr/local/jdk\nexport CLASSPATH=$JAVA_HOME/lib/tools.jar:$JAVA_HOME/jre/lib/rt.jar\nexport PATH=$JAVA_HOME/bin:$PATH' /etc/profile

# source /etc/profile

2) 安装tomcat

# tar -xzf apache-tomcat-8.5.37.tar.gz

# mv apache-tomcat-8.5.37 /usr/local/tomcat/

3) 优化tomcat

内存优化

# "# OS specific support.  $var _must_ be set to either true or false."下配置

sed -i '117a \

JAVA_OPTS="-Djava.security.egd=file:/dev/./urandom \

           -Djava.awt.headless=true \

           -Dfile.encoding=UTF-8 \

           -server \

           -Xms1024m \

           -Xmx1024m \

           -XX:NewSize=512m \

           -XX:MaxNewSize=512m \

           -XX:PermSize=512m \

           -XX:MaxPermSize=512m"' \

/usr/local/tomcat/bin/catalina.sh

 

tomcat.service 相关

# [ -z "$CATALINA_BASE" ] && CATALINA_BASE="$CATALINA_HOME" 下面配置

sed -i '160a \

CATALINA_PID="$CATALINA_BASE/tomcat.pid"' \

/usr/local/tomcat/bin/catalina.sh

 

4) 添加jmx 参数

# "# OS specific support.  $var _must_ be set to either true or false."下配置

sed -i '117a \

CATALINA_OPTS="-Dcom.sun.management.jmxremote \

               -Dcom.sun.management.jmxremote.port=12345 \

               -Dcom.sun.management.jmxremote.authenticate=false \

               -Dcom.sun.management.jmxremote.ssl=false \

               -Djava.rmi.server.hostname=172.16.1.121"' \

/usr/local/tomcat/bin/catalina.sh

 

5) tomcat 添加systemctl 配置

# cat tomcat.service

# [unit]    配置了服务的描述,规定了在network启动之后执行

# [service] 配置服务的pid,服务的启动,停止,重启

# [install] 配置了服务开机自启时所挂载的 target(如 multi-user.target)

 

[Unit]

Description=tomcat Server 8.5

#After=syslog.target network.target remote-fs.target nss-lookup.target

 

[Service]

Type=forking

Environment="JAVA_HOME=/usr/local/jdk"

PIDFile=/usr/local/tomcat/tomcat.pid

ExecStart=/usr/local/tomcat/bin/startup.sh

ExecReload=/bin/kill -s HUP $MAINPID

ExecStop=/usr/local/tomcat/bin/shutdown.sh

PrivateTmp=true

Restart=on-failure

 

[Install]

WantedBy=multi-user.target

 

# systemctl daemon-reload

# systemctl start tomcat.service

# systemctl enable tomcat.service

# netstat -tunlp | grep java

wps68 

 

(2) 配置 zabbix-java-gateway

172.16.1.120 节点上进行操作

1) 配置 zabbix-java-gateway

# grep -Ev "^$|#" /usr/local/zabbix/sbin/zabbix_java/settings.sh

LISTEN_IP="0.0.0.0"

LISTEN_PORT=10052

PID_FILE="/tmp/zabbix_java.pid"

START_POLLERS=5

TIMEOUT=30

 

2) zabbix-java-gateway 加入systemctl

# cat zabbix-java-gateway.service

[Unit]

Description=Zabbix Java Gateway

After=syslog.target

After=network.target

 

[Service]

Type=forking

KillMode=process

PIDFile=/tmp/zabbix_java.pid

ExecStart=/usr/local/zabbix/sbin/zabbix_java/startup.sh

SuccessExitStatus=143

User=zabbix

Group=zabbix

 

[Install]

WantedBy=multi-user.target

 

3) 启动 zabbix-java-gateway

# systemctl daemon-reload

# systemctl start zabbix-java-gateway.service

# systemctl enable zabbix-java-gateway

# netstat -tunlp | grep java

tcp6       0      0 :::10052                :::*                    LISTEN      8464/java

 

4) 配置zabbix server 连接 zabbix-java-gateway

# vim /usr/local/zabbix/etc/zabbix_server.conf

JavaGateway=127.0.0.1

JavaGatewayPort=10052

StartJavaPollers=5

# systemctl restart zabbix-server.service

 

(3) 添加tomcat监控

172.16.1.121 监控主机上添加jmx 接口

wps69 

 

(4) 链接jmx 模板

wps70 

 

监控的172.16.1.121 主机上 jmx 可用性变绿,说明监控jmx成功

wps71 

(5) 查看收集的日志

堆栈内存

# 应用集:Memory

wps72 

堆栈老年代内存

# 应用集:Memory pool

wps73 

4. 监控mysql

4.1 配置mysql 主从同步

172.16.1.121 节点上安装MySQL5.7.30作为master节点

172.16.1.122 节点上安装MySQL5.7.30 作为slave节点

1 172.16.1.121 节点上操作(mysql master)

(1) 安装依赖包

# yum install -y libaio

 

(2) 解压安装包

# tar -xzf mysql-5.7.30-linux-glibc2.12-x86_64.tar.gz

# mv mysql-5.7.30-linux-glibc2.12-x86_64/ /usr/local/mysql/

 

(3) 创建mysql 用户

# groupadd -g 1600 mysql

# useradd -g mysql -u 1600 mysql

# id mysql

uid=1600(mysql) gid=1600(mysql) groups=1600(mysql)

 

(4) 创建存放数据库文件的目录并赋予mysql安装目录的属主和属组都是mysql

# mkdir -p /usr/local/mysql/data/

# chown -R mysql.mysql /usr/local/mysql/

 

(5) 配置my.cnf 文件

# cat /etc/my.cnf

[client]

port = 3306

default-character-set = utf8

socket = /usr/local/mysql/data/mysql.sock

[mysql]

no-auto-rehash

[mysqld]

user = mysql

port = 3306

basedir = /usr/local/mysql

datadir = /usr/local/mysql/data

socket = /usr/local/mysql/data/mysql.sock

bind-address = 0.0.0.0

pid-file = /usr/local/mysql/data/mysqld.pid

character-set-server = utf8

collation-server = utf8_general_ci

log-error = /usr/local/mysql/data/mysqld.log

slow_query_log = ON

long_query_time = 2

slow_query_log_file = /usr/local/mysql/data/mysql-slow.log

max_connections = 10240

open_files_limit = 65535

innodb_buffer_pool_size = 1G

innodb_flush_log_at_trx_commit = 2

innodb_log_file_size = 256M

transaction_isolation = READ-COMMITTED

default-storage-engine = innodb

innodb_file_per_table = on

symbolic-links = 0

explicit_defaults_for_timestamp = 1

skip-name-resolve

lower_case_table_names = 1

server-id = 1

log-bin = /usr/local/mysql/data/mysql-bin

binlog_cache_size = 4M

binlog_format = mixed

max_binlog_cache_size = 8M

max_binlog_size = 512M

expire_logs_days = 14

[mysqldump]

quick

max_allowed_packet = 32M

 

(6) 初始化mysql 数据库

# /usr/local/mysql/bin/mysqld --initialize --user=mysql --basedir=/usr/local/mysql --datadir=/usr/local/mysql/data

 

(7) mysql 服务加入到systemctl

# cat /usr/lib/systemd/system/mysqld.service

[Unit]

Description=MySQL Server

Documentation=man:mysqld(8)

Documentation=http://dev.mysql.com/doc/refman/en/using-systemd.html

After=network.target

After=syslog.target

[Install]

WantedBy=multi-user.target

[Service]

User=mysql

Group=mysql

Type=forking

PIDFile=/usr/local/mysql/data/mysqld.pid

TimeoutSec=0

PermissionsStartOnly=true

# ExecStartPre=/usr/local/mysql/bin/mysqld_pre_systemd

ExecStart=/usr/local/mysql/bin/mysqld --defaults-file=/etc/my.cnf --daemonize --pid-file=/usr/local/mysql/data/mysqld.pid $MYSQLD_OPTS

EnvironmentFile=-/etc/sysconfig/mysql

LimitNOFILE = 65535

Restart=on-failure

RestartPreventExitStatus=1

PrivateTmp=false

 

(8) 启动mysql 数据库

# systemctl daemon-reload

# systemctl start mysqld.service

# systemctl enable mysqld.service

# cat >> /etc/profile << EOF

export PATH=/usr/local/mysql/bin/:\$PATH

EOF

# source /etc/profile

 

(9) 修改数据库密码

# grep 'temporary password' /usr/local/mysql/data/mysqld.log

2020-08-21T05:09:58.745255Z 1 [Note] A temporary password is generated for root@localhost: UTu>tfpa_2&;

# mysql -uroot -p'UTu>tfpa_2&;'

mysql> ALTER USER 'root'@'localhost' IDENTIFIED BY 'Liuchang@2020';

 

2 172.16.1.122 节点上操作(mysql slave)

(1) 安装依赖包

# yum install -y libaio

 

(2) 解压安装包

# tar -xzf mysql-5.7.30-linux-glibc2.12-x86_64.tar.gz

# mv mysql-5.7.30-linux-glibc2.12-x86_64/ /usr/local/mysql/

 

(3) 创建mysql 用户

# groupadd -g 1600 mysql

# useradd -g mysql -u 1600 mysql

# id mysql

uid=1600(mysql) gid=1600(mysql) groups=1600(mysql)

 

(4) 创建存放数据库文件的目录并赋予mysql安装目录的属主和属组都是mysql

# mkdir -p /usr/local/mysql/data/

# chown -R mysql.mysql /usr/local/mysql/

 

(5) 配置my.cnf 文件

# cat /etc/my.cnf

[client]

port = 3306

default-character-set = utf8

socket = /usr/local/mysql/data/mysql.sock

[mysql]

no-auto-rehash

[mysqld]

user = mysql

port = 3306

basedir = /usr/local/mysql

datadir = /usr/local/mysql/data

socket = /usr/local/mysql/data/mysql.sock

bind-address = 0.0.0.0

pid-file = /usr/local/mysql/data/mysqld.pid

character-set-server = utf8

collation-server = utf8_general_ci

log-error = /usr/local/mysql/data/mysqld.log

slow_query_log = ON

long_query_time = 2

slow_query_log_file = /usr/local/mysql/data/mysql-slow.log

max_connections = 10240

open_files_limit = 65535

innodb_buffer_pool_size = 1G

innodb_flush_log_at_trx_commit = 2

innodb_log_file_size = 256M

transaction_isolation = READ-COMMITTED

default-storage-engine = innodb

innodb_file_per_table = on

symbolic-links = 0

explicit_defaults_for_timestamp = 1

skip-name-resolve

lower_case_table_names = 1

server-id = 2

slave-skip-errors=1007,1022,1050,1062,1169

relay-log=/usr/local/mysql/data/relay-log

max_relay_log_size=512M

relay-log-purge=ON

read-only

[mysqldump]

quick

max_allowed_packet = 32M

 

(6) 初始化mysql 数据库

# /usr/local/mysql/bin/mysqld --initialize --user=mysql --basedir=/usr/local/mysql --datadir=/usr/local/mysql/data

 

(7) mysql 服务加入到systemctl

# cat /usr/lib/systemd/system/mysqld.service

[Unit]

Description=MySQL Server

Documentation=man:mysqld(8)

Documentation=http://dev.mysql.com/doc/refman/en/using-systemd.html

After=network.target

After=syslog.target

[Install]

WantedBy=multi-user.target

[Service]

User=mysql

Group=mysql

Type=forking

PIDFile=/usr/local/mysql/data/mysqld.pid

TimeoutSec=0

PermissionsStartOnly=true

# ExecStartPre=/usr/local/mysql/bin/mysqld_pre_systemd

ExecStart=/usr/local/mysql/bin/mysqld --defaults-file=/etc/my.cnf --daemonize --pid-file=/usr/local/mysql/data/mysqld.pid $MYSQLD_OPTS

EnvironmentFile=-/etc/sysconfig/mysql

LimitNOFILE = 65535

Restart=on-failure

RestartPreventExitStatus=1

PrivateTmp=false

 

(8) 启动mysql 数据库

# systemctl daemon-reload

# systemctl start mysqld.service

# systemctl enable mysqld.service

# cat >> /etc/profile << EOF

export PATH=/usr/local/mysql/bin/:\$PATH

EOF

# source /etc/profile

 

(9) 修改数据库密码

# grep 'temporary password' /usr/local/mysql/data/mysqld.log

2020-08-21T05:09:59.191349Z 1 [Note] A temporary password is generated for root@localhost: ,8!%d+:P&w7v

# mysql -uroot -p',8!%d+:P&w7v'

mysql> ALTER USER 'root'@'localhost' IDENTIFIED BY 'Liuchang@2021';

 

3 主从同步配置

(1) 主库操作,172.16.1.121节点

备份数据库:

# mysqldump -uroot -p'Liuchang@2020' -A -F -R --single-transaction --master-data=1 --events --hex-blob --triggers --flush-privileges | gzip > /tmp/mysql_master_20200821.sql.bak.gz

##################备份参数说明##################

# 普通备份参数:

# -R: 转储存储的例程(功能和过程);

# -E: --events:转储事件;

# -A: 转储所有数据库, 这将与--databases以及所有已选择的数据库相同;

# -B: 转储多个数据库,增加建库语句和use连接库的语句;

# --hex-blob: 转储十六进制格式的二进制字符串(BINARY、VARBINARY、BLOB);

# --triggers: 为每个转储的表转储触发器;

# --flush-privileges: 转储mysql数据库后,发出FLUSH PRIVILEGES语句;

# --single-transaction: 设置事务的隔离级别为可重复读(REPEATABLE READ),用于热备,只适用于MySQL InnoDB引擎。这样能保证在一个事务中所有相同的查询读取到同样的数据,也就大概保证了在dump期间,如果其他innodb引擎的线程修改了表的数据并提交,对该dump线程的数据并无影响,在这期间不会锁表。

 

# 用于MySQL开启binlog时的参数:

# -F: 开始转储之前刷新服务器中的日志文件;

# --master-data=1: 备份中增加binlog日志文件名及对应的位置点,1不加注释,2加注释;如下(记录-F刷新后binlog日志位置点)

CHANGE MASTER TO MASTER_LOG_FILE='mysql-bin.000016', MASTER_LOG_POS=154;

 

# -d:没有行信息,只备份表结构,不备份表数据; Usage: -d <数据库> <表名称,多个表名称可用空格隔开>

# -t:不要写表创建信息,只备份表数据,不备份表结构;Usage: -t <数据库> <表名称,多个表名称可用空格隔开>

################################################

 

拷贝主库备份到从库服务器上:

# scp -rp /tmp/mysql_master_20200821.sql.bak.gz root@172.16.1.122:/tmp/

 

创建从库用于复制的账号:

# mysql -uroot -p'Liuchang@2020'

mysql> grant replication slave on *.* to 'repl'@'172.16.1.%' identified by 'Liuchang@2022';

mysql> flush privileges;

 

(2) 从库操作,172.16.1.122节点

还原备份的主库:

# gzip -d /tmp/mysql_master_20200821.sql.bak.gz

# mysql -uroot -p'Liuchang@2021' < /tmp/mysql_master_20200821.sql.bak

# 此时root 密码发生了改变,变成了 master root 密码 "Liuchang@2020"

 

执行CHANGE MASTER语句, 不需要MASTER_LOG_FILE文件和MASTER_LOG_POS配置位置点,因为

在备份主库时 --master-data=1 参数已经记录了下来:

# mysql -uroot -p'Liuchang@2020'

mysql>

CHANGE MASTER TO

MASTER_HOST='172.16.1.121',

MASTER_PORT=3306,

MASTER_USER='repl',

MASTER_PASSWORD='Liuchang@2022';

 

开启从库:

mysql> start slave;

 

查看从库状态,确定从库是否启动成功:

mysql> show slave status\G

wps74 

wps75 

 

4 主从配置补充

(1) 查看主库的状态

mysql> show master status;

wps76 

 

(2) 完整的CHANGE MASTER配置

CHANGE MASTER TO

MASTER_HOST='172.16.1.121',

MASTER_PORT=3306,

MASTER_USER='repl',

MASTER_PASSWORD='Liuchang@2022',

MASTER_LOG_FILE='mysql-bin.000016',

MASTER_LOG_POS=598;

 

4.2 zabbix 监控 mysql

172.16.1.122节点上进行操作

1 创建监控用户,在172.16.1.121 节点主数据库上操作,账户会自动同步到172.16.1.122 节点的从数据库上,

如果在从库建立账户会导致从库报sql异常。

mysql> CREATE USER 'zbx_monitor'@'%' IDENTIFIED BY 'zabbix5.0@2023';

mysql> GRANT USAGE,REPLICATION CLIENT,PROCESS,SHOW DATABASES,SHOW VIEW ON *.* TO 'zbx_monitor'@'%';

mysql> flush privileges;

 

2 自定义监控项脚本

# cat /etc/zabbix/zabbix_agentd.d/userparameter_mysql.conf

# 连接数

UserParameter=mysql.max_connections,echo "show variables where Variable_name='max_connections';" | /usr/local/mysql/bin/mysql -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' -N 2>/dev/null |awk '{print $2}'

UserParameter=mysql.current_connections,echo "show global status where Variable_name='Threads_connected';" | /usr/local/mysql/bin/mysql -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' -N 2>/dev/null |awk '{print $2}'

# 缓冲池

UserParameter=mysql.buffer_pool_size,echo "show variables where Variable_name='innodb_buffer_pool_size';" | /usr/local/mysql/bin/mysql -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' -N 2>/dev/null |awk '{printf "%.2f",$2/1024/1024/1024}'

UserParameter=mysql.buffer_pool_usage_percent,echo "show global status where Variable_name='Innodb_buffer_pool_pages_free' or Variable_name='Innodb_buffer_pool_pages_total';" | /usr/local/mysql/bin/mysql -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' -N 2>/dev/null | awk '{a[NR]=$2}END{printf "%.1f",100-((a[1]/a[2])*100)}'

# 增删改查

UserParameter=mysql.status[*],echo "show global status where Variable_name='$1';" | /usr/local/mysql/bin/mysql -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' -N 2>/dev/null | awk '{print $$2}'

# 实例状态

UserParameter=mysql.ping,/usr/local/mysql/bin/mysqladmin -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' ping 2>/dev/null | grep -c alive

UserParameter=mysql.version,/usr/local/mysql/bin/mysql -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' -V 2>/dev/null | awk '{print $5}' | awk -F ',' '{print $1}'

# 主从同步状态

UserParameter=mysql.slave[*],/etc/zabbix/zabbix_agentd.d/mysql_slave_status.sh "$1"

# 数据库大小

UserParameter=mysql.dbsize[*],/usr/local/mysql/bin/mysql -h'localhost' -u'zbx_monitor' -p'zabbix5.0@2023' -sN -e "SELECT SUM(DATA_LENGTH + INDEX_LENGTH) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA='$1'" 2>/dev/null |awk '{printf "%.2f",$$1/1024/1024}'

 

 

 

 

# cat /etc/zabbix/zabbix_agentd.d/mysql_slave_status.sh

#!/bin/bash
# Purpose: read MySQL replication status and print a single value for the
# Zabbix agent. The check to run is chosen by the one required argument
# (io | sql | delay).
. /etc/profile

# Connection parameters of the monitoring account.
HOST="localhost"
USER="zbx_monitor"
PASSWORD="zabbix5.0@2023"
PORT="3306"

# mysql client command line; it is expanded unquoted by the check functions
# so that it word-splits into a command plus its options.
CONNECTION="mysql -h${HOST} -P${PORT} -u${USER} -p${PASSWORD}"

# Exactly one argument is accepted; anything else prints the usage and stops.
if [ "$#" -ne 1 ]; then
    echo -e "arg error! Usage: $0 [io | sql | delay]"
    exit 1
fi

# Name of the requested check, used by the dispatch at the end of the script.
NAME=$1

 

# Report the replication I/O thread state.
# Globals:  CONNECTION (read) - mysql client command line, word-split on use
# Outputs:  prints 0 when the Slave_IO_Running field of "show slave status"
#           reads "Yes", otherwise prints 1 (including when the query fails,
#           because client errors are discarded)
IO() {
    local io_state
    io_state=$(${CONNECTION} -e "show slave status\G;" 2> /dev/null \
        | awk '/Slave_IO_Running/ {print $2}')

    case "$io_state" in
        Yes) echo 0 ;;
        *)   echo 1 ;;
    esac
}

 

# Report the replication SQL thread state.
# Globals:  CONNECTION (read) - mysql client command line, word-split on use
# Outputs:  prints 0 when the "Slave_SQL_Running:" field of
#           "show slave status" reads "Yes", otherwise prints 1
SQL() {
    local sql_state
    # The trailing colon keeps "Slave_SQL_Running_State:" from matching.
    sql_state=$(${CONNECTION} -e "show slave status\G;" 2> /dev/null \
        | awk '/Slave_SQL_Running:/ {print $2}')

    case "$sql_state" in
        Yes) echo 0 ;;
        *)   echo 1 ;;
    esac
}

 

# Report the replication lag.
# Globals:  CONNECTION (read) - mysql client command line, word-split on use
# Outputs:  prints the value of the "Seconds_Behind_Master:" field of
#           "show slave status" exactly as the client shows it; prints an
#           empty line when the field is not found
DELAY() {
    local lag
    lag=$(${CONNECTION} -e "show slave status\G;" 2> /dev/null \
        | awk '/Seconds_Behind_Master:/ {print $2}')
    echo "$lag"
}

 

# Run the check selected by NAME; any other value prints the usage line.
if [ "$NAME" = "io" ]; then
    IO
elif [ "$NAME" = "sql" ]; then
    SQL
elif [ "$NAME" = "delay" ]; then
    DELAY
else
    echo -e "Usage: $0 [io | sql | delay]"
fi

 

为脚本赋予执行权限

chmod +x /etc/zabbix/zabbix_agentd.d/mysql_slave_status.sh

 

3 导入"Template DB MySQL New.xml"模板 

wps77 

 

wps78 

 

 

 

4 查看监控数据

wps79 

5 监控docker 容器

172.16.1.122节点上进行操作

5.1 安装docker

安装依赖包

# yum install -y yum-utils device-mapper-persistent-data lvm2

 

添加Docker软件包源

# yum-config-manager \

    --add-repo \

    https://download.docker.com/linux/centos/docker-ce.repo

 

更新为阿里云的源

# wget -O /etc/yum.repos.d/docker-ce.repo http://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo

 

清理源缓存

# yum clean all

 

安装Docker CE

# yum install -y docker-ce

 

启动Docker服务并设置开机启动

# systemctl start docker

# systemctl enable docker

 

查看docker版本

# docker -v

Docker version 19.03.12, build 48a66213fe

 

添加阿里云的镜像仓库

# mkdir -p /etc/docker

# tee /etc/docker/daemon.json <<-'EOF'

{

  "registry-mirrors": ["https://b1cx9cn7.mirror.aliyuncs.com"]

}

EOF

 

重启docker

# systemctl daemon-reload

# systemctl restart docker

 

安装测试docker环境

# docker run -d --name test_nginx nginx

# docker run -d --name apache httpd

5.2 配置脚本

自动发现规则脚本

# cat /etc/zabbix/zabbix_agentd.d/docker.py

#!/usr/bin/python

import sys

import os

import json

 

 

def discover():
    """Print a Zabbix low-level discovery document listing Docker containers.

    Runs ``sudo docker ps -a --format {{.Names}}`` and emits one JSON object
    on stdout of the form ``{"data": [{"{#CONTAINERNAME}": <name>}, ...]}``,
    with one entry per line of command output.
    """
    with os.popen("sudo docker ps -a --format {{.Names}}") as pipe:
        containers = [{'{#CONTAINERNAME}': line.replace("\n", "")} for line in pipe]

    # A parenthesised single argument prints the same text under Python 2
    # and Python 3, so the script no longer depends on the print statement.
    print(json.dumps({'data': containers}))

 

 

def status(name, action):
    """Print one monitoring value for a single Docker container.

    name   -- container name, interpolated into the docker command line.
    action -- "ping" prints 1 when ``docker inspect`` reports
              ``.State.Running`` as "true" and 0 otherwise; any other value
              is used as a ``docker stats`` format field (``{{.<action>}}``).

    For ``docker stats`` fields, output containing "%" is printed as a float
    with the percent sign removed; anything else is printed unchanged.
    """
    if action == "ping":
        cmd = 'sudo docker inspect --format="{{.State.Running}}" %s' % name
    else:
        cmd = 'sudo docker stats %s --no-stream --format "{{.%s}}"' % (name, action)

    # Close the pipe once the output has been read instead of leaving it open.
    with os.popen(cmd) as pipe:
        result = pipe.read().replace("\n", "")

    # print(...) with a single argument behaves the same on Python 2 and 3.
    if action == "ping":
        print(1 if result == "true" else 0)
    elif "%" in result:
        print(float(result.replace("%", "")))
    else:
        print(result)

 

 

if __name__ == '__main__':
    # With a container name and an action, report that single value;
    # with fewer arguments, emit the discovery document instead.
    try:
        name, action = sys.argv[1], sys.argv[2]
    except IndexError:
        discover()
    else:
        # Called outside the try block so that an IndexError raised inside
        # status() is not mistaken for missing command-line arguments.
        status(name, action)

 

为脚本赋予执行权限

# chmod +x /etc/zabbix/zabbix_agentd.d/docker.py

 

监控项脚本

# cat /etc/zabbix/zabbix_agentd.d/docker.conf

UserParameter=docker.discovery,/etc/zabbix/zabbix_agentd.d/docker.py

UserParameter=docker.[*],/etc/zabbix/zabbix_agentd.d/docker.py $1 $2

 

修改sudoers文件,为zabbix用户提权

# vim /etc/sudoers

zabbix    ALL=(ALL)    NOPASSWD: ALL

 

zabbix server 端测试

# /usr/local/zabbix/bin/zabbix_get -s 172.16.1.122 -k docker.discovery

{"data": [{"{#CONTAINERNAME}": "apache"}, {"{#CONTAINERNAME}": "test_nginx"}]}

# /usr/local/zabbix/bin/zabbix_get -s 172.16.1.122 -k docker.[test_nginx,MemPerc]

0.07

# /usr/local/zabbix/bin/zabbix_get -s 172.16.1.122 -k docker.[test_nginx,ping]

1

5.3 导入模板

wps80 

 

wps81 

 

wps82 

四、自动化监控

172.16.1.123节点进行操作

wps83 

1 网络自动发现

wps84 

原理:zabbix server 根据网段扫描zabbix agent 客户端,将扫描到的客户端加入zabbixserver监控。

注意:网络发现添加监控的主机名称是由"自动发现规则"中的"设备唯一性标准(IP地址)"决定的。

1.1 创建网络自动发现规则

wps85 

 

wps86 

# /usr/local/zabbix/bin/zabbix_get -s 172.16.1.123 -k 'system.uname'

Linux slavenode3 3.10.0-862.el7.x86_64 #1 SMP Fri Apr 20 16:44:24 UTC 2018 x86_64

 

 

 

 

 

 

 

 

 

 

 

 

1.2 查看已经发现的主机

wps87 

 

1.3 配置"动作"对发现的主机进行操作

wps88 

wps89 

 

wps90 

 

 

 

 

 

 

 

 

 

 

 

1.4 查看"主机"

wps91 

2 自动注册

wps92 

原理:zabbix agent 主动向zabbix server 上报,zabbix server将上报的主机加入到zabbix server

注意:

(1) 主动注册添加监控的主机名称是由agent 配置文件中的Hostname 名称确定的

(2) 主动注册agent 监听的IP地址 ListenIP 必须为agent 所在服务器的ip地址,

如果监听的地址是 0.0.0.0 ,自动注册添加的主机"接口"会是"0.0.0.0:10050",这样实际监控到的

是zabbix server主机上的 agent 

 

2.1 确保zabbix agent (172.16.1.123) 配置文件中有 ServerActive=172.16.1.120

Hostname=172.16.1.123 HostMetadata=linux 。网络发现和自动注册只使用一个

就好了,这里将网络发现禁用掉。

wps93 

 

2.2 创建自动注册

wps94 

 

wps95 

 

wps96 

默认标题:

Auto registration: {HOST.HOST}

消息内容:

Host name: {HOST.HOST}

Host IP: {HOST.IP}

Agent port: {HOST.PORT}

 

wps97 

 

 

 

 

 

 

 

 

 

2.3 查看"主机"

wps98 

3 zabbix 监控主动模式

注意:zabbix 监控主动模式下,前提条件如下:

(1) 确保zabbix agent (172.16.1.123) 配置文件中有如下配置

ServerActive=172.16.1.120

# 用于主动和zabbix server进行通信

Hostname=172.16.1.123

# zabbix server 监控主机的主机名

(2) 监控模板中的监控项必须为主动模式

3.1 全克隆"Template OS Linux"模板为"Template OS Linux Active"

wps99 

 

 

 

wps100 

 

3.2 选中"Template OS Linux Active"所有监控项,批量修改监控项类型为"zabbix客户端(主动式)"

wps101 

 

wps102 

 

3.3 手动修改"Template OS Linux Active"所有"自动发现规则"和对应的"监控项原型"的类型为"zabbix客户端(主动式)"

wps103 

 

wps104 

 

wps105 

 

3.4 添加监控主机,链接模板为"Template OS Linux Active"

wps106 

 

wps107 

 

3.5 查看监控主机

wps108 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

# 因为使用的是 agent 主动模式监控主机,所以收集到的监控项数据基本上都是同一个时间

wps109 

4 zabbix-proxy

wps110 

适用环境:zabbix-server处于外网,zabbix-agent处于各个地方的内网,所有内网都是互通的。其中有一台内网服务器既可以和内网通信,也可以和外网进行通信,这台内网服务器就做zabbix-proxy服务器。

172.16.1.121 节点上操作

4.1 创建一个数据库供zabbix-proxy使用

# mysql -uroot -p'Liuchang@2020'

mysql> create database zabbix_proxy character set utf8 collate utf8_bin;

mysql> grant all on zabbix_proxy.* to zabbix_proxy@'%' identified by 'Liuchang@2024';

mysql> flush privileges;

 

4.2 安装zabbix-proxy 并进行配置

1 安装

# wget https://mirrors.tuna.tsinghua.edu.cn/zabbix/zabbix/4.0/rhel/7/x86_64/zabbix-proxy-mysql-4.0.23-1.el7.x86_64.rpm

# yum localinstall zabbix-proxy-mysql-4.0.23-1.el7.x86_64.rpm -y

 

2 导入初始化数据

查看初始化数据库文件存放路径

# rpm -ql zabbix-proxy-mysql

/usr/share/doc/zabbix-proxy-mysql-4.0.23/schema.sql.gz

 

导入库

# zcat /usr/share/doc/zabbix-proxy-mysql-4.0.23/schema.sql.gz |mysql -h 127.0.0.1 -uzabbix_proxy -p'Liuchang@2024' zabbix_proxy

 

3 编辑配置文件

# grep -Ev "^$|#" /etc/zabbix/zabbix_proxy.conf

ProxyMode=0

Server=172.16.1.120

ServerPort=10051

Hostname=zabbix_proxy

HostnameItem=system.hostname

ListenPort=10051

LogFile=/var/log/zabbix/zabbix_proxy.log

LogFileSize=0

PidFile=/var/run/zabbix/zabbix_proxy.pid

SocketDir=/var/run/zabbix

DBHost=localhost

DBName=zabbix_proxy

DBUser=zabbix_proxy

DBPassword=Liuchang@2024

DBSocket=/usr/local/mysql/data/mysql.sock

DBPort=3306

SNMPTrapperFile=/var/log/snmptrap/snmptrap.log

ListenIP=172.16.1.121

Timeout=30

ExternalScripts=/usr/lib/zabbix/externalscripts

LogSlowQueries=3000

 

参数说明:

ProxyMode=0

# 启用zabbix-proxy 主动模式

Server=172.16.1.120

ServerPort=10051

# zabbix-proxy 连接zabbix-server IP地址和端口号

Hostname=zabbix_proxy

# zabbix-proxy 主动模式下向zabbix-server检查的主机名

ListenPort=10051

# zabbix-proxy 监听的端口号

 

DBHost=localhost

DBName=zabbix_proxy

DBUser=zabbix_proxy

DBPassword=Liuchang@2024

DBSocket=/usr/local/mysql/data/mysql.sock

DBPort=3306

# 以上6行是连接数据库的配置

 

ListenIP=172.16.1.121

# zabbix-proxy 监听的ip地址

 

4.3 启动zabbix-proxy 并加入开机自启动

# systemctl start zabbix-proxy.service

# systemctl enable zabbix-proxy.service

# netstat -tunlp | grep zabbix_proxy

tcp        0      0 172.16.1.121:10051      0.0.0.0:*               LISTEN      27591/zabbix_proxy

 

4.4 zabbix-agent配置文件,在172.16.1.123节点上配置

# cat /etc/zabbix/zabbix_agentd.conf

PidFile=/var/run/zabbix/zabbix_agentd.pid

LogFile=/var/log/zabbix/zabbix_agentd.log

LogFileSize=0

DebugLevel=3

EnableRemoteCommands=0

Server=172.16.1.121

ListenPort=10050

ListenIP=172.16.1.123

StartAgents=3

ServerActive=172.16.1.121

Hostname=172.16.1.123

HostnameItem=system.hostname

HostMetadata=linux

HostMetadataItem=system.uname

RefreshActiveChecks=120

BufferSend=5

BufferSize=100

MaxLinesPerSecond=20

Timeout=30

AllowRoot=0

User=zabbix

Include=/etc/zabbix/zabbix_agentd.d/*.conf

 

说明:配置文件必须保证有以下三个参数

Server=172.16.1.121

#  配置的是zabbix-proxy 的IP地址,允许zabbix-proxy 连接agent

ServerActive=172.16.1.121

#  agent 主动模式下,agent 主动向zabbix-proxy IP发送数据。

Hostname=172.16.1.123

#  agent 主动模式下,agent 主动向zabbix-proxy 检查监控的主机名。

 

重启zabbix-agent

# systemctl restart zabbix-agent.service

 

 

 

4.5 Server web配置

1 创建一个agent 代理程序

wps111 

 

wps112 

提示:

(1) 如果"agent代理程序名称"填写不正确,zabbix-proxy 日志中会报如下错误:

1542:20200824:214144.860 cannot send proxy data to server at "172.16.1.120": proxy "zabbix_proxy" not found

(2) 如果"代理地址"填写不正确,zabbix-proxy 日志中会报如下错误:

99730:20200824:165910.834 cannot send proxy data to server at "172.16.1.120": connection is not allowed

wps113 

 

2 添加主机使用"Template OS Linux"被动监控项模板

wps114 

 

wps115 

zabbix-proxy 日志会输出如下内容

18206:20200824:215608.924 enabling Zabbix agent checks on host "172.16.1.123 Passive": host became available

 

wps116 

 

wps117 

 

3 添加主机使用"Template OS Linux Active"主动监控项模板

wps118 

 

 

wps119 

 

# 如果ZBX 指示灯不亮,可以重启一下zabbix_proxy,刷新下界面就好了

wps120 

 

wps121 

五、Grafana数据可视化

172.16.1.120 节点上进行操作,需要docker环境,参考上面步骤

1.安装grafana

因为zabbix 不支持grafana 7 以上的版本,所以这里使用grafana 6.7.4 版本

如果以docker容器的方式启动grafana无法安装zabbix插件,这里使用官方的rpm进行安装。

# wget https://dl.grafana.com/oss/release/grafana-6.7.4-1.x86_64.rpm

# yum localinstall grafana-6.7.4-1.x86_64.rpm -y

# systemctl start grafana-server.service

# systemctl enable grafana-server.service

 

访问grafana:http://172.16.1.120:3000/

wps122 

2.grafana连接zabbix

安装zabbix 插件

wps123 

https://grafana.com/grafana/plugins/alexanderzobnin-zabbix-app

# grafana-cli plugins install alexanderzobnin-zabbix-app

# systemctl restart grafana-server.service

 

启用zabbix 插件

wps124 

 

添加grafana的数据来源为zabbix

wps125 

 

wps126 

wps127 

 

3 监控docker

172.16.1.120 节点上部署Prometheus

# mkdir -p /opt/prometheus/

# cd /opt/prometheus/

# 上传prometheus.yml,并修改配置文件连接cadvisor

# docker run -d \

--name prometheus \

-p 9090:9090 \

-v /opt/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml \

--restart=always \

prom/prometheus

 

172.16.1.122节点上部署cadvisor

docker run \

--volume=/:/rootfs:ro \

--volume=/var/run:/var/run:ro \

--volume=/sys:/sys:ro \

--volume=/var/lib/docker/:/var/lib/docker:ro \

--volume=/dev/disk/:/dev/disk:ro \

--publish=8090:8080 \

--detach=true \

--name cadvisor \

--privileged \

--device=/dev/kmsg \

--restart=always \

google/cadvisor:latest

# 查看cadvisor监控容器暴露的监控值:http://172.16.1.122:8090/metrics

# 访问:http://172.16.1.120:9090/

wps128 

 

grafana 中添加Prometheus数据源

wps129 

 

wps130 

 

 

 

4 grafana可视化图形展示

添加插件的位置:

wps131 

 

添加数据源为zabbix 的监控模板

https://grafana.com/grafana/dashboards/5363

https://grafana.com/grafana/dashboards/5456

https://grafana.com/grafana/dashboards/11209

wps132 

 

wps133 

 

添加数据源为Prometheus的docker监控模板

https://grafana.com/dashboards/193

wps134 

 

wps135 

 

添加数据源为zabbix esxi 主机模板

# esxi 主机:https://grafana.com/grafana/dashboards/10798

wps136 

wps137 

 

# esxi 主机下的虚拟机:https://grafana.com/grafana/dashboards/10799

wps138 

5 grafana 如何自定义创建图表

创建dashboard

wps139 

 

设置变量

wps140 

 

设置主机组变量

wps141 

 

设置主机变量

wps142 

 

保存表盘

wps143 

完成

wps144 

 

根据需要添加监控图表,然后保存即可

wps145 

 

wps146 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

posted @ 2021-04-26 04:26  云起时。  阅读(188)  评论(0编辑  收藏  举报