Nagios安装部署
Nagios工作原理
Nagios主要配置文件
Nagios软件包
链接:http://pan.baidu.com/s/1skGSBIP 密码:6fwr
Nagios服务端环境及依赖
cat /etc/redhat-release CentOS release 6.6 (Final) uname -rm 2.6.32-504.el6.x86_64 x86_64 echo "export LC_ALL=C" >> /etc/profile # 恢复默认语言环境 tail -1 /etc/profile source /etc/profile echo $LC_ALL yum install gcc glibc glibc-common gd-devel mysql-server httpd php php-gd -y rpm -qa mysql httpd php useradd nagios groupadd nagcmd usermod -a -G nagcmd nagios usermod -a -G nagcmd apache groups nagios groups apache
Nagios服务端主程序安装
mkdir -p /server/tools/nagios cd /server/tools/nagios rz -y #上传软件包 tree /server/tools/nagios /etc/init.d/httpd start lsof -i :80 tar xf nagios-3.5.1.tar.gz cd nagios ./configure --with-command-group=nagcmd make all make install make install-init make install-config make install-commandmode make install-webconf cd .. htpasswd -bc /usr/local/nagios/etc/htpasswd.users peter 123456 cat /usr/local/nagios/etc/htpasswd.users /etc/init.d/httpd reload sed -i 's#nagios@localhost#asdftttt@163.com#g' /usr/local/nagios/etc/objects/contacts.cfg sed -n '35p' /usr/local/nagios/etc/objects/contacts.cfg echo -e "set from=asdftttt@163.com\nset smtp=smtp.163.com smtp-auth-user=asdftttt smtp-auth-password=xxxxx smtp-auth=login" >> /etc/mail.rc echo "#time sync by peter at 2017-9-14" >> /var/spool/cron/root #时间同步 echo "*/5 * * * * /usr/sbin/ntpdate time.nist.gov > /dev/null 2>&1" >> /var/spool/cron/root crontab -l /etc/init.d/httpd restart chkconfig httpd on netstat -nutlp | grep httpd #网页 172.16.1.53/nagios
Nagios插件包安装
yum install perl-devel perl-CPAN openssl-devel -y cd /server/tools/nagios tar xf nagios-plugins-1.4.16.tar.gz cd nagios-plugins-1.4.16 ./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql make make install cd .. ls /usr/local/nagios/libexec/ | wc -l 59 #也可能61
Nrpe软件安装
tar xf nrpe-2.12.tar.gz cd nrpe-2.12 ./configure make all make install-plugin make install-daemon make install-daemon-config cd .. ls /usr/local/nagios/libexec/check_nrpe #服务端安装nrpe是为了获得check_nrpe插件
Nagios服务端配置和启动
chkconfig nagios on chkconfig --list nagios /etc/init.d/nagios checkconfig Running configuration check... OK. /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg vim /etc/init.d/nagios +183 $NagiosBin -v $NagiosCfgFile /etc/init.d/nagios checkconfig /etc/init.d/nagios start Starting nagios: done. ps -ef | grep nagios nagios 47748 1 0 15:22 ? 00:00:00 /usr/local/nagios/bin/nagios -d /usr/local/nagios/etc/nagios.cfg
Nagios客户端环境准备(web01 web02)
yum install gcc glibc glibc-common mysql-server perl-devel perl-CPAN openssl-devel -y mkdir -p /server/tools/nagios cd /server/tools/nagios rz -y useradd nagios -M -s /sbin/nologin
Nagios插件包安装
tar xf nagios-plugins-1.4.16.tar.gz cd nagios-plugins-1.4.16 ./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-perl-modules --with-mysql make make install cd .. ls /usr/local/nagios/libexec/ | wc -l
Nrpe服务安装
tar xf nrpe-2.12.tar.gz cd nrpe-2.12 ./configure make all make install-plugin make install-daemon make install-daemon-config cd ..
安装check_iostat插件依赖包
for n in Params-Validate-0.91 Class-Accessor-0.31 Config-Tiny-2.12 Math-Calc-Units-1.07 Regexp-Common-2010010201 Nagios-Plugin-0.34 do tar xf ${n}.tar.gz cd $n perl Makefile.PL make make install cd .. done
配置监控内存、磁盘I/O脚本插件
yum install -y sysstat dos2unix cp /server/tools/nagios/check_memory.pl /usr/local/nagios/libexec cp /server/tools/nagios/check_iostat /usr/local/nagios/libexec chmod 755 /usr/local/nagios/libexec/check_memory.pl chmod 755 /usr/local/nagios/libexec/check_iostat dos2unix /usr/local/nagios/libexec/check_memory.pl dos2unix /usr/local/nagios/libexec/check_iostat
客户端Nrpe服务配置
cd /usr/local/nagios/etc sed -i 's#allowed_hosts=127.0.0.1#allowed_hosts=127.0.0.1,172.16.1.53#g' nrpe.cfg vim nrpe.cfg +199 #删掉原来的 command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20 command[check_mem]=/usr/local/nagios/libexec/check_memory.pl -w 10% -c 3% command[check_disk]=/usr/local/nagios/libexec/check_disk -w 15% -c 7% -p / command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10% command[check_iostat]=/usr/local/nagios/libexec/check_iostat -w 6 -c 10 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d netstat -nutlp | grep nrpe tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN 39042/nrpe #重启nrpe pkill nrpe /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d #加入开机启动 echo "#nagios nrpe process cmd by peter 2017-08-31" >> /etc/rc.local echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d" >> /etc/rc.local tail -2 /etc/rc.local
Nagios服务端配置文件设置
vim /usr/local/nagios/etc/nagios.cfg +34 cfg_file=/usr/local/nagios/etc/objects/hosts.cfg cfg_file=/usr/local/nagios/etc/objects/services.cfg cfg_dir=/usr/local/nagios/etc/objects/services #该目录下只要以.cfg结尾都会被加载 # Definitions for monitoring the local (Linux) host #cfg_file=/usr/local/nagios/etc/objects/localhost.cfg cd /usr/local/nagios/etc/objects/ head -51 localhost.cfg > hosts.cfg chown nagios.nagios /usr/local/nagios/etc/objects/hosts.cfg touch services.cfg chown nagios.nagios services.cfg mkdir services chown -R nagios.nagios services vim hosts.cfg # Define a host for the local machine define host{ use linux-server ; Name of host template to use ; This host definition will inherit all variables that are defined ; in (or inherited by) the linux-server host template definition. host_name 13-web01 alias 13-web01 address 172.16.1.13 } define host{ use linux-server ; Name of host template to use ; This host definition will inherit all variables that are defined ; in (or inherited by) the linux-server host template definition. 
host_name 14-web02 alias 14-web02 address 172.16.1.14 } # Define an optional hostgroup for Linux machines define hostgroup{ hostgroup_name linux-servers ; The name of the hostgroup alias Linux Servers ; Long name of the group members 13-web01,14-web02 ; Comma separated list of hosts that belong to this group } vim services.cfg define service { use generic-service host_name 13-web01,14-web02 service_description Disk Partition check_command check_nrpe!check_disk } define service { use generic-service host_name 13-web01,14-web02 service_description Swap Usage check_command check_nrpe!check_swap } define service { use generic-service host_name 13-web01,14-web02 service_description Mem Usage check_command check_nrpe!check_mem } define service { use generic-service host_name 13-web01,14-web02 service_description Current Load check_command check_nrpe!check_load } define service { use generic-service host_name 13-web01,14-web02 service_description Disk IOstat check_command check_nrpe!check_iostat } define service { use generic-service host_name 13-web01,14-web02 service_description PING check_command check_ping!100.0,20%!500.0,60% } vim commands.cfg define command{ command_name check_nrpe command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ } /etc/init.d/nagios checkconfig /etc/init.d/nagios start /etc/init.d/nagios reload cd /usr/local/nagios/etc sed -i 's#nagiosadmin#peter#g' cgi.cfg grep "^authorized_for" cgi.cfg /etc/init.d/nagios reload #监控URL vim objects/services.cfg define service { use generic-service host_name 13-web01 service_description blog_url check_command check_http!-H 172.16.1.13 } define service { use generic-service host_name 14-web02 service_description blog_url check_command check_http!-H 172.16.1.14 } /etc/init.d/nagios checkconfig /etc/init.d/nagios reload #监控TCP端口 vim objects/services.cfg define service { use generic-service host_name 13-web01,14-web02 service_description http_80 check_command check_tcp!80 } define service { use generic-service 
host_name 13-web01,14-web02 service_description ssh_22 check_command check_tcp!22 } /etc/init.d/nagios checkconfig /etc/init.d/nagios reload
Nagios配置出图
yum install cairo pango zlib-devel freetype-devel gd-devel libart_lgpl-devel rrdtool-devel perl-Time-HiRes -y rpm -qa cairo pango zlib zlib-devel freetype freetype-devel gd gd-devel libart_lgpl libart_lgpl-devel rrdtool rrdtool-devel perl-Time-HiRes cd /server/tools/nagios/ tar xf pnp-0.4.14.tar.gz cd pnp-0.4.14 ./configure --with-rrdtool --with-perfdata-dir=/usr/local/nagios/share/perfdata/ make all make install make install-config make install-init ll /usr/local/nagios/libexec/ | grep process vim /usr/local/nagios/etc/nagios.cfg +835 process_performance_data=1 ... host_perfdata_command=process-host-perfdata service_perfdata_command=process-service-perfdata vim /usr/local/nagios/etc/objects/commands.cfg +227 # 'process-host-perfdata' command definition define command{ command_name process-host-perfdata command_line /usr/local/nagios/libexec/process_perfdata.pl } # 'process-service-perfdata' command definition define command{ command_name process-service-perfdata command_line /usr/local/nagios/libexec/process_perfdata.pl } /etc/init.d/nagios checkconfig /etc/init.d/nagios reload #网页 172.16.1.53/nagios/pnp vim /usr/local/nagios/etc/objects/hosts.cfg # Define a host for the local machine define host{ use linux-server ; Name of host template to use ; This host definition will inherit all variables that are defined ; in (or inherited by) the linux-server host template definition. host_name 13-web01 alias 13-web01 address 172.16.1.13 process_perf_data 1 } define host{ use linux-server ; Name of host template to use ; This host definition will inherit all variables that are defined ; in (or inherited by) the linux-server host template definition. host_name 14-web02 alias 14-web02 address 172.16.1.14 process_perf_data 1 } #services.cfg中每个服务下也要添加,但generic-service模板中已经定义好了 /etc/init.d/nagios checkconfig /etc/init.d/nagios reload #网页 172.16.1.53/nagios/pnp
整合PNP URL到Nagios Web界面
vim objects/hosts.cfg # Define a host for the local machine define host{ use linux-server ; Name of host template to use ; This host definition will inherit all variables that are defined ; in (or inherited by) the linux-server host template definition. host_name 13-web01 alias 13-web01 address 172.16.1.13 process_perf_data 1 action_url /nagios/pnp/index.php?host=$HOSTNAME$ } define host{ use linux-server ; Name of host template to use ; This host definition will inherit all variables that are defined ; in (or inherited by) the linux-server host template definition. host_name 14-web02 alias 14-web02 address 172.16.1.14 process_perf_data 1 action_url /nagios/pnp/index.php?host=$HOSTNAME$ } vim objects/templates.cfg #直接配置services.cfg中使用的模板文件 define service{ name generic-service ; The 'name' of this service template active_checks_enabled 1 ; Active service checks are enabled passive_checks_enabled 1 ; Passive service checks are enabled/accepted parallelize_check 1 ; Active service checks should be parallelized (disabling this can lead to major performance problems) obsess_over_service 1 ; We should obsess over this service (if necessary) check_freshness 0 ; Default is to NOT check service 'freshness' notifications_enabled 1 ; Service notifications are enabled event_handler_enabled 1 ; Service event handler is enabled flap_detection_enabled 1 ; Flap detection is enabled failure_prediction_enabled 1 ; Failure prediction is enabled process_perf_data 1 ; Process performance data retain_status_information 1 ; Retain status information across program restarts retain_nonstatus_information 1 ; Retain non-status information across program restarts is_volatile 0 ; The service is not volatile check_period 24x7 ; The service can be checked at any time of the day max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state normal_check_interval 10 ; Check the service every 10 minutes under normal conditions retry_check_interval 2 ; 
Re-check the service every two minutes until a hard state can be determined contact_groups admins ; Notifications get sent out to everyone in the 'admins' group notification_options w,u,c,r ; Send notifications about warning, unknown, critical, and recovery events notification_interval 60 ; Re-notify about service problems every hour notification_period 24x7 ; Notifications can be sent out at any time register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE! action_url /nagios/pnp/index.php?host=$HOSTNAME$&srv=$SERVICEDESC$ } /etc/init.d/nagios checkconfig /etc/init.d/nagios reload #网页 172.16.1.53/nagios #服务状态数据存放目录,注意备份 tree /usr/local/nagios/share/perfdata /usr/local/nagios/share/perfdata |-- 13-web01 | |-- Current_Load.rrd | |-- Current_Load.xml | |-- Disk_IOstat.rrd | |-- Disk_IOstat.xml | |-- Disk_Partition.rrd | |-- Disk_Partition.xml | |-- Mem_Usage.rrd | |-- Mem_Usage.xml | |-- PING.rrd | |-- PING.xml | |-- Swap_Usage.rrd | |-- Swap_Usage.xml | |-- blog_url.rrd | |-- blog_url.xml | |-- http_80.rrd | |-- http_80.xml | |-- ssh_22.rrd | `-- ssh_22.xml `-- 14-web02 |-- Current_Load.rrd |-- Current_Load.xml |-- Disk_IOstat.rrd |-- Disk_IOstat.xml |-- Disk_Partition.rrd |-- Disk_Partition.xml |-- Mem_Usage.rrd |-- Mem_Usage.xml |-- PING.rrd |-- PING.xml |-- Swap_Usage.rrd |-- Swap_Usage.xml |-- blog_url.rrd |-- blog_url.xml |-- http_80.rrd |-- http_80.xml |-- ssh_22.rrd `-- ssh_22.xml
Nagios故障报警(邮件)
vim objects/contacts.cfg email asdftttt@163.com vim objects/commands.cfg #优化一下报警信息 # 'notify-host-by-email' command definition define command{ command_name notify-host-by-email command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /bin/mail -s "Host $HOSTSTATE$ alert for $HOSTNAME$!" $CONTACTEMAIL$ } # 'notify-service-by-email' command definition define command{ command_name notify-service-by-email command_line /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /bin/mail -s " $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ " $CONTACTEMAIL$ }
Shell开发Nagios插件
#Nagios为插件提供两个接口:退出状态码和控制台打印的第一行信息 #主动模式:编写一个探测url的插件 cd /usr/local/nagios/libexec vim check_url.sh #!/bin/bash PROGNAME=`basename $0` PROGPATH=`dirname $0` usage() { echo "Usage: /bin/sh $PROGNAME url" exit 1 } [ $# -ne 1 ]&&usage . $PROGPATH/utils.sh if wget -T 20 --spider $1 > /dev/null 2>&1;then echo "url $1 OK" exit $STATE_OK else echo "url $1 NO" exit $STATE_CRITICAL fi #[root@mage-monitor-01 libexec]# cat ./utils.sh #! /bin/sh # #STATE_OK=0 #STATE_WARNING=1 #STATE_CRITICAL=2 #STATE_UNKNOWN=3 #STATE_DEPENDENT=4 sh /usr/local/nagios/libexec/check_url.sh 172.16.1.14 chmod +x check_url.sh cd /usr/local/nagios/etc/objects/ vim commands.cfg #'check_url' command defined by peter define command{ command_name check_url command_line $USER1$/check_url.sh 172.16.1.14 } vim services.cfg define service { use generic-service host_name 14-web02 service_description check_url check_command check_url } /etc/init.d/nagios checkconfig /etc/init.d/nagios reload #被动模式:监控/etc/passwd文件是否变化 #web02 md5sum /etc/passwd > /opt/ps.md5 cat /opt/ps.md5 70fe6e84988c7298fe6c5f108e02df39 /etc/passwd cd /usr/local/nagios/libexec/ vim check_passwd.sh #!/bin/bash OriMd5="70fe6e84988c7298fe6c5f108e02df39" CurrMd5=`md5sum /etc/passwd|cut -c 1-32` if [ "$OriMd5" == "$CurrMd5" ];then echo "/etc/passwd:ok" exit 0 else echo "/etc/passwd:failed" exit 2 fi sh check_passwd.sh chmod +x check_passwd.sh cd /usr/local/nagios/etc/ vim nrpe.cfg command[check_passwd]=/usr/local/nagios/libexec/check_passwd.sh netstat -nutlp | grep nrpe pkill nrpe /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d netstat -nutlp | grep nrpe #nagios服务端 vim services.cfg define service { use generic-service host_name 14-web02 service_description check_passwd check_command check_nrpe!check_passwd } /etc/init.d/nagios checkconfig /etc/init.d/nagios reload