nagios --centos7.3
nagios --centos7.3
准备一台虚拟机开始安装nagios
安装前准备:
1,主机名
2,关闭firewalld,selinux
3,关闭NetworkManager,并配置静态ip
4,配置本地yum,epel源,163源
5,时间同步
安装步骤:
1,搭建rpm版lamp(源码版lamp也可以,但nginx不行,因为后面nagios的web子配置文件里的语法都是apache的语法)
# yum install httpd httpd-devel gd gd-devel php
2,安装nagios
# yum install nagios\*
安装完后确认用户
# id nagios
uid=988(nagios) gid=983(nagios) groups=983(nagios)
# id apache
uid=48(apache) gid=48(apache) groups=48(apache),983(nagios)
主配置文件路径:
/etc/nagios/nagios.cfg
子配置文件路径:
# ls /etc/nagios/objects/
commands.cfg localhost.cfg switch.cfg timeperiods.cfg
contacts.cfg printer.cfg templates.cfg windows.cfg
plugins(监控命令)路径,目录下有很多check开头的命令
# ls /usr/lib64/nagios/plugins/
3,设置http访问nagios的验证用户和密码
# htpasswd /etc/nagios/passwd nagiosadmin
# nagios -v /etc/nagios/nagios.cfg ----检查配置文件正确性
# systemctl restart httpd
# systemctl restart nagios
# systemctl status httpd
# systemctl status nagios
# systemctl enable httpd
# systemctl enable nagios
使用firefox访问:
访问路径http://IP/nagios
-------------------------------------------------------------------------------------------------------------------------------------
现在查看web界面,默认只监控了localhost,并监控了其8个服务
一些小操作:
1,如果http服务为黄色,是警告,则需要把网站家目录里加一个主页进去(家目录为空,他就会警告)。
但需要等它下一次check才会OK。如果要手动check,可以点http,再右边点Re-schedule the next check of this service去强制check,就OK了
2,默认http和ssh是关闭通知的,是因为在localhost.cfg里这两个服务有一句 notifications_enabled 0。
也可以手动打开,点进去,再右边点Enable notifications for this service.
3,关闭ssh服务,刷新web界面,还是没有critical.
点击ssh,可以看到下一次计划的check时间。如果不等的话,在右边点Re-schedule the next check of this service强制check,再刷新就critical
-------------------------------------------------------------------------------------------------------------------------------------
关于nagios配置文件之间的联系讲解示例
# vim /etc/nagios/nagios.cfg
cfg_file=/etc/nagios/objects/localhost.cfg
# vim /etc/nagios/objects/localhost.cfg
define host{
use linux-server ----模版
host_name localhost ----主机名
alias localhost ----主机别名
address 127.0.0.1 ----被监控机器的IP
}
define hostgroup{
hostgroup_name linux-servers
alias Linux Servers
members localhost ----linux Servers组现在只有localhost这一个成员
}
--下面是8个默认定义的服务,我以监控磁盘利用率的这一段为例
define service{
use local-service ----模版,在templates.cfg 里定义的
host_name localhost ----主机名,调用的是同配置文件里define host里定义的host_name
service_description Root Partition ----描述,会在web界面显示的一个标题
check_command check_local_disk!20%!10%!/ ----检测利用率的命令,free空间小于20%就warning报警,小于10%就critical警告
}
# vim /etc/nagios/objects/templates.cfg
define host{
name linux-server
use generic-host ----linux主机模版也使用了一个叫generic-host的模版,也在templates.cfg里
check_period 24x7 ----在timeperiods.cfg 里定义的时间段
check_interval 5
retry_interval 1
max_check_attempts 10
check_command check-host-alive ----在commands.cfg 里定义的命令
notification_period workhours -----通知时间在timeperiods.cfg里定义的
notification_interval 120 ----通知间隔
notification_options d,u,r ----通知选项
contact_groups admins ----通知组,在contacts.cfg 里定义
register 0 -----不注册,表示这只是一个模版,被调用,不会被nagios进程认为就是一台主机
}
# vim /etc/nagios/objects/commands.cfg
define command{
command_name check-host-alive
command_line $USER1$/check_ping -H $HOSTADDRESS$ -w 3000.0,80% -c 5000.0,100% -p 5
}
--可用的命令都在/usr/lib64/nagios/plugins/下(源码版安装则在libexec目录下),用--help去查
# /usr/lib64/nagios/plugins/check_ping --help
-----------------------------------------------------------------------------------------------------------------------------------------------
问题:
如何监控本地的/boot分区 使用80%警告,使用90% critical
define service{
use local-service
host_name localhost
service_description Boot Partition
check_command check_local_disk!20%!10%!/boot
}
问题:
如何监控本机zombie进程 5个警告 10个 critical
define service{
use local-service
host_name localhost
service_description Zombie Total Processes
check_command check_local_procs!5!10!Z
}
例:如何增加监控本机的ftp服务
思路步骤:
1,看/usr/lib64/nagios/plugins/下是否有检测ftp的命令,如果没有,自己开发
2,在localhost.cfg里定义这个服务
3,在commands.cfg里定义命令
# vim /etc/nagios/objects/localhost.cfg --加上下面一段
define service{
use local-service
host_name localhost
service_description FTP
check_command check_ftp!1!3
}
# vim /etc/nagios/objects/commands.cfg ----下面一段默认就有,不需要加,直接改一下
define command{
command_name check_ftp
command_line $USER1$/check_ftp -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$
}
# systemctl restart nagios
练习:
1,如果本机ftp服务为监听2121端口,应该如何监控
# vim /etc/vsftpd/vsftpd.conf
listen_port=2121 --加上这一句
# systemctl restart vsftpd
# netstat -ntlup |grep ftp
# vim /etc/nagios/objects/localhost.cfg
----加下面一段
define service{
use local-service
host_name localhost
service_description FTP_2121 ----描述不要和localhost上已有的FTP服务重名,否则nagios会报重复定义
check_command check_ftp_2121!1!3!2121
--命令我这里是没有的,在commands.cfg里默认有一个check_ftp,没有
--check_ftp_2121这个,所以要手动去加;!为参数分隔符,1是第一个参数,3是第二个参数,2121是第三个参数;它们对应于我下面定义的-w -c -p
}
# vim /etc/nagios/objects/commands.cfg
define command{
command_name check_ftp_2121
command_line $USER1$/check_ftp -H $HOSTADDRESS$ -w $ARG1$ -c $ARG2$ -p $ARG3$
}
--直接使用监控命令去手工check一下,OK的
# /usr/lib64/nagios/plugins/check_ftp -w 1 -c 3 -p 2121
FTP OK - 0.004 second response time on port 2121 [220-#############################
220-#]|time=0.00389s;1.000000;3.000000;0.000000;10.000000
# systemctl restart nagios
如果本机http服务监听端口为8000,应该如何监控
# vim /etc/nagios/objects/localhost.cfg
define service{
use local-service
host_name localhost
service_description HTTP
check_command check_http_port!8000
}
# vim /etc/nagios/objects/commands.cfg
define command{
command_name check_http_port
command_line $USER1$/check_http -I $HOSTADDRESS$ -p $ARG1$
}
2,监控本机的mysql
# vim /etc/nagios/objects/localhost.cfg
define service{
use local-service
host_name localhost
service_description MYSQL
check_command check_mysql!root!123
}
# vim /etc/nagios/objects/commands.cfg
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u $ARG1$ -p $ARG2$ ----第一个参数对应上面的root,第二个对应密码123
}
--手动check一下mysql,OK
# /usr/lib64/nagios/plugins/check_mysql -u root -p123
Uptime: 189 Threads: 1 Questions: 5 Slow queries: 0 Opens: 12 Flush tables: 1 Open tables: 6 Queries per second avg: 0.026
# systemctl restart nagios
=======================================================================
nagios server ----》 nagios client
172.16.2.10 172.16.2.11
我们把监控的服务分为公共和私有
公共:如ssh,http,ftp,mysql等。监控本地或远程的公共服务,都可以直接配置
私有:如load,users,disk usage等。监控本地私有服务直接配置就好,监控远程私有服务,需要服务端和被监控端都安装nrpe
例:监控远程服务器的普通服务(公共服务)。如ssh,http,ftp,mysql等
如:我的被监控端IP为172.16.2.11
1.在nagios服务器的主配置文件里加上172.16.2.11的主机配置文件
# vim /etc/nagios/nagios.cfg
cfg_file=/etc/nagios/objects/172.16.2.11.cfg
2,创建这个172.16.2.11.cfg
# vim /etc/nagios/objects/172.16.2.11.cfg
define host{
use linux-server
host_name 172.16.2.11 ----主机名,最好/etc/hosts里对应好IP,我这里没有做,就直接写IP
alias 172.16.2.11 ----显示到web上的名字
address 172.16.2.11 ----实际被监控主机IP
}
define hostgroup{
hostgroup_name remote linux-servers ----这里我定义了一个新组,不能和localhost.cfg里的组同名,会冲突
alias remote Linux Servers
members 172.16.2.11
}
----下面是公共服务,这里我只写了四个,你可以自行增加
define service{
use local-service
host_name 172.16.2.11
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use local-service
host_name 172.16.2.11
service_description SSH
check_command check_ssh
}
define service{
use local-service
host_name 172.16.2.11
service_description HTTP
check_command check_http
}
define service{
use local-service
host_name 172.16.2.11
service_description FTP
check_command check_ftp!1!3
}
# nagios -v /etc/nagios/nagios.cfg
# systemctl restart nagios
------------------------------------------------------------------------------------------------------------------------------------
例:监控远程的私有服务
172.16.2.10 172.16.2.11
nagios监控端 被监控linux
check_disk
check_nrpe --------- check_nrpe check_swap
SSL传输 check_load等
第一大步:nagios监控端上的操作
1,确认有如下的命令,如果没有,则yum install nagios-plugins-nrpe
/usr/lib64/nagios/plugins/check_nrpe
2,增加check_nrpe命令到commands.cfg文件里
# vim /etc/nagios/objects/commands.cfg
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
-- -c参数后接command, 也就是说check_nrpe可以调用别的check命令
3,在nagios服务器上对172.16.2.11的配置文件增加远程私有服务
# vim /etc/nagios/objects/172.16.2.11.cfg
define service{
use local-service
host_name 172.16.2.11
service_description Current Users
check_command check_nrpe!check_remote_users
}
--check_remote_users就是check_nrpe的-c参数要调用的命令,此命令在nagios服务器上的commands.cfg里是不存在的,它会在后面的步骤中加到被监控端
# systemctl restart nagios
4,用下面的命令做测试,但现在是会报对方5666端口拒绝(因为被监控端还没有安装配置)
# /usr/lib64/nagios/plugins/check_nrpe -H 172.16.2.11 -c check_remote_users
connect to address 172.16.2.11 port 5666: Connection refused
connect to host 172.16.2.11 port 5666: Connection refused
第二大步:nagios被监控端上的操作
1,安装nrpe和其它监控命令包
# yum install nrpe nagios-plugins\*
2,修改nrpe主配置文件
# vim /etc/nagios/nrpe.cfg
allowed_hosts=172.16.2.10
command[check_remote_users]=/usr/lib64/nagios/plugins/check_users -w 5 -c 10
3,启动服务,并检查5666端口是否开启
# systemctl restart nrpe
# lsof -i:5666
第三大步:回到nagios服务器端测试
再次使用下面的命令,就可以监控到远程的实际登录用户数了
# /usr/lib64/nagios/plugins/check_nrpe -H 172.16.2.11 -c check_remote_users
USERS WARNING - 9 users currently logged in |users=9;5;10;0
最后,清firefox缓存,在firefox查看远程监控也正确了
----------------------------------------------------------------------------------------------------------------------------------
邮件报警验证:
1,确认你至少有一个service为critical状态
2,yum install postfix
# systemctl restart postfix
# systemctl status postfix
# systemctl enable postfix
3,保证nagios服务器能上公网,还有确认有mail命令了
4,# vim /etc/nagios/objects/contacts.cfg
email litengllll@126.com --改成你的一个公网测试邮箱
5,# systemctl restart nagios
如果你想做成免费手机短信通知,可以使用类似139邮箱这种(有邮件到达就短信通知的功能)的邮箱
现在有智能手机就方便多了,直接报警邮件发给外部一个邮箱,然后在你的手机上下载对应邮箱的app软件就ok了
现在nagios官方直接都有手机客户端管理软件
======================================================================================
nagios图表
nagiosgraph-1.4.4.tar.gz
软件包路径:
笔记目录/program/nagios_soft/nagiosgraph-1.4.4.tar.gz
# tar xf nagiosgraph-1.4.4.tar.gz -C /usr/src
# cd /usr/src/nagiosgraph-1.4.4
# ./install.pl --check-prereq
checking required PERL modules
Carp...1.26
CGI...3.63 --如果fail,则yum install perl-CGI
Data::Dumper...2.145
File::Basename...2.84
File::Find...1.20
MIME::Base64...3.13
POSIX...1.30
RRDs...1.4008 --如果fail,则yum install rrdtool-perl rrdtool
Time::HiRes...1.9725
checking optional PERL modules
GD...2.49 --如果fail,则yum install perl-GD
checking nagios installation
found nagios at /sbin/nagios
checking web server installation
found apache at /sbin/httpd
上面的检测全部ok后,则使用下面的命令安装,一直回车就可以了
1,
# ./install.pl --install
checking required PERL modules
Carp...1.11
CGI...3.51
Data::Dumper...2.124
File::Basename...2.77
File::Find...1.14
MIME::Base64...3.08
POSIX...1.17
RRDs...1.4008
Time::HiRes...1.9721
checking optional PERL modules
GD...2.53
checking nagios installation
found nagios at /usr/local/nagios/bin/nagios
checking web server installation
found apache at /usr/sbin/httpd
Destination directory (prefix)? [/usr/local/nagiosgraph]
Location of configuration files (etc-dir)? [/usr/local/nagiosgraph/etc]
Location of executables? [/usr/local/nagiosgraph/bin]
Location of CGI scripts? [/usr/local/nagiosgraph/cgi]
Location of documentation (doc-dir)? [/usr/local/nagiosgraph/doc]
Location of examples? [/usr/local/nagiosgraph/examples]
Location of CSS and JavaScript files? [/usr/local/nagiosgraph/share]
Location of utilities? [/usr/local/nagiosgraph/util]
Location of state files (var-dir)? [/usr/local/nagiosgraph/var]
Location of RRD files? [/usr/local/nagiosgraph/var/rrd]
Location of log files (log-dir)? [/usr/local/nagiosgraph/var]
Path of log file? [/usr/local/nagiosgraph/var/nagiosgraph.log]
Path of CGI log file? [/usr/local/nagiosgraph/var/nagiosgraph-cgi.log]
URL of CGI scripts? [/nagiosgraph/cgi-bin]
URL of CSS file? [/nagiosgraph/nagiosgraph.css]
URL of JavaScript file? [/nagiosgraph/nagiosgraph.js]
Path of Nagios performance data file? [/tmp/perfdata.log]
URL of Nagios CGI scripts? [/nagios/cgi-bin]
username or userid of Nagios user? [nagios]
username or userid of web server user? [apache]
Modify the Nagios configuration? [n]
Modify the Apache configuration? [n]
configuration:
ng_layout standalone
ng_prefix /usr/local/nagiosgraph
ng_etc_dir /usr/local/nagiosgraph/etc
ng_bin_dir /usr/local/nagiosgraph/bin
ng_cgi_dir /usr/local/nagiosgraph/cgi
ng_doc_dir /usr/local/nagiosgraph/doc
ng_examples_dir /usr/local/nagiosgraph/examples
ng_www_dir /usr/local/nagiosgraph/share
ng_util_dir /usr/local/nagiosgraph/util
ng_var_dir /usr/local/nagiosgraph/var
ng_rrd_dir /usr/local/nagiosgraph/var/rrd
ng_log_dir /usr/local/nagiosgraph/var
ng_log_file /usr/local/nagiosgraph/var/nagiosgraph.log
ng_cgilog_file /usr/local/nagiosgraph/var/nagiosgraph-cgi.log
ng_url /nagiosgraph
ng_cgi_url /nagiosgraph/cgi-bin
ng_css_url /nagiosgraph/nagiosgraph.css
ng_js_url /nagiosgraph/nagiosgraph.js
nagios_cgi_url /nagios/cgi-bin
nagios_perfdata_file /tmp/perfdata.log
nagios_user nagios
www_user apache
modify_nagios_config n
nagios_config_file
nagios_commands_file
modify_apache_config n
apache_config_dir
apache_config_file
Continue with this configuration? [y]
.............
2,vim /etc/nagios/nagios.cfg --最后加上下面一段
process_performance_data=1
service_perfdata_file=/tmp/perfdata.log
service_perfdata_file_template=$LASTSERVICECHECK$||$HOSTNAME$||$SERVICEDESC$||$SERVICEOUTPUT$||$SERVICEPERFDATA$
service_perfdata_file_mode=a
service_perfdata_file_processing_interval=30
service_perfdata_file_processing_command=process-service-perfdata-for-nagiosgraph
3,# vim /etc/nagios/objects/commands.cfg
define command {
command_name process-service-perfdata-for-nagiosgraph
command_line /usr/local/nagiosgraph/bin/insert.pl
}
4,
# vim /etc/httpd/conf/httpd.conf --在你的apache里include这个文件
Include /usr/local/nagiosgraph/etc/nagiosgraph-apache.conf
# systemctl restart httpd
# systemctl restart nagios