(lamp平台)
系统环境: rhel6 x86_64 iptables and selinux disabled
主机: 192.168.122.119 server19.example.com
192.168.122.7 server7.example.com (注:时间需同步)
相关网址: rpm.pbone.net http://www.cyberciti.biz/nixcraft/linux/docs/uniqlinuxfeatures/mrtg/ cacti.net nagios.org https://trac.assembla.com/npc
所需的包:cacti-0.8.8b.tar.gz cacti-spine-0.8.8b.tar.gz settings-v0.71-1.tgz monitor-v1.3-1.tgz thold-v0.4.9-3.tgz nagios-cn-3.2.3.tar.bz2 nagios-plugins-1.4.16.tar.gz gd-devel-2.0.35-10.el6.x86_64.rpm fetion linuxso_20101113.tar.gz nrpe-2.14.tar.gz ganglia-3.6.0.tar.gz ganglia-web-3.5.10.tar.gz libconfuse-2.6-3.el6.x86_64.rpm libconfuse-devel-2.6-3.el6.x86_64.rpm rrdtool-devel-1.3.8-6.el6.x86_64.rpm npc-2.0.4.tar.gz ndoutils-1.5.2.tar.gz
1.安装与配置cacti
[root@server19 kernel]# yum install httpd mysql-server php rrdtool -y
[root@server19 kernel]# yum install php-mysql php-xml mysql net-snmp net-snmp-utils.x86_64 -y
[root@server19 kernel]# rpm -ivh php-snmp-5.3.3-22.el6.x86_64.rpm
注:因为在红帽企业7中用mariadb替代了mysql,所以需要将mysql和mysql-server换成mariadb和mariadb-server.并且php-snmp的版本必须与企业7中php的版本一致,可以通过rpm -q php查看php的版本.
[root@server19 kernel]# vim /etc/php.ini
safe_mode = Off
date.timezone = Asia/Shanghai
file_uploads = On
[root@server19 kernel]# /etc/init.d/mysqld start
[root@server19 kernel]# mysql_secure_installation (给数据库设置密码)
[root@server19 kernel]# vim /etc/snmp/snmpd.conf
(注:以下配置可在http://www.cyberciti.biz/nixcraft/linux/docs/uniqlinuxfeatures/mrtg/上查看)
#com2sec notConfigUser default public
com2sec local localhost public
com2sec mynetwork 192.168.122.0/24 public
#group notConfigGroup v1 notConfigUser
#group notConfigGroup v2c notConfigUser
group MyRWGroup v1 local
group MyRWGroup v2c local
group MyRWGroup usm local
group MyROGroup v1 mynetwork
group MyROGroup v2c mynetwork
group MyROGroup usm mynetwork
view systemview included .1.3.6.1.2.1
view systemview included .1.3.6.1.2.1.25.1.1
view all included .1 80
#access notConfigGroup "" any noauth exact systemview none none
access MyROGroup "" any noauth exact all none none
access MyRWGroup "" any noauth exact all all none
syslocation rhel6.4 (edit /etc/snmp/snmpd.conf)
syscontact Root <root@localhost> (configure /etc/snmp/snmp.local.conf)
[root@server19 kernel]# /etc/init.d/snmpd start
[root@server19 kernel]# snmpwalk -v 1 -c public localhost IP-MIB::ipAdEntIfIndex (此命令可在http://www.cyberciti.biz/nixcraft/linux/docs/uniqlinuxfeatures/mrtg/上查看)
IP-MIB::ipAdEntIfIndex.127.0.0.1 = INTEGER: 1
IP-MIB::ipAdEntIfIndex.192.168.122.119 = INTEGER: 2
(出现类似上面的输出为正常)
[root@server19 kernel]# tar zxf cacti-0.8.8b.tar.gz -C /var/www/html/
[root@server19 kernel]# mysqladmin -uroot -pwestos create cacti
[root@server19 kernel]# cd /var/www/html/
[root@server19 html]# ln -s cacti-0.8.8b/ cacti
[root@server19 html]# cd cacti
[root@server19 cacti]# mysql -uroot -pwestos cacti < cacti.sql
[root@server19 cacti]# mysql -uroot -pwestos
mysql> grant all on cacti.* to cacti@localhost identified by 'cacti';
mysql> flush privileges;
mysql> quit
[root@server19 cacti]# cd /var/www/html/cacti/include/
[root@server19 include]# vim config.php
$database_type = "mysql";
$database_default = "cacti";
$database_hostname = "localhost";
$database_username = "cacti";
$database_password = "cacti";
$database_port = "3306";
$database_ssl = false;
$url_path = "/cacti/";
$cacti_session_name = "Cacti";
[root@server19 include]# useradd cacti
[root@server19 include]# chown cacti /var/www/html/cacti-0.8.8b/ -R
[root@server19 include]# su - cacti
[cacti@server19 ~]$ crontab -e
*/5 * * * * php /var/www/html/cacti/poller.php > /dev/null 2>&1
[root@server19 include]# /etc/init.d/httpd start
访问server19.example.com/cacti出现如下页面:
注:net-snmp和rrdtool的版本设置需要与本机的版本匹配,可以通过rpm -q net-snmp和rpm -q rrdtool查看版本.
注:第一次登录会强制要求更改密码(初始用户名:admin,密码:admin)
出现类似上面图形说明可以正常采集到数据.
#安装并配置spine
[root@server19 kernel]# yum install net-snmp-devel mysql-devel openssl-devel dos2unix autoconf automake binutils libtool gcc cpp glibc-headers kernel-headers glibc-devel -y
注:因为在红帽企业7中用mariadb替代了mysql,所以需要将mysql-devel换成mariadb-devel.
[root@server19 kernel]# tar zxf cacti-spine-0.8.8b.tar.gz
[root@server19 kernel]# cd cacti-spine-0.8.8b
[root@server19 cacti-spine-0.8.8b]# ./bootstrap
[root@server19 cacti-spine-0.8.8b]# ./configure
[root@server19 cacti-spine-0.8.8b]# make && make install
[root@server19 cacti-spine-0.8.8b]# cd /usr/local/spine/etc/
[root@server19 etc]# cp spine.conf.dist spine.conf
[root@server19 etc]# vim spine.conf
DB_Host localhost
DB_Database cacti
DB_User cacti
DB_Pass cacti
DB_Port 3306
DB_PreG 1
(注:如果你用的是cacti087g,将DB_PreG配置为0,否则将 DB_PreG配置为1.)
[root@server19 etc]# cp spine.conf /etc/
[root@server19 etc]# /usr/local/spine/bin/spine
SPINE: Using spine config file [spine.conf]
SPINE: Version 0.8.8b starting
SPINE: Time: 0.1288 s, Threads: 5, Hosts: 2
出现类似以上输出为正确.
选择Settings选项.
选择Paths选项.
如图填写并保存,然后选择Poller选项.
如图选择并保存.
注:观察图像,如果没有出现中断说明spine配置成功.
#安装插件
[root@server19 kernel]# tar zxf settings-v0.71-1.tgz -C /var/www/html/cacti/plugins
[root@server19 kernel]# tar zxf monitor-v1.3-1.tgz -C /var/www/html/cacti/plugins
[root@server19 kernel]# tar zxf thold-v0.4.9-3.tgz -C /var/www/html/cacti/plugins
在Configuration中就会出现Plugin Management这个选项,启动插件.
注:在新版的cacti中已经集成了Plugin Management这个功能,只需将要安装的插件包解压到cacti目录下的plugins目录中,然后在Plugin Management中安装激活即可使用
#配置thold插件进行报警并发送邮件到QQ邮箱
(注:QQ邮箱必须打开POP3/SMTP服务)
选择Settings
选择Mail|DNS
仿照上图进行配置并保存
选择Thresholds
仿照上图进行配置并保存
选择Threshold Templates
仿照上图配置并保存
选择Graph Management
按照上图配置
注:配置完毕后在thold选项中可以查看到类似如下内容:
当登录用户数超过上限或者低于下限时将发送警报到QQ邮箱.
#监控远程主机(server7.example.com)
以下步骤在server7上实施:
[root@server7 ~]# yum install net-snmp net-snmp-utils -y
[root@server7 ~]# vim /etc/snmp/snmpd.conf
(注:以下配置可在http://www.cyberciti.biz/nixcraft/linux/docs/uniqlinuxfeatures/mrtg/上查看)
#com2sec notConfigUser default public
com2sec local localhost public
com2sec mynetwork 192.168.122.0/24 public
#group notConfigGroup v1 notConfigUser
#group notConfigGroup v2c notConfigUser
group MyRWGroup v1 local
group MyRWGroup v2c local
group MyRWGroup usm local
group MyROGroup v1 mynetwork
group MyROGroup v2c mynetwork
group MyROGroup usm mynetwork
view systemview included .1.3.6.1.2.1
view systemview included .1.3.6.1.2.1.25.1.1
view all included .1 80
#access notConfigGroup "" any noauth exact systemview none none
access MyROGroup "" any noauth exact all none none
access MyRWGroup "" any noauth exact all all none
syslocation rhel6.4 (edit /etc/snmp/snmpd.conf)
syscontact Root <root@localhost> (configure /etc/snmp/snmp.local.conf)
[root@server7 ~]# /etc/init.d/snmpd start
[root@server7 ~]# snmpwalk -v 1 -c public localhost IP-MIB::ipAdEntIfIndex(此命令可在http://www.cyberciti.biz/nixcraft/linux/docs/uniqlinuxfeatures/mrtg/上查看)
IP-MIB::ipAdEntIfIndex.127.0.0.1 = INTEGER: 1
IP-MIB::ipAdEntIfIndex.192.168.122.7 = INTEGER: 2
注:出现类似上面输出,说明配置完成.
以下步骤在server19上实施:
[root@server19 kernel]# yum install net-snmp net-snmp-utils -y
[root@server19 kernel]# vim /etc/snmp/snmpd.conf
(注:以下配置可在http://www.cyberciti.biz/nixcraft/linux/docs/uniqlinuxfeatures/mrtg/上查看)
#com2sec notConfigUser default public
com2sec local localhost public
com2sec mynetwork 192.168.122.0/24 public
#group notConfigGroup v1 notConfigUser
#group notConfigGroup v2c notConfigUser
group MyRWGroup v1 local
group MyRWGroup v2c local
group MyRWGroup usm local
group MyROGroup v1 mynetwork
group MyROGroup v2c mynetwork
group MyROGroup usm mynetwork
view systemview included .1.3.6.1.2.1
view systemview included .1.3.6.1.2.1.25.1.1
view all included .1 80
#access notConfigGroup "" any noauth exact systemview none none
access MyROGroup "" any noauth exact all none none
access MyRWGroup "" any noauth exact all all none
syslocation rhel6.4 (edit /etc/snmp/snmpd.conf)
syscontact Root <root@localhost> (configure /etc/snmp/snmp.local.conf)
[root@server19 kernel]# /etc/init.d/snmpd start
[root@server19 kernel]# snmpwalk -v 1 -c public 192.168.122.7 IP-MIB::ipAdEntIfIndex(此命令可在http://www.cyberciti.biz/nixcraft/linux/docs/uniqlinuxfeatures/mrtg/上查看)
IP-MIB::ipAdEntIfIndex.127.0.0.1 = INTEGER: 1
IP-MIB::ipAdEntIfIndex.192.168.122.7 = INTEGER: 2
选择Devices选项,并选择add设备.
如上图填写并保存.
注:如果需要监控root分区的剩余空间,需要将/etc/snmp/snmpd.conf中的disk / 1000这一行的注释去掉,这样在创建图时才有相应的选项,否则无法监控。
选择所要创建的图.
将新添加的设备加入到树中.
至此cacti的安装及配置完毕!!!
2.安装与配置nagios
[root@server19 kernel]# useradd nagios
[root@server19 kernel]# usermod -G nagios apache(注:通过web更改nagios设置时,需要考虑此权限问题,例如:启动服务通知时)
[root@server19 kernel]# yum localinstall gd-devel-2.0.35-11.el6.x86_64.rpm -y
[root@server19 kernel]# tar jxf nagios-cn-3.2.3.tar.bz2
[root@server19 kernel]# cd nagios-cn-3.2.3
[root@server19 nagios-cn-3.2.3]# ./configure --enable-embedded-perl
此时会出现如下错误:
Can't locate ExtUtils/Embed.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .).
BEGIN failed--compilation aborted.
Can't locate ExtUtils/Embed.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .).
BEGIN failed--compilation aborted.
creating base/perlxsi.c
Can't locate ExtUtils/Embed.pm in @INC (@INC contains: /usr/local/lib64/perl5 /usr/local/share/perl5 /usr/lib64/perl5/vendor_perl /usr/share/perl5/vendor_perl /usr/lib64/perl5 /usr/share/perl5 .).
解决方法如下:
[root@server19 nagios-cn-3.2.3]# yum install perl-ExtUtils-Embed -y
[root@server19 nagios-cn-3.2.3]# make all
[root@server19 nagios-cn-3.2.3]# make install
[root@server19 nagios-cn-3.2.3]# make install-init
[root@server19 nagios-cn-3.2.3]# make install-commandmode
[root@server19 nagios-cn-3.2.3]# make install-config
[root@server19 nagios-cn-3.2.3]# make install-webconf
[root@server19 kernel]# tar zxf nagios-plugins-1.4.16.tar.gz
[root@server19 kernel]# cd nagios-plugins-1.4.16
[root@server19 nagios-plugins-1.4.16]# yum install openssl-devel -y
[root@server19 nagios-plugins-1.4.16]# ./configure --enable-extra-opts --enable-perl-modules --enable-libtap --with-nagios-user=nagios --with-nagios-group=nagios
[root@server19 nagios-plugins-1.4.16]# make && make install
[root@server19 nagios-plugins-1.4.16]# htpasswd -m /usr/local/nagios/etc/htpasswd.users nagiosadmin
[root@server19 nagios-plugins-1.4.16]# chown nagios.nagios /usr/local/nagios/ -R
[root@server19 nagios-plugins-1.4.16]# /etc/init.d/nagios start
[root@server19 nagios-plugins-1.4.16]# /etc/init.d/httpd restart
访问server19.example.com/nagios并登录(用户名为nagiosadmin),出现如下页面为成功.(注:记得IP和域名的解析)
#配置nagios监控本机
[root@server19 ~]# cd /usr/local/nagios/etc/
[root@server19 etc]# vim nagios.cfg
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
cfg_file=/usr/local/nagios/etc/objects/services.cfg
[root@server19 etc]# cd objects/
[root@server19 objects]# cp -p localhost.cfg hosts.cfg
[root@server19 objects]# vim hosts.cfg
define host{
use linux-server
host_name server19.example.com
alias HomeSwitch
address 192.168.122.119
icon_image switch.gif
statusmap_image switch.gd2
2d_coords 100,200
3d_coords 100,200,100
}
define hostgroup{
hostgroup_name linux-servers ;
alias Linux Servers ;
members * ;
}
[root@server19 objects]# cp -p localhost.cfg services.cfg
[root@server19 objects]# vim services.cfg
define servicegroup{
servicegroup_name 系统负荷检查
alias 负荷检查
members server19.example.com,进程总数,server19.example.com,登录用户数,server19.example.com,根分区,server19.example.com,交换空间利用率
}
define service{
use local-service ;
host_name *
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
define service{
use local-service ;
host_name server19.example.com
service_description 根分区
check_command check_local_disk!20%!10%!/
}
define service{
use local-service ;
host_name server19.example.com
service_description 登录用户数
check_command check_local_users!20!50
}
define service{
use local-service ;
host_name server19.example.com
service_description 进程总数
check_command check_local_procs!250!400!RSZDT
}
define service{
use local-service ;
host_name server19.example.com
service_description 系统负荷
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
define service{
use local-service ;
host_name server19.example.com
service_description 交换空间利用率
check_command check_local_swap!20!10
}
define service{
use local-service ;
host_name server19.example.com
service_description SSH
check_command check_tcp!22!1.0!10.0
notifications_enabled 0
}
define service{
use local-service ;
host_name server19.example.com
service_description http
check_command check_http
notifications_enabled 0
}
[root@server19 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg(检测配置是否有错误)
[root@server19 objects]# /etc/init.d/nagios reload
访问server19.example.com/nagios
#配置nagios监控远程主机上的服务(以mysql为例)
以下步骤在server7上实施:
[root@server7 kernel]# mysql
mysql> create database nagios;
mysql> grant select on nagios.* to nagios@'192.168.122.119';
mysql> quit
测试:在server19上执行/usr/local/nagios/libexec/check_mysql -H 192.168.122.7 -u nagios ,若出现类似如下输出说明正常:
Uptime: 9440 Threads: 1 Questions: 42 Slow queries: 0 Opens: 15 Flush tables: 1 Open tables: 8 Queries per second avg: 0.4
以下步骤在server19上实施:
先将 nagios-cn-3.2.3.tar.bz2 nagios-plugins-1.4.16.tar.gz装上
[root@server19 ~]# cd /usr/local/nagios/etc/objects/
[root@server19 objects]# vim commands.cfg
# 'check_mysql' command definition
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u $ARG1$ -d $ARG2$
}
[root@server19 objects]# vim hosts.cfg
define host{
use linux-server
host_name server7.example.com
alias Mysql server
address 192.168.122.7
}
[root@server19 objects]# vim services.cfg
define service{
use generic-service
host_name server7.example.com
service_description MYSQL
check_command check_mysql!nagios!nagios
}
[root@server19 objects]# /etc/init.d/nagios reload
访问server19.example.com/nagios查看如下图说明配置成功
#配置nagios监控远程主机
以下步骤在server7上实施:
先将nagios-plugins-1.4.16.tar.gz装上
[root@server7 kernel]# yum install xinetd -y
[root@server7 kernel]# tar zxf nrpe-2.14.tar.gz
[root@server7 kernel]# cd nrpe-2.14
[root@server7 nrpe-2.14]# ./configure
[root@server7 nrpe-2.14]# useradd nagios
[root@server7 nrpe-2.14]# chown nagios.nagios /usr/local/nagios/ -R
[root@server7 nrpe-2.14]# make all
[root@server7 nrpe-2.14]# make install-plugin
[root@server7 nrpe-2.14]# make install-daemon
[root@server7 nrpe-2.14]# make install-daemon-config
[root@server7 nrpe-2.14]# make install-xinetd
[root@server7 nrpe-2.14]# vim /etc/xinetd.d/nrpe
# default: on
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
only_from = 192.168.122.119
}
[root@server7 nrpe-2.14]# vim /etc/services
nrpe 5666/tcp #NRPE
[root@server7 nrpe-2.14]# vim /usr/local/nagios/etc/nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
[root@server7 nrpe-2.14]# /etc/init.d/xinetd restart
[root@server7 nrpe-2.14]# netstat -antple
tcp 0 0 :::5666 :::* LISTEN 0 40011 1407/xinetd
要能查看到5666端口
以下步骤在server19上实施:
[root@server19 kernel]# tar zxf nrpe-2.14.tar.gz
[root@server19 kernel]# cd nrpe-2.14
[root@server19 nrpe-2.14]# ./configure
[root@server19 nrpe-2.14]# make all
[root@server19 nrpe-2.14]# make install-plugin
测试:执行/usr/local/nagios/libexec/check_nrpe -H 192.168.122.7 -c check_disk 若出现如下输出为正常:
DISK OK - free space: / 3367 MB (77% inode=90%);| /=975MB;3659;4116;0;4574
[root@server19 nrpe-2.14]# cd /usr/local/nagios/etc/objects/
[root@server19 objects]# vim commands.cfg
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
[root@server19 objects]# vim services.cfg
define service{
use local-service
host_name server7.example.com
service_description 根分区
check_command check_nrpe!check_disk
}
define service{
use local-service
host_name server7.example.com
service_description 登录用户数
check_command check_nrpe!check_users
}
[root@server19 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg (检测配置是否有错误)
[root@server19 objects]# /etc/init.d/nagios reload
访问server19.example.com/nagios查看如下图说明配置成功
#配置飞信并与nagios整合
[root@server19 kernel]# mv fetion /usr/local/nagios/libexec/
[root@server19 kernel]# chmod +x /usr/local/nagios/libexec/fetion
[root@server19 kernel]# chown nagios.nagios /usr/local/nagios/libexec/fetion
[root@server19 kernel]# tar zxf linuxso_20101113.tar.gz -C /lib
[root@server19 kernel]# su - nagios
[nagios@server19 ~]$ /usr/local/nagios/libexec/fetion
此时会出现如下错误:
(1)/usr/local/nagios/libexec/fetion: error while loading shared libraries: libstdc++.so.6: cannot open shared object file: No such file or directory
(2)/usr/local/nagios/libexec/fetion: error while loading shared libraries: libgssapi_krb5.so.2: cannot open shared object file: No such file or directory
(3)/usr/local/nagios/libexec/fetion: error while loading shared libraries: libz.so.1: cannot open shared object file: No such file or directory
解决方法如下:
(1)[root@server19 kernel]# yum install libstdc++.so.6 -y
(2)[root@server19 kernel]# yum install libgssapi_krb5.so.2 -y
(3)[root@server19 kernel]# yum install libz.so.1 -y
注:出现以上输出为正常
测试:[root@server19 ~]# /usr/local/nagios/libexec/fetion --mobile=13484476621 --pwd=westos --to=13484476621 --msg-utf8="hello"
图形验证码跟你的fetion脚本在同一个目录下
[root@server19 ~]# su - nagios
[nagios@server19 ~]$ cd /usr/local/nagios/libexec/
[nagios@server19 libexec]$ vim fetion.sh
/usr/local/nagios/libexec/fetion --mobile=13484476621 --pwd=westos --to="$1" --msg-utf8="$2"
[nagios@server19 libexec]$ chmod +x fetion.sh
测试: [root@server19 ~]# /usr/local/nagios/libexec/fetion.sh 13484476621 "hello world"
[nagios@server19 libexec]$ cd /usr/local/nagios/etc/objects/
[nagios@server19 objects]$ vim commands.cfg
# 'notify-host-by-fetion' command definition
define command{
command_name notify-host-by-fetion
command_line $USER1$/fetion.sh $CONTACTPAGER$ "$NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$"
}
# 'notify-service-by-fetion' command definition
define command{
command_name notify-service-by-fetion
command_line $USER1$/fetion.sh $CONTACTPAGER$ "$NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$"
}
[nagios@server19 objects]$ vim templates.cfg
define contact{
name generic-contact ;
service_notification_period 24x7 ;
host_notification_period 24x7 ;
service_notification_options w,u,c,r,f,s ;
host_notification_options d,u,r,f,s ;
service_notification_commands notify-service-by-email,notify-service-by-fetion ;
host_notification_commands notify-host-by-email,notify-host-by-fetion ;
register 0 ;
}
[nagios@server19 objects]$ vim contacts.cfg
define contact{
contact_name nagiosadmin ;
use generic-contact ;
alias Nagios Admin ;
email nagios@localhost ;
pager 13484476621 ;
}
[root@server19 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg (检测配置是否有错误)
[nagios@server19 objects]$ /etc/init.d/nagios reload
至此nagios的安装及配置完毕!!!
3.安装与配置ganglia
[root@server19 kernel]# yum install rpm-build -y
[root@server19 kernel]# rpmbuild -tb ganglia-3.6.0.tar.gz
此时会出现如下错误:
error: Failed build dependencies:
libart_lgpl-devel is needed by ganglia-3.6.0-1.x86_64
gcc-c++ is needed by ganglia-3.6.0-1.x86_64
python-devel is needed by ganglia-3.6.0-1.x86_64
libconfuse-devel is needed by ganglia-3.6.0-1.x86_64
pcre-devel is needed by ganglia-3.6.0-1.x86_64
expat-devel is needed by ganglia-3.6.0-1.x86_64
rrdtool-devel is needed by ganglia-3.6.0-1.x86_64
apr-devel > 1 is needed by ganglia-3.6.0-1.x86_64
解决方法如下:
[root@server19 kernel]# yum install libart_lgpl-devel python-devel expat-devel apr-devel pcre-devel gcc-c++ -y
[root@server19 kernel]# rpm -ivh rrdtool-devel-1.3.8-6.el6.x86_64.rpm
[root@server19 kernel]# rpm -ivh libconfuse-*
注:在红帽企业7中由于对服务的管理机制不一样(由systemv到systemd),所以在直接进行编译(rpmbuild -tb ganglia-3.6.0.tar.gz)时会出现如下错误:
make[2]: *** No rule to make target `gmetad.service.in', needed by `gmetad.service'. Stop.
make[2]: Leaving directory `/root/rpmbuild/BUILD/ganglia-3.6.0/gmetad'
make[1]: *** [all-recursive] Error 1
make[1]: Leaving directory `/root/rpmbuild/BUILD/ganglia-3.6.0'
make: *** [all] Error 2
error: Bad exit status from /var/tmp/rpm-tmp.J7WIYv (%build)
RPM build errors:
bogus date in %changelog: Thu Mar 28 2008 Brad Nicholes <bnicholes@novell.com>
bogus date in %changelog: Wed Jul 10 2007 Bernard Li <bernard@vanhpc.org>
bogus date in %changelog: Wed Jul 3 2007 Brad Nicholes <bnicholes@novell.com>
bogus date in %changelog: Wed Jun 14 2007 Brad Nicholes <bnicholes@novell.com>
bogus date in %changelog: Fri Feb 25 2006 Bernard Li <bli@bcgsc.ca>
Bad exit status from /var/tmp/rpm-tmp.J7WIYv (%build)
解决方法如下:
[root@server19 kernel]# tar zxf ganglia-3.6.0.tar.gz
[root@server19 kernel]# cd ganglia-3.6.0
[root@server19 ganglia-3.6.0]# vim ganglia.spec (/files)
%files gmetad
%defattr(-,root,root)
%attr(0755,nobody,nobody)/var/lib/ganglia/
%{_sbindir}/gmetad
/etc/init.d/gmetad
/usr/lib/systemd/system/gmetad.service
%config(noreplace) /etc/sysconfig/gmetad
%{_mandir}/man1/gmetad*1*
%config(noreplace) %{conf_dir}/gmetad.conf
…
%files gmond
%defattr(-,root,root)
%{_bindir}/gmetric
%{_bindir}/gstat
%{_sbindir}/gmond
/etc/init.d/gmond
/usr/lib/systemd/system/gmond.service
%{_mandir}/man1/gmetric.1*
%{_mandir}/man1/gmond.1*
%{_mandir}/man1/gstat.1*
%{_mandir}/man5/gmond.conf.5*
[root@server19 ganglia-3.6.0]# cd gmetad
[root@server19 gmetad]# vim gmetad.service.in
[Unit]
Description=Ganglia Meta Daemon
After=network.target
[Service]
Type=forking
PIDFile=/run/gmetad.pid
ExecStart=/usr/sbin/gmetad --pid-file=/run/gmetad.pid
#EnvironmentFile=-/etc/sysconfig/gmetad
[Install]
WantedBy=multi-user.target
[root@server19 gmetad]# cd ../gmond/
[root@server19 gmond]# vim gmond.service.in
[Unit]
Description=Ganglia Monitor Daemon
After=network.target
[Service]
Type=forking
PIDFile=/run/gmond.pid
ExecStart=/usr/sbin/gmond --pid-file=/run/gmond.pid
[Install]
WantedBy=multi-user.target
[root@server19 gmond]# cd /root/kernel
[root@server19 kernel]# rm -rf ganglia-3.6.0.tar.gz
[root@server19 kernel]# tar zcvf ganglia-3.6.0.tar.gz ganglia-3.6.0/ (重新打包)
[root@server19 kernel]# rpmbuild -tb ganglia-3.6.0.tar.gz
[root@server19 kernel]# cd ~/rpmbuild/RPMS/x86_64/
[root@server19 x86_64]# rpm -ivh *
[root@server19 x86_64]# vim /etc/ganglia/gmond.conf
cluster {
name = "my cluster"
owner = "unspecified"
latlong = "unspecified"
url = "unspecified"
}
[root@server19 x86_64]# /etc/init.d/gmetad start
[root@server19 x86_64]# /etc/init.d/gmond start
若配置成功在/var/lib/ganglia/rrds/会自动建立my cluster和__SummaryInfo__两个文件夹,采集到的信息存储在my cluster中.
#将ganglia以网页的形式发布
[root@server19 kernel]# rpmbuild -tb ganglia-web-3.5.10.tar.gz
注:在红帽企业7中编译较新版本的ganglia-web时,可能会出现以下错误:
+ cd ganglia-web-3.6.2
+ /usr/bin/chmod -Rf a+rX,u+w,g-w,o-w .
+ /usr/bin/cp /root/apache.conf .
/usr/bin/cp: cannot stat '/root/apache.conf': No such file or directory
error: Bad exit status from /var/tmp/rpm-tmp.zQclcS (%prep)
解决方法如下:
[root@server19 kernel]# tar zxf ganglia-web-3.6.2.tar.gz
[root@server19 kernel]# cd ganglia-web-3.6.2
[root@server19 ganglia-web-3.6.2]# cp apache.conf /root/rpmbuild/SOURCES/
[root@server19 ganglia-web-3.6.2]# cp /root/kernel/ganglia-web-3.6.2.tar.gz /root/rpmbuild/SOURCES/
[root@server19 ganglia-web-3.6.2]# rpmbuild -bb ganglia-web.spec
[root@server19 kernel]# cd ~/rpmbuild/RPMS/noarch/
[root@server19 noarch]# yum localinstall ganglia-web-3.5.10-1.noarch.rpm -y
此时如果在企业6中可以直接访问server19.example.com/ganglia,出现如下页面表示配置成功,在企业7中使用较新的ganglia-web时可能出现如下错误:
解决方法如下:
[root@server19 noarch]# cd /etc/httpd/conf.d/
[root@server19 conf.d]# rm -rf ganglia-web.conf
此时刷新页面会出现如下错误:
解决方法如下:
[root@server19 conf.d]# chmod 777 /var/lib/ganglia-web/dwoo/*
#配置ganglia监控远程主机并进行分组监控
以下步骤在server19上实施:
[root@server19 ~]# cd ~/rpmbuild/RPMS/x86_64/
[root@server19 x86_64]# scp ganglia-gmond-* ganglia-devel-3.6.0-1.x86_64.rpm libganglia-3.6.0-1.x86_64.rpm root@192.168.122.7:~/kernel
[root@server19 x86_64]# vim /etc/ganglia/gmetad.conf
# data_source "my cluster" 10 localhost my.machine.edu:8649 1.2.3.5:8655
# data_source "my grid" 50 1.3.4.7:8655 grid.org:8651 grid-backup.org:8651
# data_source "another source" 1.3.4.7:8655 1.3.4.8
data_source "webcluster" localhost:8680 192.168.122.7:8680
[root@server19 x86_64]# vim /etc/ganglia/gmond.conf
cluster {
name = "webcluster"
owner = "unspecified"
latlong = "unspecified"
url = "unspecified"
}
…
udp_send_channel {
#bind_hostname = yes # Highly recommended, soon to be default.
# This option tells gmond to use a source address
# that resolves to the machine's hostname. Without
# this, the metrics may appear to come from any
# interface and the DNS names associated with
# those IPs will be used to create the RRDs.
mcast_join = 239.2.11.71
port = 8680
ttl = 1
}
…
udp_recv_channel {
mcast_join = 239.2.11.71
port = 8680
bind = 239.2.11.71
retry_bind = true
# Size of the UDP buffer. If you are handling lots of metrics you really
# should bump it up to e.g. 10MB or even higher.
# buffer = 10485760
}
…
tcp_accept_channel {
port = 8680
# If you want to gzip XML output
gzip_output = no
}
以下步骤在server7上实施:
[root@server7 ~]# cd ~/kernel/
[root@server7 kernel]# rpm -ivh ganglia-gmond-*
此时会出现如下错误:
error: Failed dependencies:
libconfuse.so.0()(64bit) is needed by ganglia-gmond-3.6.0-1.x86_64
libganglia-3.6.0.so.0()(64bit) is needed by ganglia-gmond-3.6.0-1.x86_64
libconfuse.so.0()(64bit) is needed by ganglia-gmond-modules-python-3.6.0-1.x86_64
解决方法如下:
[root@server7 kernel]# rpm -ivh libconfuse-*
[root@server7 kernel]# rpm -ivh libganglia-3.6.0-1.x86_64.rpm
[root@server7 kernel]# rpm -ivh ganglia-devel-3.6.0-1.x86_64.rpm
此时会出现如下错误:
error: Failed dependencies:
apr-devel > 1 is needed by ganglia-devel-3.6.0-1.x86_64
expat-devel is needed by ganglia-devel-3.6.0-1.x86_64
解决方法如下:
[root@server7 kernel]# yum install apr-devel expat-devel -y
[root@server7 kernel]# vim /etc/ganglia/gmond.conf
cluster {
name = "webcluster"
owner = "unspecified"
latlong = "unspecified"
url = "unspecified"
}
…
udp_send_channel {
#bind_hostname = yes # Highly recommended, soon to be default.
# This option tells gmond to use a source address
# that resolves to the machine's hostname. Without
# this, the metrics may appear to come from any
# interface and the DNS names associated with
# those IPs will be used to create the RRDs.
mcast_join = 239.2.11.71
port = 8680
ttl = 1
}
…
udp_recv_channel {
mcast_join = 239.2.11.71
port = 8680
bind = 239.2.11.71
retry_bind = true
# Size of the UDP buffer. If you are handling lots of metrics you really
# should bump it up to e.g. 10MB or even higher.
# buffer = 10485760
}
…
tcp_accept_channel {
port = 8680
# If you want to gzip XML output
gzip_output = no
}
注:服务端设置端口与客户端设置端口需要一致,此处均为8680
[root@server7 kernel]# /etc/init.d/gmond start
若在server19的/var/lib/ganglia/rrds/webcluster/下出现server7.example.com的目录说明配置成功
注:数据存储在/var/lib/ganglia/rrds中,如果重新配置了组名等信息,需要先暂停gmetad和gmond服务,将/var/lib/ganglia/rrds中的文件全部清空,然后启动服务,让服务自动重新建立数据文件。
至此ganglia的安装及配置完毕!!!
4.将nagios与ganglia整合
[root@server19 ~]# find / -name check_ganglia.py
[root@server19 ~]# cp /root/rpmbuild/BUILD/ganglia-3.6.0/contrib/check_ganglia.py /usr/local/nagios/libexec/
[root@server19 ~]# chmod +x /usr/local/nagios/libexec/check_ganglia.py
[root@server19 ~]# chown nagios.nagios /usr/local/nagios/libexec/check_ganglia.py
[root@server19 ~]# vim /usr/local/nagios/libexec/check_ganglia.py
if critical > warning:
    if value >= critical:
        print "CHECKGANGLIA CRITICAL: %s is %.2f" % (metric, value)
        sys.exit(2)
    elif value >= warning:
        print "CHECKGANGLIA WARNING: %s is %.2f" % (metric, value)
        sys.exit(1)
    else:
        print "CHECKGANGLIA OK: %s is %.2f" % (metric, value)
        sys.exit(0)
else:
    if critical >= value:
        print "CHECKGANGLIA CRITICAL: %s is %.2f" % (metric, value)
        sys.exit(2)
    elif warning >= value:
        print "CHECKGANGLIA WARNING: %s is %.2f" % (metric, value)
        sys.exit(1)
    else:
        print "CHECKGANGLIA OK: %s is %.2f" % (metric, value)
        sys.exit(0)
测试:
/usr/local/nagios/libexec/check_ganglia.py -h server7.example.com -m disk_free_percent_rootfs -w 20 -c 10
CHECKGANGLIA OK: disk_free_percent_rootfs is 73.57
/usr/local/nagios/libexec/check_ganglia.py -h server7.example.com -m disk_free_percent_rootfs -w 80 -c 70
CHECKGANGLIA WARNING: disk_free_percent_rootfs is 73.57
/usr/local/nagios/libexec/check_ganglia.py -h server7.example.com -m disk_free_percent_rootfs -w 90 -c 80
CHECKGANGLIA CRITICAL: disk_free_percent_rootfs is 73.57
注:-h后所指定的主机名或IP需要与/var/lib/ganglia/rrds/webcluster/中的文件夹名称对应
[root@server19 ~]# cd /usr/local/nagios/etc/objects/
[root@server19 objects]# vim commands.cfg
# 'check_ganglia' command definition
define command{
command_name check_ganglia
command_line $USER1$/check_ganglia.py -h $HOSTNAME$ -m $ARG1$ -w $ARG2$ -c $ARG3$
}
[root@server19 objects]# vim hosts.cfg
define host{
use linux-server
host_name server7.example.com
alias ganglia and mysql client
address 192.168.122.7
}
define hostgroup {
hostgroup_name ganglia-servers
alias ganglia-servers
members server7.example.com
}
[root@server19 objects]# vim templates.cfg
define service {
use generic-service
name ganglia-service
hostgroup_name ganglia-servers
service_groups ganglia-metrics
}
[root@server19 objects]# vim services.cfg
define servicegroup {
servicegroup_name ganglia-metrics
alias ganglia-metrics
}
define service{
use ganglia-service
service_description 根空闲
check_command check_ganglia!disk_free_percent_rootfs!20!10
}
define service{
use ganglia-service
service_description 内存空间
check_command check_ganglia!mem_free!50000!30000(少于50M警告,少于30M紧急)
}
[root@server19 objects]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg (检测配置是否有错误)
[root@server19 objects]# /etc/init.d/nagios reload
访问server19.example.com/nagios出现如下页面说明配置成功.
至此nagios和ganglia整合完毕!!!
5.将nagios与cacti整合
可参考: https://trac.assembla.com/npc
首先确保你的php支持pdo和json模块
[root@server19 kernel]# tar zxf npc-2.0.4.tar.gz -C /var/www/html/cacti/plugins
启动npc插件
选择Settings
选择NPC
如上图配置并保存
[root@server19 kernel]# vim /usr/local/nagios/etc/nagios.cfg
broker_module=/usr/local/nagios/bin/ndomod.o config_file=/usr/local/nagios/etc/ndomod.cfg
[root@server19 kernel]# tar zxf ndoutils-1.5.2.tar.gz (ndoutils负责将nagios采集到的数据存到数据库中,通过rrdtool进行绘图)
[root@server19 kernel]# cd ndoutils-1.5.2
[root@server19 ndoutils-1.5.2]# ./configure
[root@server19 ndoutils-1.5.2]# make
[root@server19 ndoutils-1.5.2]# cd src/
[root@server19 src]# cp ndomod-3x.o /usr/local/nagios/bin/ndomod.o
[root@server19 src]# cp ndo2db-3x /usr/local/nagios/bin/ndo2db
[root@server19 src]# cd ../config
[root@server19 config]# cp ndomod.cfg-sample /usr/local/nagios/etc/ndomod.cfg
[root@server19 config]# cp ndo2db.cfg-sample /usr/local/nagios/etc/ndo2db.cfg
[root@server19 config]# chown nagios.nagios /usr/local/nagios/bin/ -R
[root@server19 config]# chown nagios.nagios /usr/local/nagios/etc/ -R
[root@server19 config]# cd /usr/local/nagios/etc/
[root@server19 etc]# vim ndo2db.cfg
ndo2db_user=nagios
ndo2db_group=nagios
#socket_type=unix
socket_type=tcp
socket_name=/usr/local/nagios/var/ndo.sock
tcp_port=5668
db_servertype=mysql
db_host=localhost
db_port=3306
db_name=cacti
db_prefix=npc_
db_user=cacti
db_pass=cacti
max_timedevents_age=1440
max_systemcommands_age=10080
max_servicechecks_age=10080
max_hostchecks_age=10080
max_eventhandlers_age=44640
max_externalcommands_age=44640
debug_level=1
debug_verbosity=1
debug_file=/usr/local/nagios/var/ndo2db.debug
max_debug_file_size=1000000
[root@server19 etc]# vim ndomod.cfg
instance_name=default
output_type=tcpsocket
#output_type=unixsocket
output=127.0.0.1
#output=/usr/local/nagios/var/ndo.sock
tcp_port=5668
output_buffer_items=5000
buffer_file=/usr/local/nagios/var/ndomod.tmp
file_rotation_interval=14400
file_rotation_timeout=60
reconnect_interval=15
reconnect_warning_interval=15
data_processing_options=-1
config_output_options=2
[root@server19 etc]# /etc/init.d/nagios reload
[root@server19 etc]# /usr/local/nagios/bin/ndo2db -c /usr/local/nagios/etc/ndo2db.cfg
[root@server19 etc]# ps ax
7877 ? Ss 0:00 /usr/local/nagios/bin/ndo2db -c /usr/local/nagios/etc
查看到类似如上进程为正常
访问server19.example.com/cacti可在npc下看到如下页面
此时执行cat /var/log/messages可看到如下报错:
Oct 13 15:43:29 server19 ndo2db: mysql_error: 'Unknown column 'long_output' in 'field list''
解决方法如下:
当使用的npc插件版本较低时使用如下sql语句向数据库中添加字段:
[root@server19 kernel]# mysql cacti < add-old.sql
add-old.sql中的内容为:
ALTER TABLE `npc_hostchecks` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_hoststatus` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_servicechecks` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_servicestatus` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_statehistory` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_eventhandlers` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_systemcommands` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_notifications` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
当使用的npc插件版本较高时使用如下sql语句向数据库中添加字段:
[root@server19 kernel]# mysql cacti < add-new.sql
add-new.sql中的内容为:
CREATE TABLE IF NOT EXISTS `npc_service_parentservices` (
`service_parentservice_id` int(11) NOT NULL auto_increment,
`instance_id` smallint(6) NOT NULL default '0',
`service_id` int(11) NOT NULL default '0',
`parent_service_object_id` int(11) NOT NULL default '0',
PRIMARY KEY (`service_parentservice_id`),
UNIQUE KEY `instance_id` (`service_id`,`parent_service_object_id`)
) ENGINE=MyISAM COMMENT='Parent services';
ALTER TABLE `npc_hostchecks` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_hoststatus` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_servicechecks` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_servicestatus` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_statehistory` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_eventhandlers` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_systemcommands` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_notifications` ADD COLUMN `long_output` varchar(8192) NOT NULL default '' AFTER `output`;
ALTER TABLE `npc_services` ADD COLUMN `importance` varchar(8192) NOT NULL default '' AFTER `icon_image_alt`;
ALTER TABLE `npc_contacts` ADD COLUMN `minimum_importance` varchar(8192) NOT NULL default '' AFTER `notify_host_downtime`;
ALTER TABLE `npc_hosts` ADD COLUMN `importance` varchar(8192) NOT NULL default '' AFTER `z_3d`;
访问server19.example.com/cacti能看到如下页面说明配置成功.
至此nagios和cacti整合完毕!!!
监控系统配置完毕!!!