使用 smokeping 监控网络状况 - 迁

 

 

基础

环境

centos 7.2 
apache 2.4.6 
smokeping 2.006

安装

yum install rrdtool perl perl-Net-Telnet perl-Net-DNS perl-LDAP perl-libwww-perl perl-RadiusPerl perl-IO-Socket-SSL perl-Socket6 perl-CGI-SpeedyCGI cpan fping -y

使用 cpan

install Config::Grammar 
install rrdtool 

 

源码安装 smokeping 

./configure --prefix=/usr/local/smokeping 
/usr/bin/gmake install 

 

 使用模块检测并安装

./setup/build-perl-modules.sh /usr/local/smokeping/thirdparty

 

 注意安装过程中遇到 smokeping 编译失败的问题,一般是缺少 perl 相关包 造成的,特别是 : perl-libwww-perl-6.05-2.el7.noarch

一般都会报 RRDs 找不到,可以下载 rrdtool 直接拷贝使用

RRDs 库,perl 检测模块的时候会提示找不到

# Copy the RRDs Perl binding (module, auto/ directory and shared object)
# from the rrdtool tree into the system Perl library directory.
cp rrdtool/lib/perl/5.10.1/x86_64-linux-thread-multi/RRDs.pm /usr/lib64/perl5/
cp -r rrdtool/lib/perl/5.10.1/x86_64-linux-thread-multi/auto/RRDs/ /usr/lib64/perl5/
cp rrdtool/lib/perl/5.10.1/x86_64-linux-thread-multi/auto/RRDs/RRDs.so /usr/lib64/perl5

 

除了 RRDs 比较坑,其他安装都简单的,配置下 apache 就可以了

<VirtualHost *:9999>

DocumentRoot / 
ServerName smokeping.com
ErrorLog logs/smokeping-error.log
CustomLog logs/smokeping-access.log common
Alias /cache "/usr/local/smokeping/cache/"
Alias /cropper "/usr/local/smokeping/htdocs/cropper/"
Alias /smokeping "/usr/local/smokeping/htdocs/smokeping.fcgi"
Alias /download "/usr/local/download"
<Directory "/usr/local/smokeping">
AddHandler cgi-script .fcgi .cgi
Options ExecCGI
<IfModule dir_module>
     DirectoryIndex smokeping.fcgi
</IfModule>
AllowOverride All
Require all granted
</Directory>

<Directory "/usr/local/download">
Options Indexes FollowSymLinks
AllowOverride All
Require all granted
</Directory>

</VirtualHost>
View Code

config 配置文件

一般设置

*** General ***

owner    = Peter Random
contact  = some@address.nowhere
mailhost = my.mail.host
#sendmail = /usr/sbin/sendmail
# NOTE: do not put the Image Cache below cgi-bin
# since all files under cgi-bin will be executed ... this is not
# good for images.
imgcache = /usr/local/smokeping/cache
imgurl   = cache
datadir  = /usr/local/smokeping/data
piddir  = /usr/local/smokeping/var
cgiurl   = http://<IP>/smokeping.cgi # 此处要写smokeping的访问地址  
smokemail = /usr/local/smokeping/etc/smokemail.dist
tmail = /usr/local/smokeping/etc/tmail.dist
# specify this to get syslog logging
syslogfacility = local0
# each probe is now run in its own process
# disable this to revert to the old behaviour
# concurrentprobes = no

 

告警设置

*** Alerts ***
to = |/usr/local/smokeping/bin/alert.sh
from = sentinel@huored.com

+bigloss
type = loss
# in percent
pattern = ==0%,==0%,==0%,==0%,>0%,>0%,>0%
comment = suddenly there is packet loss

+someloss
type = loss
# in percent
pattern = >0%,*12*,>0%,*12*,>0%
comment = loss 3 times  in a row

+startloss
type = loss
# in percent
pattern = ==S,>0%,>0%,>0%
comment = loss at startup

+rttdetect
type = rtt
# in milli seconds
pattern = <10,<10,<10,<10,<10,<100,>100,>100,>100
comment = routing messed up again ?

+hostdown
type = loss
# in percent
pattern = ==0%,==0%,==0%, ==U
comment = no reply

+lossdetect #我一般使用lossdetect 策略
type = loss
# in percent
pattern = ==0%,==0%,==0%,==0%,>20%,>20%,>20%
comment = suddenly there is packet loss

 

检测类型

*** Probes ***

+ FPing    # ping检测,直接下载到 /usr/sbin/fping(注意glibc的版本)
binary = /usr/sbin/fping
packetsize = 1048  #设置ping的包大小

+ TCPPing  # 检测端口,有些高防IP不让ping的
binary = /usr/bin/tcpping  # 注意,tcpping是一个shell脚本并调用traceroute,系统上要把traceroute装一下
# The following variables can be overridden in each target section 可以在每个section里面定义port
pings = 5
port = 80   

注意 tcpping 不同于 tcping

下载 fping
github tcpping

监控列表

*** Targets ***

probe = FPing

menu = Top
title = Network Latency Grapher
remark = Welcome to the SmokePing website of xxx Company. \
         Here you will learn all about the latency of our network.

+ Bench
menu= Bench-Targets
#parents = owner:/Test/James location:/

++ localhost
menu=localhost
title=localhost
alerts=someloss
host=127.0.0.1

++ DX-DNS  # 一般我会加一个DNS对比一下本机的情况
menu=DX-DNS
title=202.101.172.35
alerts=lossdetect
host=202.101.172.35


+SD-IDC
menu=SD-IDC
++ SD-GW
menu=SD-GW
title=SD-GW-XXXXXXXX
alerts=lossdetect
host=XXXXXX

++ SD-JCZJ
probe = TCPPing  # 此处调用了tcpping来测试,不写的话默认是Fping
menu=SD-JCZJ
title=SD-JCZJ-XXXXXX
alerts=lossdetect
host=XXXXXXX

 

#!/bin/bash
#
# chkconfig: 2345 80 05
# Description: Smokeping init.d script
# Write by : linux-Leon_xiedi
# Get function from functions library
. /etc/init.d/functions
# Start the service Smokeping.
# Launches the smokeping binary with its log file option and reports the
# outcome through the success/failure helpers sourced from
# /etc/init.d/functions.
# Returns: 0 if the smokeping command exited successfully, 1 otherwise.
function start() {
                echo -n "Starting Smokeping: "
                if /usr/local/smokeping/bin/smokeping --logfile=/usr/local/smokeping/var/smokeping.log >/dev/null 2>&1; then
                        ### Create the lock file only after a successful launch ###
                        touch /var/lock/subsys/smokeping
                        success $"Smokeping startup"
                        echo
                        return 0
                fi
                # The launch failed: do not leave a stale lock file behind
                # and do not claim success.
                failure $"Smokeping startup"
                echo
                return 1
}
# Stop the service Smokeping.
# Sends SIGTERM to every process whose command line contains the smokeping
# binary path, polls for up to about 5 seconds for them to disappear, and
# only then falls back to SIGKILL for any that remain. Finally removes the
# lock file and reports through the success helper.
function stop() {
                local pattern="/usr/local/smokeping/bin/smokeping"
                local pids tries=0
                echo -n "Stopping Smokeping: "
                pids=$(pgrep -f "$pattern")
                if [ -n "$pids" ]; then
                        # Word splitting of $pids is intentional: one PID per argument.
                        kill $pids >/dev/null 2>&1
                        while [ "$tries" -lt 25 ]; do
                                pids=$(pgrep -f "$pattern")
                                [ -z "$pids" ] && break
                                sleep 0.2
                                tries=$((tries + 1))
                        done
                        # Anything still alive ignored SIGTERM; force it down.
                        if [ -n "$pids" ]; then
                                kill -9 $pids >/dev/null 2>&1
                        fi
                fi
                ### Now, delete the lock file ###
                rm -f /var/lock/subsys/smokeping
                success $"Smokeping shutdown"
                echo
}
# Show status about Smokeping.
# Counts the lines of `ps -ef` output that contain "smokeping [FPing]"
# (ignoring grep's own entry) and prints a one-line verdict.
# Returns: 0 when at least one such process is listed, 3 when none is
#          (3 is the LSB init-script code for "program is not running").
function status() {
                local count
                count=$(ps -ef | grep -v grep | grep -c "smokeping \[FPing\]")
                if [ "$count" -eq 0 ]; then
                        echo "Smokeping is not run"
                        return 3
                fi
                echo "Smokeping is running"
                return 0
}
### main logic ###
# Dispatch on the requested action ($1) and exit with the return status of
# that action, so the caller can tell success from failure. "reload" is
# handled as a full restart. Any other argument prints the usage line and
# exits 1.
case "$1" in
start)
        start
        ;;
stop)
        stop
        ;;
status)
        status
        ;;
restart|reload)
        stop
        start
        ;;
*)
        echo $"Usage: $0 {start|stop|restart|reload|status}"
        exit 1
        ;;
esac
# $? here is the status of the last command run inside the case arm.
exit $?
启动脚本

 

#!/bin/bash
########################################################
# Script to email a mtr report on alert from Smokeping #
########################################################
# Positional arguments, in the order they are assigned below:
#   $1 alert name   $2 target   $3 loss pattern   $4 rtt pattern   $5 hostname
#
# The report is assembled in memory rather than in a fixed /tmp file, so
# alerts that fire at the same time cannot overwrite each other's report.
alertname=${1:-}
target=${2:-}
losspattern=${3:-}
rtt=${4:-}
hostname=${5:-}
smokename="ALIYUN-smokeping-"

# Prints the mail subject.
# Arguments: $1 - loss pattern, $2 - target, $3 - hostname
# A loss pattern of exactly "loss: 0%" produces the "Clear" subject.
alert_subject() {
  if [ "$1" = "loss: 0%" ]; then
    printf '%s\n' "Clear-${smokename}-Alert: $2 host: $3"
  else
    printf '%s\n' "${smokename}Alert: $2 – $3"
  fi
}

# Prints the report body.
# Arguments: $1 - alert name, $2 - target, $3 - loss pattern,
#            $4 - rtt pattern, $5 - hostname
# Values are printed verbatim (no word splitting, no glob expansion).
alert_report() {
  printf '%s\n' "MTR Report for hostname: $5" ""
  # To include a route trace, append here the output of:
  #   sudo /usr/sbin/mtr -n --report "$5"
  printf '%s %s\n' \
    "Name of Alert: " "$1" \
    "Target: " "$2" \
    "Loss Pattern: " "$3" \
    "RTT Pattern: " "$4" \
    "Hostname: " "$5"
  printf '\n'
}

subject=$(alert_subject "$losspattern" "$target" "$hostname")
body=$(alert_report "$alertname" "$target" "$losspattern" "$rtt" "$hostname")

printf '%s %s\n' "subject: " "$subject"
if [ -n "$body" ]; then
  echo "----------------"

  Email=/usr/bin/sendEmail

  # Sender's SMTP server
  smtp=xxx
  # Sender's account
  user=xxxx
  # Sender's password
  # NOTE(review): a hard-coded password is also visible in the process list
  # when passed as an argument; consider reading it from a protected file.
  passwd='xxxxx'
  # Carbon-copy address (unused)
  #cc=admin@attacker.club
  # Recipient address(es), whitespace-separated
  to="xxxx"

  # Split the recipient list on whitespace without glob expansion, so that
  # several addresses are still passed as separate arguments.
  read -r -a recipients <<<"$to"

  "$Email" -f "$user" -s "$smtp" -xu "$user" -xp "$passwd" \
    -t "${recipients[@]}" -u "$subject" -m "$body" \
    -o message-charset=utf-8
fi
邮件告警脚本

 

排错

/usr/local/smokeping/bin/smokeping --logfile=/usr/local/smokeping/var/smokeping.log --debug

 

效果如图

 

分布式监控

smokeping 的 slave 模式

/usr/local/smokeping/bin/smokeping --master-url=http://XXXXXXXXXX:29999/smokeping --cache-dir=/usr/local/smokeping/var --logfile=/usr/local/smokeping/var/smokeping.log --shared-secret=/usr/local/smokeping/etc/smokeping_secrets.dist --slave-name=zhicai 

注意: smokeping_secrets.dist 只需要写密码

 

看懂 smokeping 展示图

 

X 轴表示时间轴
Y 轴表示 ping 的时间值
3.6ms 表示 Ping 质量测试的响应速度平均值
中间红线能看出网络是否有抖动
直线表示稳定, 有频繁曲线表示网络抖动;如果是阴影表示有网络小幅度抖动
ls 字段表示 Ping 质量测试的丢包率

 

根据网络抖动判断:抖动范围超过 10ms 的都属于网络不稳定。我们要每天观察是否存在有规律的网络抖动现象!

 

从这个报告图里可以看出:
1. 曲线都是绿色的 0 丢包或偶尔一两个丢包算合格
2. 曲线无抖动, 阴影不明显或偶尔有抖动的算合格 (包裹阴影部分)
3. Ping 值小于 30ms 为佳,小于 50ms 也算合格

 

posted @ 2017-11-08 12:31  richardzgt  阅读(2021)  评论(0编辑  收藏  举报