夜莺监控
夜莺监控 文章参考
https://blog.csdn.net/m0_61318185/article/details/136303124
https://blog.csdn.net/weixin_62173811/article/details/130189727
官网地址
夜莺项目整体介绍 - 快猫星云 (flashcat.cloud)
需要安装下载的软件
MYSQL
MYSQL8.0 安装文档mysql8.0详细安装 - 不会游泳的鱼丶 - 博客园 (cnblogs.com)
# install mysql (这里实际安装的是与 MySQL 兼容的 MariaDB)
yum -y install mariadb*
systemctl enable mariadb
systemctl restart mariadb
mysql -e "SET PASSWORD FOR 'root'@'localhost' = PASSWORD('1234');"
# install redis
yum install -y redis
systemctl enable redis
systemctl restart redis
VM时序库
#解压后只有一个二进制文件,没有配置文件,通过命令行参数来调整配置;可以用systemctl管理
nohup ./victoria-metrics-prod &
N9E 程序项目-WEB端
mkdir -p /opt/n9e && cd /opt/n9e
# 去 https://github.com/ccfos/nightingale/releases 找最新版本的包,文档里的包地址可能已经不是最新的了
tarball=n9e-v6.0.0-ga.4.0.1-linux-amd64.tar.gz
urlpath=https://download.flashcat.cloud/${tarball}
wget $urlpath || exit 1
tar zxvf ${tarball}
#进入解压目录 导入n9e数据库文件
mysql -uroot -p1234 < n9e.sql
#可以进入解压目录下的etc/config.toml,修改服务默认端口以及数据库的连接地址配置(特别注意VictoriaMetrics的单机和集群版配置url写法不同)
[[Pushgw.Writers]]
#集群版VictoriaMetrics配置
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"
#单机版VictoriaMetrics配置(单机版默认监听8428端口,若启动时未通过 -httpListenAddr 修改端口,下面的9090应改为8428)
# Url = "http://127.0.0.1:9090/api/v1/write"
#注意一定要修改配置文件中HTTP各个部分的BasicAuth
#启动服务
nohup ./n9e &> n9e.log &
# check logs
tail ./n9e.log
#如果启动成功,n9e 默认会监听在 17000 端口。上面使用 nohup 简单演示,生产环境建议用 systemd 托管
n9e配置文件
[Global]
RunMode = "release"

[Log]
#Output 改成file日志就会输出到Dir对应的目录中,需要配置日志切割,按照KeepHours的时间进行切割,KeepHours= 4就是保存4个小时的日志。或者按照大小来切分,RotateNum = 3 和RotateSize = 256 就是每个日志大小256m,保存3个
# log write dir
Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
# KeepHours= 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256

[HTTP]
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 17000
# https cert file path
CertFile = ""
# https key file path
KeyFile = ""
# whether print access log
PrintAccessLog = false
# whether enable pprof
PProf = false
# expose prometheus /metrics?
ExposeMetrics = true
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120

# 下面这些BasicAuth接口的认证信息注意改一下
[HTTP.Pushgw]
Enable = true
# [HTTP.Pushgw.BasicAuth]
# user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.Alert]
Enable = true
[HTTP.Alert.BasicAuth]
user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.Heartbeat]
Enable = true
# [HTTP.Heartbeat.BasicAuth]
# user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.Service]
Enable = true
[HTTP.Service.BasicAuth]
user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.JWTAuth]
# signing key 注意改一下
SigningKey = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# unit: min
AccessExpired = 1500
# unit: min
RefreshExpired = 10080
RedisKeyPrefix = "/jwt/"

[HTTP.ProxyAuth]
# if proxy auth enabled, jwt auth is disabled
Enable = false
# username key in http proxy header
HeaderUserNameKey = "X-User-Name"
DefaultRoles = ["Standard"]

[DB]
# postgres: host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
# 下面是mysql的dsn。上面是postgres的dsn写法
DSN="root:1234@tcp(127.0.0.1:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
# enable debug mode or not
Debug = false
# mysql postgres
DBType = "mysql"
# unit: s
MaxLifetime = 7200
# max open connections
MaxOpenConns = 150
# max idle connections
MaxIdleConns = 50
# table prefix
TablePrefix = ""
# enable auto migrate or not
# EnableAutoMigrate = false

[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs) ga3可以支持集群版
Address = "127.0.0.1:6379"
# Username = ""
# Password = ""
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# standalone cluster sentinel
RedisType = "standalone"
# Mastername for sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
# SentinelPassword = ""

[Alert]
[Alert.Heartbeat]
# auto detect if blank,为空会自动探测
IP = ""
# unit ms 告警引擎的心跳时间,默认1000ms。多个n9e分摊告警的匹配,对全量的告警引擎中心端的心跳机制
Interval = 1000
ClusterName = "default"

# [Alert.Alerting]
# NotifyConcurrency = 10

[Center]
MetricsYamlFile = "./etc/metrics.yaml"
I18NHeaderKey = "X-Language"

[Center.AnonymousAccess]
#是不是可以匿名访问时序数据的接口,可以匿名访问告警事件的详情页面,为了安全性可以改成false
PromQuerier = true
AlertDetail = true

[Pushgw]
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
# ForceUseServerTS = false

# [Pushgw.DebugSample]
# ident = "xx"
# __name__ = "xx"

# [Pushgw.WriterOpt]
# # Writer Options
# QueueCount = 1000
# QueueMaxSize = 1000000
# QueuePopSize = 1000
# # ident or metric
# ShardingKey = "ident"

[[Pushgw.Writers]]
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"
Url = "http://127.0.0.1:9090/api/v1/write"
# Basic auth username
BasicAuthUser = ""
# Basic auth password
BasicAuthPass = ""
# timeout settings, unit: ms
Headers = ["X-From", "n9e"]
Timeout = 10000
DialTimeout = 3000
TLSHandshakeTimeout = 30000
ExpectContinueTimeout = 1000
IdleConnTimeout = 90000
# time duration, unit: ms
KeepAlive = 30000
MaxConnsPerHost = 0
MaxIdleConns = 100
MaxIdleConnsPerHost = 100

## Optional TLS Config
# UseTLS = false
# TLSCA = "/etc/n9e/ca.pem"
# TLSCert = "/etc/n9e/cert.pem"
# TLSKey = "/etc/n9e/key.pem"
# InsecureSkipVerify = false

# [[Writers.WriteRelabels]]
# Action = "replace"
# SourceLabels = ["__address__"]
# Regex = "([^:]+)(?::\\d+)?"
# Replacement = "$1:80"
# TargetLabel = "__address__"
Categraf 数据采集:需要监控哪个服务器,就在哪个服务器上部署
#在目标机器部署,只需要 categraf 二进制、以及 conf 目录,
#conf 下有一个主配置文件:config.toml,定义机器名、全局采集频率、全局附加标签、remote write backend地址等;另外就是各种采集插件的配置目录,以input.打头,如果某个采集器 xx 不想启用,把 input.xx 改个其他前缀(或者删除这个目录),比如 bak.input.xx,categraf 就会忽略这个采集器。
vim config.toml
#修改n9e的地址,通过这个地址来推数据
[[writers]]
url = "http://127.0.0.1:17000/prometheus/v1/write"
#修改heartbeat为true,里面的地址改成n9e的地址,通过这个地址来心跳
[heartbeat]
enable = true
# report os version cpu.util mem.util metadata
url = "http://127.0.0.1:17000/v1/n9e/heartbeat"
#启动前测试
./categraf --test --debug
#启动categraf.可以用nohup,不过最好可以用systemctl 托管
nohup ./categraf &
#启动成功之后,在web页面里面的对象列表可以看到这台机器(因为上面的heartbeat的配置,heartbeat的配置会上报redis,然后从redis里面读的)