夜莺监控

 

夜莺监控 文章参考 

https://blog.csdn.net/m0_61318185/article/details/136303124

https://blog.csdn.net/weixin_62173811/article/details/130189727

官网地址

夜莺项目整体介绍 - 快猫星云 (flashcat.cloud)

 

需要安装下载的软件

MYSQL 

MySQL 8.0 安装文档:mysql8.0详细安装 - 不会游泳的鱼丶 - 博客园 (cnblogs.com)(注意:下面的命令实际安装的是与 MySQL 兼容的 MariaDB)

# Install MariaDB (the MySQL-compatible server these commands actually install).
# The glob is quoted so it reaches yum intact instead of being expanded by
# the shell against files in the current directory.
yum -y install 'mariadb*'
systemctl enable mariadb
systemctl restart mariadb
# Set the local root password. '1234' is a demo value; it has to match the
# password used in the n9e DSN and in the later `mysql -uroot -p1234` call.
mysql -e "SET PASSWORD FOR 'root'@'localhost' = PASSWORD('1234');"

# Install Redis and enable it at boot.
yum install -y redis
systemctl enable redis
systemctl restart redis

 

VM(VictoriaMetrics)时序库

# The extracted archive contains only a binary: settings are adjusted through
# command-line flags and there is no config file. It can be managed with systemctl.
nohup ./victoria-metrics-prod &

N9E 程序项目-WEB端

mkdir -p /opt/n9e && cd /opt/n9e

# Look up the latest package at https://github.com/ccfos/nightingale/releases;
# the file name used in this document may already be outdated.
tarball=n9e-v6.0.0-ga.4.0.1-linux-amd64.tar.gz
urlpath="https://download.flashcat.cloud/${tarball}"
wget "${urlpath}" || exit 1

tar -zxvf "${tarball}"

# From the extraction directory, import the n9e database file.
mysql -uroot -p1234 < n9e.sql

# In etc/config.toml under the extraction directory you can change the default
# service port and the database connection settings. Pay special attention: the
# remote-write URL differs between single-node and cluster VictoriaMetrics.
[[Pushgw.Writers]]
# Cluster VictoriaMetrics
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"
# Single-node VictoriaMetrics. It is started in this guide without flags, so it
# listens on its default port 8428 (9090 is the Prometheus default).
# Url = "http://127.0.0.1:8428/api/v1/write"

# Be sure to change the BasicAuth credentials in every HTTP section of the config file.


# Start the service. `> file 2>&1` captures stdout and stderr exactly like
# bash's `&>`, but also works when the line is run from plain sh, where
# `&>` would background the command and merely truncate the log file.
nohup ./n9e > n9e.log 2>&1 &

# Check the log with: tail ./n9e.log
# On success n9e listens on port 17000 by default. nohup is used above only as
# a simple demo; in production, run it under systemd.

 

 

n9e配置文件

[Global]
# NOTE(review): presumably selects the runtime mode; only "release" appears here — confirm the other accepted values.
RunMode = "release"

[Log]
# When Output is set to "file", logs are written to the directory given by Dir
# and log rotation should be configured: either by time (KeepHours = 4 keeps
# 4 hours of logs) or by size (RotateNum = 3 with RotateSize = 256 keeps
# 3 files of 256 MB each).
# log write dir
Dir = "logs"
# log level: DEBUG INFO WARNING ERROR
Level = "DEBUG"
# stdout, stderr, file
Output = "stdout"
# # rotate by time
# KeepHours= 4
# # rotate by size
# RotateNum = 3
# # unit: MB
# RotateSize = 256

# HTTP server settings: listen address, TLS files, diagnostics and timeouts.
[HTTP]
# http listening address
Host = "0.0.0.0"
# http listening port
Port = 17000
# https cert file path (left empty here)
CertFile = ""
# https key file path (left empty here)
KeyFile = ""
# whether print access log
PrintAccessLog = false
# whether enable pprof
PProf = false
# expose prometheus /metrics?
ExposeMetrics = true
# http graceful shutdown timeout, unit: s
ShutdownTimeout = 30
# max content length: 64M (64 * 1024 * 1024 bytes)
MaxContentLength = 67108864
# http server read timeout, unit: s
ReadTimeout = 20
# http server write timeout, unit: s
WriteTimeout = 40
# http server idle timeout, unit: s
IdleTimeout = 120


# Remember to change the BasicAuth credentials of the interfaces below;
# the "xxx..." values are placeholders.
[HTTP.Pushgw]
Enable = true
# [HTTP.Pushgw.BasicAuth]
# user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.Alert]
Enable = true
[HTTP.Alert.BasicAuth]
user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.Heartbeat]
Enable = true
# [HTTP.Heartbeat.BasicAuth]
# user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.Service]
Enable = true
[HTTP.Service.BasicAuth]
user001 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

[HTTP.JWTAuth]
# signing key -- remember to replace this placeholder with your own value
SigningKey = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
# unit: min (1500 min = 25 h)
AccessExpired = 1500
# unit: min (10080 min = 7 days)
RefreshExpired = 10080
RedisKeyPrefix = "/jwt/"



[HTTP.ProxyAuth]
# if proxy auth enabled, jwt auth is disabled
Enable = false
# username key in http proxy header
HeaderUserNameKey = "X-User-Name"
# NOTE(review): presumably the roles given to users authenticated via the proxy header — confirm
DefaultRoles = ["Standard"]

[DB]
# Database type: mysql or postgres
DBType = "mysql"
# MySQL DSN (the form used here). The postgres DSN form is:
#   host=%s port=%s user=%s dbname=%s password=%s sslmode=%s
DSN = "root:1234@tcp(127.0.0.1:3306)/n9e_v6?charset=utf8mb4&parseTime=True&loc=Local&allowNativePasswords=true"
# Table name prefix
TablePrefix = ""
# Turn debug mode on or off
Debug = false

# Connection pool
# Maximum connection lifetime, unit: s
MaxLifetime = 7200
# Maximum number of open connections
MaxOpenConns = 150
# Maximum number of idle connections
MaxIdleConns = 50

# Enable auto migrate or not
# EnableAutoMigrate = false

[Redis]
# address, ip:port or ip1:port,ip2:port for cluster and sentinel(SentinelAddrs); per the author, cluster mode is supported as of ga3
Address = "127.0.0.1:6379"
# Username = ""
# Password = ""
# DB = 0
# UseTLS = false
# TLSMinVersion = "1.2"
# standalone cluster sentinel
RedisType = "standalone"
# Mastername for sentinel type
# MasterName = "mymaster"
# SentinelUsername = ""
# SentinelPassword = ""

[Alert]
[Alert.Heartbeat]
# auto detect if blank
IP = ""
# unit: ms. Heartbeat interval of the alert engine (1000 ms here). Per the author's note: multiple n9e instances share the alert matching work, and this is the heartbeat the alert engines send to the center.
Interval = 1000
ClusterName = "default"

# [Alert.Alerting]
# NotifyConcurrency = 10

[Center]
MetricsYamlFile = "./etc/metrics.yaml"
I18NHeaderKey = "X-Language"

[Center.AnonymousAccess]
# Whether the time-series query API and the alert event detail page can be accessed anonymously; set these to false for better security.
PromQuerier = true
AlertDetail = true

# Pushgw settings; everything except LabelRewrite is left commented out here.
[Pushgw]
# use target labels in database instead of in series
LabelRewrite = true
# # default busigroup key name
# BusiGroupLabelKey = "busigroup"
# ForceUseServerTS = false

# [Pushgw.DebugSample]
# ident = "xx"
# __name__ = "xx"

# [Pushgw.WriterOpt]
# # Writer Options
# QueueCount = 1000
# QueueMaxSize = 1000000
# QueuePopSize = 1000
# # ident or metric
# ShardingKey = "ident"

# Remote-write target for pushed metrics.
[[Pushgw.Writers]]
# Cluster VictoriaMetrics:
# Url = "http://127.0.0.1:8480/insert/0/prometheus/api/v1/write"
# Single-node VictoriaMetrics started without flags listens on its default
# port 8428. Use 9090 only if the backend really listens there (Prometheus,
# or VictoriaMetrics started with -httpListenAddr=:9090).
Url = "http://127.0.0.1:8428/api/v1/write"
# Basic auth username
BasicAuthUser = ""
# Basic auth password
BasicAuthPass = ""
Headers = ["X-From", "n9e"]
# timeout settings, unit: ms
Timeout = 10000
DialTimeout = 3000
TLSHandshakeTimeout = 30000
ExpectContinueTimeout = 1000
IdleConnTimeout = 90000
# time duration, unit: ms
KeepAlive = 30000
MaxConnsPerHost = 0
MaxIdleConns = 100
MaxIdleConnsPerHost = 100
## Optional TLS Config
# UseTLS = false
# TLSCA = "/etc/n9e/ca.pem"
# TLSCert = "/etc/n9e/cert.pem"
# TLSKey = "/etc/n9e/key.pem"
# InsecureSkipVerify = false
# Relabel rules must use the full dotted path so they nest under this writer element.
# [[Pushgw.Writers.WriteRelabels]]
# Action = "replace"
# SourceLabels = ["__address__"]
# Regex = "([^:]+)(?::\\d+)?"
# Replacement = "$1:80"
# TargetLabel = "__address__"

 

 

Categraf 数据采集:需要监控哪个服务器,就在哪个服务器上部署

# To deploy on a target machine you only need the categraf binary and the conf directory.
# conf contains the main config file, config.toml, which defines the hostname, the global collection interval, global extra labels, the remote write backend address, etc. The rest are per-plugin config directories whose names start with "input."; to disable a collector xx, give input.xx a different prefix (or delete the directory), e.g. bak.input.xx, and categraf will ignore that collector.
vim config.toml
# Set this to the n9e address; metrics are pushed through this URL.
[[writers]]
url = "http://127.0.0.1:17000/prometheus/v1/write"
# Set heartbeat enable to true and change the URL to the n9e address; heartbeats are sent through this URL.
[heartbeat]
enable = true
# report os version cpu.util mem.util metadata
url = "http://127.0.0.1:17000/v1/n9e/heartbeat"



# Test run before starting
./categraf --test --debug
# Start categraf. nohup works, but managing it with systemctl is preferable.
nohup ./categraf &
# After a successful start, this machine appears in the object list of the web UI (per the author: the heartbeat configured in config.toml is reported to redis, and the list is read from redis).

 

 

 

posted @ 2024-06-14 09:10  不会游泳的鱼丶  阅读(33)  评论(0)  编辑  收藏  举报