1、添加告警配置
vim configs/alarm-settings.yml
# DingTalk robot hook settings for alarm-settings.yml.
dingtalkHooks:
  # JSON body posted to the robot; %s is the placeholder for the alarm text.
  # The literal block scalar (|-) keeps the JSON verbatim, so "\n" reaches the
  # JSON parser as an escape sequence instead of being interpreted by YAML.
  textTemplate: |-
    {
      "msgtype": "text",
      "text": {
        "content": "Apache SkyWalking Alarm: \n %s."
      }
    }
  webhooks:
    # Fill in the robot's own access token and signing secret; `secret`
    # belongs to the same list item as its `url`.
    - url: https://oapi.dingtalk.com/robot/send?access_token=<access_token>
      secret: <加签值>
2、添加 Apollo 配置
vim application.yml
找到 configuration 下的 apollo 配置
在 Apollo 中添加配置
修改后的监控规则配置如下:
# Alarm rules. Each rule compares one metric against `threshold` using `op`;
# per the messages below, an alarm is raised when the condition holds in
# `count` of the last `period` minutes.
rules:
  # Rule unique name, must be ended with `_rule`.
  service_resp_time_rule:
    metrics-name: service_resp_time
    op: ">"
    threshold: 10000
    period: 5
    count: 3
    silence-period: 3
    message: 服务 {name} 的响应时间在最近5分钟内有3分钟超过了10秒
  service_sla_rule:
    # Metrics value need to be long, double or int
    metrics-name: service_sla
    op: "<"
    # 8000 matches the 80% stated in the message (value looks like percent x 100).
    threshold: 8000
    # The length of time to evaluate the metrics
    period: 5
    # How many times after the metrics match the condition, will trigger alarm
    count: 3
    # How many times of checks, the alarm keeps silence after alarm triggered, default as same as period.
    silence-period: 3
    message: 服务 {name} 的成功率在最近5分钟内有3分钟低于80%
  service_resp_time_percentile_rule:
    # Metrics value need to be long, double or int
    metrics-name: service_percentile
    op: ">"
    # One threshold per percentile, in the order p50, p75, p90, p95, p99
    # (see the message). Quoted so it is always read as a single string.
    threshold: "10000,10000,10000,10000,10000"
    period: 5
    count: 3
    silence-period: 5
    message: 服务 {name} 的百分位响应告警时间在最近5分钟内有3分钟出现告警, 由于超过了p50 > 10000, p75 > 10000, p90 > 10000, p95 > 10000, p99 > 10000之中的某个条件
  service_instance_resp_time_rule:
    metrics-name: service_instance_resp_time
    op: ">"
    threshold: 10000
    period: 5
    count: 3
    silence-period: 5
    message: 服务实例 {name} 的响应时间在最近5分钟内有3分钟超过了10秒。
  # No silence-period here; per the note on service_sla_rule it then defaults
  # to the same value as period.
  database_access_resp_time_rule:
    metrics-name: database_access_resp_time
    threshold: 10000
    op: ">"
    period: 5
    count: 3
    message: 数据库 {name} 访问的响应时间在最近5分钟内有3分钟超过了10秒
  # No silence-period here either; same default applies.
  endpoint_relation_resp_time_rule:
    metrics-name: endpoint_relation_resp_time
    threshold: 10000
    op: ">"
    period: 5
    count: 3
    message: 端点关系 {name} 的响应时间在最近5分钟内有3分钟超过了10秒
  # Active endpoint related metrics alarm will cost more memory than service and service instance metrics alarm.
  # Because the number of endpoint is much more than service and instance.
  #
  # endpoint_avg_rule:
  #   metrics-name: endpoint_avg
  #   op: ">"
  #   threshold: 1000
  #   period: 10
  #   count: 2
  #   silence-period: 5
  #   message: Response time of endpoint {name} is more than 1000ms in 2 minutes of last 10 minutes
# DingTalk robot hook that delivers the alarms produced by the rules.
dingtalkHooks:
  # JSON body posted to the robot; %s is the placeholder for the alarm text.
  # The literal block scalar (|-) keeps the JSON verbatim, so "\n" reaches the
  # JSON parser as an escape sequence instead of being interpreted by YAML.
  textTemplate: |-
    {
      "msgtype": "text",
      "text": {
        "content": "Apache SkyWalking Alarm: \n %s."
      }
    }
  webhooks:
    # SECURITY: the real access token and signing secret must not be stored in
    # shared notes or version control. Fill them in only on the deployed
    # config, and rotate any credentials that were previously written here.
    - url: https://oapi.dingtalk.com/robot/send?access_token=<access_token>
      secret: <加签值>
# Plain HTTP webhook receivers (disabled).
# webhooks:
#   - http://127.0.0.1/notify/
#   - http://127.0.0.1/go-wechat/