Net6 对接 SkyWalking 链路追踪

部署Skywalking环境

version: '3.3'
services:
  # Single-node Elasticsearch used as the SkyWalking storage backend
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.5.0
    container_name: elasticsearch
    restart: always
    ports:
      # Quoted so the mapping is always read as a string
      - "9200:9200"
    environment:
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms256m -Xmx256m"
    ulimits:
      # Unlimited memlock goes together with bootstrap.memory_lock=true
      memlock:
        soft: -1
        hard: -1
  # SkyWalking OAP backend: 11800 is the gRPC port agents report to,
  # 12800 is the HTTP port the UI queries (see SW_OAP_ADDRESS below)
  oap:
    image: apache/skywalking-oap-server:8.8.0
    container_name: oap
    # Services on the default Compose network resolve each other by name,
    # so the legacy `links` entries are not needed
    depends_on:
      - elasticsearch
    restart: always
    ports:
      - "11800:11800"
      - "12800:12800"
    environment:
      SW_STORAGE: elasticsearch
      SW_STORAGE_ES_CLUSTER_NODES: elasticsearch:9200
  # SkyWalking web UI, pinned to the same release as the OAP server
  ui:
    image: apache/skywalking-ui:8.8.0
    container_name: ui
    depends_on:
      - oap
    restart: always
    ports:
      - "8080:8080"
    environment:
      SW_OAP_ADDRESS: http://oap:12800

 

对接.NET6 程序

添加依赖

<ItemGroup>
    <PackageReference Include="SkyAPM.Agent.AspNetCore" Version="1.3.0" />
</ItemGroup>

 

编辑Skywalking配置文件skyapm.json

{
  "SkyWalking": {
    "ServiceName": "MySkyWalkingDemoTest",
    "Namespace": "",
    "HeaderVersions": [
      "sw8"
    ],
    "Sampling": {
      "SamplePer3Secs": -1,
      "Percentage": -1.0
    },
    "Logging": {
      "Level": "Information",
      "FilePath": "logs\\skyapm-{Date}.log"
    },
    "Transport": {
      "Interval": 3000,
      "ProtocolVersion": "v8",
      "QueueSize": 30000,
      "BatchSize": 3000,
      "gRPC": {
        "Servers": "192.168.3.245:11800",
        "Timeout": 10000,
        "ConnectTimeout": 10000,
        "ReportTimeout": 600000,
        "Authentication": ""
      }
    }
  }
}

SkyAPM Config 配置说明
ServiceName
服务名称

Sampling
采样配置节点

SamplePer3Secs 每3秒采样数

Percentage 采样百分比,例如10%采样则配置为10

Logging
日志配置节点

Level 日志级别
FilePath 日志保存路径
Transport

传输配置节点

Interval 每多少毫秒刷新

gRPC
gRPC配置节点

Servers gRPC地址,多个地址用逗号“,”分隔
Timeout 创建gRPC链接的超时时间,毫秒
ConnectTimeout gRPC最长链接时间,毫秒

launchSettings.json文件配置SkyWalking

"profiles": { // launch profiles of the project
    "IIS Express": { // IIS Express launch profile
      "commandName": "IISExpress",
      "launchBrowser": true,
      "launchUrl": "weatherforecast",
      "environmentVariables": {
        "ASPNETCORE_ENVIRONMENT": "Development",
        "ASPNETCORE_HOSTINGSTARTUPASSEMBLIES": "SkyAPM.Agent.AspNetCore",
        "SKYWALKING__SERVICENAME": "MySkyWalkingDemoTest"
      }
    },
    "SkyWalkingDemo": { // project (self-hosted) launch profile
      "commandName": "Project",
      "launchBrowser": true,
      "launchUrl": "weatherforecast",
      "applicationUrl": "http://localhost:5000",
      "environmentVariables": {
        "ASPNETCORE_ENVIRONMENT": "Development",
        "ASPNETCORE_HOSTINGSTARTUPASSEMBLIES": "SkyAPM.Agent.AspNetCore", // required
        "SKYWALKING__SERVICENAME": "MySkyWalkingDemoTest" // required: identifies this service in SkyWalking
      }
    }
  }

startup.cs文件中添加

// Registers the SkyAPM extensions, MVC controllers and HttpClient services.
public void ConfigureServices(IServiceCollection services)
{
    services.AddSkyApmExtensions(); // add the SkyWalking-related configuration
    services.AddControllers();
    services.AddHttpClient();
}

获取traceId

// Entry segment context accessor supplied through the constructor.
private readonly IEntrySegmentContextAccessor segContext;

// Stores the accessor so the actions of this controller can read the trace context.
public SkywalkingController(IEntrySegmentContextAccessor segContext) => this.segContext = segContext;

/// <summary>
/// Returns the trace ID held by the entry segment context.
/// </summary>
/// <returns>The value of <c>segContext.Context.TraceId</c>.</returns>
[HttpGet("traceId")]
public string GetSkywalkingTraceId() => segContext.Context.TraceId;

自定义调用链路的信息

/// <summary>
/// Demo action that attaches custom log events to the current span:
/// one before and one after a simulated one-second piece of work.
/// </summary>
/// <returns>An OK result whose text contains the trace ID.</returns>
[HttpGet]
public async Task<IActionResult> SkywalkingTest()
{
    // Trace ID taken from the entry segment context
    var TraceId = _segContext.Context.TraceId;
    Console.WriteLine($"TraceId={TraceId}");
    _segContext.Context.Span.AddLog(LogEvent.Message($"SkywalkingTest---Worker running at: {DateTime.Now}"));

    // Simulate one second of work. Awaiting Task.Delay releases the request
    // thread, whereas Thread.Sleep would block it and leave this async
    // method without any await.
    await Task.Delay(1000);

    _segContext.Context.Span.AddLog(LogEvent.Message($"SkywalkingTest---Worker running at--end: {DateTime.Now}"));

    return Ok($"Ok,SkywalkingTest-TraceId={TraceId} ");
}

接入微服务网关+后台微服务

在网关添加NuGet包

<!-- SkyAPM agent package for ASP.NET Core -->
<ItemGroup>
    <PackageReference Include="SkyAPM.Agent.AspNetCore" Version="1.3.0" />
</ItemGroup>

配置文件如上

{
  "SkyWalking": {
    "ServiceName": "MySkyWalking_Gateway", // only the service name needs to change
    "Namespace": "",
    "HeaderVersions": [
      "sw8"
    ],
    "Sampling": {
      "SamplePer3Secs": -1,
      "Percentage": -1.0
    },
    "Logging": {
      "Level": "Debug",
      "FilePath": "logs\\skyapm-{Date}.log"
    },
    "Transport": {
      "Interval": 3000,
      "ProtocolVersion": "v8",
      "QueueSize": 30000,
      "BatchSize": 3000,
      "gRPC": {
        "Servers": "192.168.3.245:11800",
        "Timeout": 10000,
        "ConnectTimeout": 10000,
        "ReportTimeout": 600000,
        "Authentication": ""
      }
    }
  }
}

launchsettings.json添加环境变量

"profiles": {
    "Zhaoxi.MicroService.GatewayCenter": {
      "commandName": "Project",
      "dotnetRunMessages": true,
      "launchBrowser": true,
      "launchUrl": "swagger",
      "applicationUrl": "https://localhost:7141;http://localhost:5141",
      "environmentVariables": {
        "ASPNETCORE_ENVIRONMENT": "Development",
        "ASPNETCORE_HOSTINGSTARTUPASSEMBLIES": "SkyAPM.Agent.AspNetCore", // add the hosting startup assembly
        "SKYWALKING__SERVICENAME": "MySkyWalking_Gateway" // add the service name
      }
    },
    "IIS Express": {
      "commandName": "IISExpress",
      "launchBrowser": true,
      "launchUrl": "swagger",
      "environmentVariables": {
        "ASPNETCORE_ENVIRONMENT": "Development",
        "ASPNETCORE_HOSTINGSTARTUPASSEMBLIES": "SkyAPM.Agent.AspNetCore",
        "SKYWALKING__SERVICENAME": "MySkyWalking_Gateway"
      }
    }
  }

修改网关配置文件,添加OrderServiceInstance微服务的路由

{
    "DownstreamPathTemplate": "/api/{url}", // downstream service path; {url} is a placeholder
    "DownstreamScheme": "http",
    "UpstreamPathTemplate": "/microservice/{url}", // gateway path; {url} is a placeholder
    "UpstreamHttpMethod": [ "Get", "Post" ],
    "UseServiceDiscovery": true,
    "ServiceName": "OrderService", // service name registered in Consul
    "LoadBalancerOptions": {
        "Type": "RoundRobin" // round-robin
    }
}

 

启动网关

dotnet run --urls=http://*:6299

配置Skywalking告警

1 配置告警规则

进入容器

docker exec -it 12f053748e85 /bin/sh

 

找到文件

 

 通过cat alarm-settings.yml可以查阅文件内容,如下:

docker cp 12f053748e85:/skywalking/config/alarm-settings.yml .

 

规则常用指标解读:
rule name: 规则名称,必须唯一,必须以 **_rule**结尾;
metrics name: oal(Observability Analysis Language)脚本中的度量名;名称在SkyWalking后端服务中已经定义,进入容器skywalking-oap之后,进入如下目录就可以找到。

include-names: 本规则告警生效的实体名称,如服务名,终端名;
exclude-names: 本规则告警不生效(被排除)的实体名称,如服务名,终端名;
threshold: 阈值,可以是一个数组,即可以配置多个值;
op: 操作符, 可以设定 >, <, =;
period: 多久检查一次当前的指标数据是否符合告警规则;以分钟为单位
count: 超过阈值条件,达到**count**次数,触发告警;
silence-period:告警触发后,在指定的**silence-period**时间内,忽略相同的告警消息;
更多告警规则详情,请参照这个地址:https://github.com/apache/skywalking/blob/master/docs/en/setup/backend/backend-alarm.md

 

修改告警规则

rules:
  # Rule name: must be unique and end with `_rule`
  service_test_sal_rule:
    # Metric to evaluate. It has to be a metric the backend defines;
    # service_sla is the metric the stock success-rate rule uses.
    metrics-name: service_sla
    # Trigger when the value is less than the threshold
    op: "<"
    # Threshold (8000 corresponds to the 80% in the message below)
    threshold: 8000
    # Evaluation window, in minutes
    period: 2
    # Alarm as soon as the condition matches once within the window
    count: 1
    # Do not repeat the same alarm for 3 minutes
    silence-period: 3
    # Alarm message text, kept in line with count/period above
    message: Successful rate of service {name} is lower than 80% in 1 minutes of last 2 minutes

概要:服务成功率在过去2分钟内低于80%

告警API编写

这个本质还是SkyWalking根据规则进行检查,如果符合规则条件,就通过**WebHook、gRPCHook、WeChat Hook、Dingtalk Hook**等方式进行消息通知;接收到告警数据信息之后,可以自行处理消息。这里为了方便,就采用**WebHook**的方式进行演示,即触发告警条件之后,SkyWalking会调用配置的WebHook 接口,并传递对应的告警信息;
定义数据模型

/// <summary>
/// One alarm entry as posted by the SkyWalking webhook.
/// Property names are camelCase so they match the JSON field names.
/// </summary>
public class AlarmMsg
{
    public int scopeId { get; set; }
    public string? scope { get; set; }
    public string? name { get; set; }
    public string? id0 { get; set; }
    public string? id1 { get; set; }
    public string? ruleName { get; set; }
    public string? alarmMessage { get; set; }

    // Alarm time sent by SkyWalking as a numeric timestamp; without this
    // property the value is dropped during deserialization.
    public long startTime { get; set; }
}

 

定义WebHook调用API

/// <summary>
/// Webhook endpoint called by SkyWalking when alarm rules fire.
/// Writes the alarm text to the console and forwards it by mail.
/// </summary>
/// <param name="msgs">Alarm entries posted by SkyWalking; may be null or empty.</param>
[HttpPost("AlarmMsg")]
public void AlarmMsg(List<AlarmMsg> msgs)
{
    // Nothing to report when the payload is missing or empty
    if (msgs == null || msgs.Count == 0)
    {
        return;
    }

    // Report every alarm in the batch instead of only the first one.
    // With a single entry the text is identical to the previous output.
    string msg = "触发告警:" + string.Join(";", msgs.Select(m => m?.alarmMessage));
    Console.WriteLine(msg);
    SendMail(msg);
}

 

配置webHook
http://192.168.3.105:7900/api/Skywalking/AlarmMsg
# Sample alarm rules.
# Each rule names a metric, a comparison operator and a threshold, plus the
# period / count / silence-period settings explained on service_sla_rule.
rules:
  # Rule unique name, must be ended with `_rule`.
  service_resp_time_rule:
    metrics-name: service_resp_time
    op: ">"
    threshold: 1000
    period: 10
    count: 3
    silence-period: 5
    message: Response time of service {name} is more than 1000ms in 3 minutes of last 10 minutes.
  service_sla_rule:
    # Metrics value need to be long, double or int
    metrics-name: service_sla
    op: "<"
    threshold: 8000
    # The length of time to evaluate the metrics
    period: 10
    # How many times after the metrics match the condition, will trigger alarm
    count: 2
    # How many times of checks, the alarm keeps silence after alarm triggered, default as same as period.
    silence-period: 3
    message: Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes
  service_resp_time_percentile_rule:
    # Metrics value need to be long, double or int
    metrics-name: service_percentile
    op: ">"
    # One threshold per percentile listed in the message (p50, p75, p90, p95, p99)
    threshold: 1000,1000,1000,1000,1000
    period: 10
    count: 3
    silence-period: 5
    message: Percentile response time of service {name} alarm in 3 minutes of last 10 minutes, due to more than one condition of p50 > 1000, p75 > 1000, p90 > 1000, p95 > 1000, p99 > 1000
  service_instance_resp_time_rule:
    metrics-name: service_instance_resp_time
    op: ">"
    threshold: 1000
    period: 10
    count: 2
    silence-period: 5
    message: Response time of service instance {name} is more than 1000ms in 2 minutes of last 10 minutes
  # No silence-period here, so the default (same as period) applies
  database_access_resp_time_rule:
    metrics-name: database_access_resp_time
    threshold: 1000
    op: ">"
    period: 10
    count: 2
    message: Response time of database access {name} is more than 1000ms in 2 minutes of last 10 minutes
  # No silence-period here, so the default (same as period) applies
  endpoint_relation_resp_time_rule:
    metrics-name: endpoint_relation_resp_time
    threshold: 1000
    op: ">"
    period: 10
    count: 2
    message: Response time of endpoint relation {name} is more than 1000ms in 2 minutes of last 10 minutes
#  Active endpoint related metrics alarm will cost more memory than service and service instance metrics alarm.
#  Because the number of endpoint is much more than service and instance.
#
#  endpoint_avg_rule:
#    metrics-name: endpoint_avg
#    op: ">"
#    threshold: 1000
#    period: 10
#    count: 2
#    silence-period: 5
#    message: Response time of endpoint {name} is more than 1000ms in 2 minutes of last 10 minutes

# Endpoints that receive the alarm messages when a rule fires
webhooks:
  - http://192.168.3.105:7900/api/Skywalking/AlarmMsg
#  - http://127.0.0.1/go-wechat/

 

rules:
  # Alarm rule name: must be unique and end with `_rule`
  service_sla_rule:
    # Metric to evaluate
    metrics-name: service_sla
    # Trigger when the value is less than the threshold
    op: "<"
    # Threshold (8000 corresponds to the 80% in the message below)
    threshold: 8000
    # Evaluation window, in minutes
    period: 10
    # Number of matches within the window before the alarm fires
    count: 2
    # Do not repeat the same alarm for 3 minutes
    silence-period: 3
    # Alarm message text
    message: Successful rate of service {name} is lower than 80% in 2 minutes of last 10 minutes
# Endpoints that receive the alarm messages when a rule fires
webhooks:
  - http://192.168.3.105:7900/api/Skywalking/AlarmMsg

 




posted @ 2022-02-23 21:03  非著名架构师  阅读(285)  评论(0编辑  收藏  举报