Fork me on GitHub

otlp采集数据的虚拟机环境配置

采集+监控

1.LB

LB 配置文件,nginx自带的ngx_http_stub_status_module提供的/nginx_status(可自定义命名)端点输出的是nginx自己的简单状态信息

vim InforSuiteLB/conf/InforSuiteLB.conf
# Expose nginx's built-in stub_status page (ngx_http_stub_status_module) on /nginx_status.
location /nginx_status {
            stub_status on;
           # access_log   off;    # optionally silence access logging for this endpoint
           # allow 127.0.0.1;     # optionally restrict access to localhost only
           # deny all;
        }

配置好启动LB即可

2.metrics

1.Nginx Prometheus Exporter

解压 tar 包,创建 systemd 服务单元(以便通过 systemctl 管理启动)

# 解压
tar -zxvf nginx-prometheus-exporter_1.1.0_linux_amd64.tar.gz

vim /etc/systemd/system/nginx-prometheus-exporter.service
[Unit]
Description=nginx-prometheus-exporter
Documentation=https://github.com/nginxinc/nginx-prometheus-exporter
After=network.target

[Service]
Type=simple
User=root
# NOTE: since v1.0.0 the exporter parses flags with kingpin, so long options
# require a double dash; the old single-dash forms (-web.listen-address) fail.
# Also removed the stray space after "ExecStart=".
ExecStart=/usr/local/nginx-prometheus-exporter \
  --web.listen-address=:9113 \
  --nginx.scrape-uri=http://192.168.209.132:80/nginx_status
Restart=on-failure

[Install]
WantedBy=multi-user.target

启动Nginx Prometheus Exporter

# Reload systemd unit files so the new service is picked up
systemctl daemon-reload
# Enable start on boot
systemctl enable nginx-prometheus-exporter.service
# Start the exporter
systemctl start nginx-prometheus-exporter.service
# Check the exporter status
systemctl status nginx-prometheus-exporter.service

Nginx Prometheus Exporter 将 nginx 暴露出来的指标转为 Prometheus 可接收的 metrics 格式,供其他组件收集或拉取

2.prometheus 数据源

# 1 Enter the install directory
cd /usr/local
# 2 Download the release tarball
wget https://github.com/prometheus/prometheus/releases/download/v2.42.0/prometheus-2.42.0.linux-amd64.tar.gz
# 3 Extract
tar -zxvf prometheus-2.42.0.linux-amd64.tar.gz
# 4 Rename for a stable path
mv prometheus-2.42.0.linux-amd64 prometheus

配置开机自启动

vim /usr/lib/systemd/system/prometheus.service
[Unit]
Description=Prometheus
After=network.target
Documentation=https://prometheus.io/

[Service]
Type=simple
# --web.enable-lifecycle allows config reloads via HTTP POST to /-/reload
ExecStart=/usr/local/prometheus/prometheus --config.file=/usr/local/prometheus/prometheus.yml --storage.tsdb.path=/usr/local/prometheus/data --web.listen-address=:9090 --web.enable-lifecycle
Restart=on-failure

[Install]
WantedBy=multi-user.target

配置文件

vi prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).



# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
          # - alertmanager:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"


# NOTE(review): port 4317 is the collector's OTLP/gRPC endpoint, while
# remote_write/remote_read speak Prometheus' remote HTTP protocol — confirm
# the collector really serves /api/v1/write and /api/v1/read on this port.
remote_write:
  - url: "http://192.168.209.132:4317/api/v1/write"
remote_read:
  - url: "http://192.168.209.132:4317/api/v1/read"



# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: "prometheus"

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ["192.168.209.132:9090"]

  - job_name: 'nginx-stub-status'
    static_configs:
      - targets: ['192.168.209.132:1234']  # address/port of the otelcol prometheus exporter

启动

# Reload systemd unit files
systemctl daemon-reload
# Enable start on boot
systemctl enable prometheus
# Start prometheus
systemctl start prometheus
# Check prometheus status
systemctl status prometheus

# Verify the service is listening
lsof -i:9090

3.logs

1.loki

下载

mkdir /usr/local/loki

### Download the binary archive
### NOTE(review): cd into /usr/local/loki first, otherwise the files land in the current directory
wget "https://github.com/grafana/loki/releases/download/v2.7.4/loki-linux-amd64.zip"
### Unzip the archive
unzip "loki-linux-amd64.zip"
### make sure it is executable
chmod a+x "loki-linux-amd64"

配置

vim loki-local-config.yml
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 9096

ingester:
  lifecycler:
    address: 127.0.0.1
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 10m
  chunk_retain_period: 30s
schema_config:
  configs:
  - from: 2020-05-15
    store: boltdb
    object_store: filesystem
    schema: v11
    index:
      prefix: index_
      period: 168h
storage_config:
  boltdb:
    directory: /usr/local/loki/index
  filesystem:
    directory: /usr/local/loki/chunks  # chunk storage path

limits_config:
  enforce_metric_name: false
  reject_old_samples: true          # reject samples that are too old
  reject_old_samples_max_age: 168h  # samples older than 168h are rejected
  ingestion_rate_mb: 200
  ingestion_burst_size_mb: 300
  per_stream_rate_limit: 1000MB
  max_entries_limit_per_query: 10000
chunk_store_config:
  max_look_back_period: 168h        # must be <= the retention period below to avoid querying expired data
table_manager:
  retention_deletes_enabled: true   # enable retention-based deletion
  retention_period: 168h            # chunks older than 168h are deleted

ruler:
  storage:
    type: local
    local:
      directory: /usr/local/loki/rules
  # Fixed path typo: was "/usr/loca/loki/rules-temp"
  rule_path: /usr/local/loki/rules-temp
  alertmanager_url: http://192.168.209.132:9093    # alertmanager address
  ring:
    kvstore:
      store: inmemory
  enable_api: true
  enable_alertmanager_v2: true

启动文件

vim restart-loki.sh

配置

#!/bin/bash
# Restart helper for Loki: kill any running instance, then start a fresh one.
echo "stop loki"
# pkill -f matches the full command line; '|| true' keeps the script going
# when no loki process exists yet (the old ps|grep|xargs pipeline errored
# with "kill: no arguments" in that case).
pkill -9 -f loki-linux-amd64 || true

echo "Begin start loki"
sleep 1
# Capture output in a log file instead of dropping it into nohup.out.
# (The original str/sstr/echo -e dance only printed a blank line.)
nohup ./loki-linux-amd64 --config.file=loki-local-config.yml > ./loki-3100.log 2>&1 &
echo ""
### add execute permission
### NOTE(review): run this after cd'ing into /usr/local/loki, where the script lives
chmod +x restart-loki.sh
### start
cd /usr/local/loki
./restart-loki.sh

2.日志代理 Promtail

下载

mkdir /usr/local/promtail

### Download the binary archive
wget "https://github.com/grafana/loki/releases/download/v2.7.4/promtail-linux-amd64.zip"
### Unzip the archive
unzip promtail-linux-amd64
### make sure it is executable
chmod a+x "promtail-linux-amd64"

配置

vim promtail-local-config.yml
server:
  http_listen_port: 9080
  grpc_listen_port: 0

positions:
  filename: /usr/local/promtail/positions.yaml

clients:
  - url: http://192.168.209.132:3100/loki/api/v1/push # Loki push endpoint

scrape_configs:
- job_name: nginx
  pipeline_stages:
  # Mask the captured last octet of IPv4 addresses before shipping logs.
  - replace:
      expression: '(?:[0-9]{1,3}\.){3}([0-9]{1,3})'
      replace: '***'
  static_configs:
  - targets:
      - localhost
    labels:
      job: nginx_access_log
      host: appfelstrudel
      agent: promtail
      __path__: /usr/local/InforSuiteLB/logs/json_access.log

启动文件

vi restart-promtail.sh

配置

#!/bin/bash
# Restart helper for Promtail: kill any running instance, then start a fresh one.
echo "Begin stop promtail"
# pkill -f matches the full command line; '|| true' keeps the script going
# when no promtail process exists yet (the old ps|grep|xargs pipeline errored
# in that case).
pkill -9 -f promtail-linux-amd64 || true

echo "Begin start promtail...."
nohup ./promtail-linux-amd64 --config.file=promtail-local-config.yml > ./promtail-9080.log 2>&1 &
### add execute permission
### NOTE(review): run this after cd'ing into /usr/local/promtail, where the script lives
chmod +x restart-promtail.sh
### start
cd /usr/local/promtail
./restart-promtail.sh

4.OpenTelemetry Collector

rpm安装

rpm -ivh otelcol_0.94.0_linux_amd64.rpm

配置文件

vim /etc/otelcol/config.yaml
# To limit exposure to denial of service attacks, change the host in endpoints below from 0.0.0.0 to a specific network interface.
# See https://github.com/open-telemetry/opentelemetry-collector/blob/main/docs/security-best-practices.md#safeguards-against-denial-of-service-attacks

extensions:
  health_check:
  pprof:
    endpoint: 0.0.0.0:1777
  zpages:
    endpoint: 0.0.0.0:55679

receivers:
  # Standard OTLP ingest over gRPC (4317) and HTTP (4318).
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

  opencensus:
    endpoint: 0.0.0.0:55678

  # Scrape the nginx-prometheus-exporter (port 9113) every 10s.
  # (Original comment said "Collect own metrics", but the target is the exporter.)
  prometheus:
    config:
      scrape_configs:
      - job_name: 'nginx-stub-status'
        scrape_interval: 10s
        static_configs:
        - targets: ['192.168.209.132:9113']
          labels:
            job: 'nginx-stub-status'


  jaeger:
    protocols:
      grpc:
        endpoint: 0.0.0.0:14250
      thrift_binary:
        endpoint: 0.0.0.0:6832
      thrift_compact:
        endpoint: 0.0.0.0:6831
      thrift_http:
        endpoint: 0.0.0.0:14268

  zipkin:
    endpoint: 0.0.0.0:9411


processors:
  batch:

exporters:
  debug:
    verbosity: detailed
  # Re-expose collected metrics in Prometheus format on :1234 — this is the
  # target scraped by the 'nginx-stub-status' job in prometheus.yml.
  prometheus/metrics:
    endpoint: "192.168.209.132:1234"


service:

  pipelines:

    traces:
      receivers: [otlp, opencensus, jaeger, zipkin]
      processors: [batch]
      exporters: [debug]

    metrics:
      receivers: [otlp, opencensus, prometheus]
      processors: [batch]
      exporters: [debug, prometheus/metrics]

    logs:
      receivers: [otlp]
      processors: [batch]
      exporters: [debug]


  extensions: [health_check, pprof, zpages]

启动服务

# Reload systemd unit files
systemctl daemon-reload
# Enable start on boot
systemctl enable otelcol
# Start otelcol (original comment wrongly said "grafana" — copy/paste slip)
systemctl start otelcol
# Check otelcol status
systemctl status otelcol

5.grafana 安装

# 1 Enter the install directory
cd /usr/local
# 2 Download the release tarball
wget https://dl.grafana.com/oss/release/grafana-9.4.3.linux-amd64.tar.gz
# 3 Extract
tar -zxvf grafana-9.4.3.linux-amd64.tar.gz
# 4 Rename for a stable path
mv grafana-9.4.3 grafana

配置开机自启动

# Create the grafana.service unit file
vim /usr/lib/systemd/system/grafana.service
[Unit]
Description=Grafana
After=network.target

[Service]
# grafana-server notifies systemd when it is ready, hence Type=notify
Type=notify
ExecStart=/usr/local/grafana/bin/grafana-server -homepath /usr/local/grafana
Restart=on-failure

[Install]
WantedBy=multi-user.target

启动

# Reload systemd unit files
systemctl daemon-reload
# Enable start on boot
systemctl enable grafana
# Start grafana
systemctl start grafana
# Check grafana status
systemctl status grafana

# Verify the service is listening
lsof -i:3000

配置中文

default_language = zh-Hans

启动后,打开前端(3000端口)配置数据源、仪表盘(nginx指标导入模板ID: 11199,nginx日志导入模板ID: 12559)

6.结果展示

image-20240221141650780image-20240221141710175

image-20240222163954219

image-20240222164008692

posted @ 2024-04-12 15:43  刺猬多看看  阅读(16)  评论(0编辑  收藏  举报