ConfigMap

创建 ConfigMap

cat prometheus-rules.yaml

# Prometheus alerting rules packaged as a ConfigMap. Each key under `data` is a
# standalone rule file holding one group with one alert. The prometheus-core
# Deployment mounts this ConfigMap at /etc/prometheus-rules.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-rules
  namespace: monitoring
data:
  # Non-idle CPU share per instance above 80% for 5 minutes.
  cpu-usage.rules: |
    groups:
    - name: cpu-usage.rules
      rules:
      - alert: CPU使用率过高(大于80%)
        expr: (100 - (avg by(instance) (irate(node_cpu{mode="idle",name="node-exporter"}[5m]))* 100)) > 80
        for: 5m
        labels:
          severity: page
        annotations:
          description: 'CPU使用率过高: {{ $value }}'
          summary: 'CPU使用率过高: {{ $value }}'
          value: '{{ $value }}'
  # mysql_up different from 1 for 2 minutes.
  mysql-usage.rules: |
    groups:
    - name: mysql-usage.rules
      rules:
      - alert: mysql状态没有正常up
        expr: mysql_up != 1
        for: 2m
        labels:
          severity: warning
        annotations:
          description: 'mysql状态没有正常up: {{ $value }}'
          summary: 'mysql状态没有正常up: {{ $value }}'
          value: '{{ $value }}'
  # Established-connection counts on ports 6640 and 6633 differ by more than
  # 200 for 5 minutes.
  odl.rules: |
    groups:
    - name: odl.rules
      rules:
      - alert: OVSDB跟OPENFLOW数量差异过大
        expr: abs(ODL_ESTABLISHED_NUMBER_6640 - ODL_ESTABLISHED_NUMBER_6633) > 200
        for: 5m
        labels:
          severity: warning
        annotations:
          description: 'OVSDB跟OPENFLOW数量差异过大: {{ $value }}'
          summary: 'OVSDB跟OPENFLOW数量差异过大: {{ $value }}'
          value: '{{ $value }}'
  # Used memory (total minus free, cached and buffers) above 75% for 5 minutes.
  # The annotations previously carried the CPU alert's text (copy-paste slip);
  # they now describe memory so the notification matches the alert.
  memory.rules: |
    groups:
    - name: memory.rules
      rules:
      - alert: 内存使用率过高(>75%)
        expr: (node_memory_MemTotal - node_memory_MemFree - node_memory_Cached - node_memory_Buffers) / node_memory_MemTotal* 100 > 75
        for: 5m
        labels:
          severity: warning
        annotations:
          description: '内存使用率过高: {{ $value }}'
          summary: '内存使用率过高: {{ $value }}'
          value: '{{ $value }}'
  # Per-second rate of mysql_global_status_max_used_connections over 5 minutes
  # above 40, sustained for 2 minutes.
  mysql-used-connections.rules: |
    groups:
    - name: mysql-used-connections.rules
      rules:
      - alert: mysql的used_connections变化过大
        expr: rate(mysql_global_status_max_used_connections[5m])> 40
        for: 2m
        labels:
          severity: warning
        annotations:
          description: 'mysql的used_connections变化过大: {{ $value }}'
          summary: 'mysql的used_connections变化过大: {{ $value }}'
          value: '{{ $value }}'
  # Root filesystem ("/") usage above 75% for 5 minutes.
  diskhighuse.rules: |
    groups:
    - name: diskhighuse.rules
      rules:
      - alert: 磁盘使用率过高(>75%)
        expr: (node_filesystem_size{mountpoint="/"} - node_filesystem_avail{mountpoint="/"}) / node_filesystem_size{mountpoint="/"}* 100 > 75
        for: 5m
        labels:
          severity: warning
        annotations:
          description: '磁盘使用率过高: {{ $value }}'
          summary: '磁盘使用率过高: {{ $value }}'
          value: '{{ $value }}'
  # node_disk_io_now for device sda above 50 for 1 minute.
  # NOTE(review): the alert text speaks of an iowait percentage, but the
  # expression compares node_disk_io_now against 50 - confirm this metric is
  # what the alert is meant to watch.
  diskiohighuse.rules: |
    groups:
    - name: diskiohighuse.rules
      rules:
      - alert: iowait过高(>50%)
        expr: node_disk_io_now{device="sda"} > 50
        for: 1m
        labels:
          severity: warning
        annotations:
          description: '磁盘iowait过高: {{ $value }}'
          summary: '磁盘iowait过高: {{ $value }}'
          value: '{{ $value }}'
  # ODL_CLOSEWAIT_NUMBER_6640 above 200. No `for:` clause, so the alert fires
  # as soon as an evaluation sees the condition.
  ODL_OVSDB_closewait.rules: |
    groups:
    - name: ODL_OVSDB_closewait.rules
      rules:
      - alert: ODL_OVSDB_closewait数量过多
        expr: ODL_CLOSEWAIT_NUMBER_6640 > 200
        labels:
          severity: warning
        annotations:
          description: 'ODL_OVSDB_closewait数量过多: {{ $value }}'
          summary: 'ODL_OVSDB_closewait数量过多: {{ $value }}'
          value: '{{ $value }}'
  # ODL_ESTABLISHED_NUMBER_6633 deviates from its own average by more than 2000
  # for 5 minutes.
  # NOTE(review): the alert name and annotations say "previous 12 hours", but
  # the averaging window in the expression is [1h] - confirm which is intended
  # and align the other.
  Openflow.rules: |
    groups:
    - name: openflow.rules
      rules:
      - alert: OPENFLOW数量跟前12小时的平均数量差距过大
        expr: abs(ODL_ESTABLISHED_NUMBER_6633-avg_over_time(ODL_ESTABLISHED_NUMBER_6633[1h])) > 2000
        for: 5m
        labels:
          severity: warning
        annotations:
          description: 'OPENFLOW数量跟前12小时的平均数量差距过大: {{ $value }}'
          summary: 'OPENFLOW数量跟前12小时的平均数量差距过大: {{ $value }}'
          value: '{{ $value }}'
  # ODL_CLOSEWAIT_NUMBER_6633 above 200. No `for:` clause, so the alert fires
  # as soon as an evaluation sees the condition.
  ODL_OPENFLOW_closewait.rules: |
    groups:
    - name: ODL_OPENFLOW_closewait.rules
      rules:
      - alert: ODL_OPENFLOW_closewait数量过多
        expr: ODL_CLOSEWAIT_NUMBER_6633 > 200
        labels:
          severity: warning
        annotations:
          description: 'ODL_OPENFLOW_closewait数量过多: {{ $value }}'
          summary: 'ODL_OPENFLOW_closewait数量过多: {{ $value }}'
          value: '{{ $value }}'
  # node_boot_time changed at least once within the last 5 minutes.
  node_reboot.rules: |
    groups:
    - name: node_reboot.rules
      rules:
      - alert: node被重启
        expr: changes(node_boot_time[5m]) > 0
        labels:
          severity: warning
        annotations:
          description: 'node被重启: {{ $value }}'
          summary: 'node被重启: {{ $value }}'
          value: '{{ $value }}'
  # The redis_cluster_nodes_fail series has been absent for 1 minute.
  redis-cluster.rules: |
    groups:
    - name: redis-cluster.rules
      rules:
      - alert: redis_cluster一分钟内没有收到数据
        expr: absent(redis_cluster_nodes_fail)
        for: 1m
        labels:
          severity: warning
        annotations:
          description: 'redis_cluster一分钟内没有收到数据: {{ $value }}'
          summary: 'redis_cluster一分钟内没有收到数据: {{ $value }}'
          value: '{{ $value }}'
  # redis_cluster_nodes_connected changed value within the last 5 minutes.
  # Uses changes() instead of rate(): rate() is meant for counters, while this
  # alert is about the value changing in either direction. With changes(),
  # {{ $value }} is the number of changes seen in the window.
  # Assumes redis_cluster_nodes_connected is a gauge - confirm against the
  # exporter.
  redis_cluster_nodes.rules: |
    groups:
    - name: redis_cluster_nodes.rules
      rules:
      - alert: redis_cluster_nodes数量发生变化
        expr: changes(redis_cluster_nodes_connected[5m]) > 0
        for: 1m
        labels:
          severity: warning
        annotations:
          description: 'redis_cluster_nodes数量发生变化: {{ $value }}'
          summary: 'redis_cluster_nodes数量发生变化: {{ $value }}'
          value: '{{ $value }}'
  # redis_cluster_nodes_fail above 0 for 1 minute.
  redis_fail.rules: |
    groups:
    - name: redis_fail.rules
      rules:
      - alert: redis_cluster_nodes存在fail的数量
        expr: redis_cluster_nodes_fail > 0
        for: 1m
        labels:
          severity: warning
        annotations:
          description: 'redis_cluster_nodes存在fail的数量: {{ $value }}'
          summary: 'redis_cluster_nodes存在fail的数量: {{ $value }}'
          value: '{{ $value }}'
View Code

在 Deployment 中挂载 ConfigMap

cat prometheus.yaml

# Single-replica Prometheus server. Mounts its main configuration, the alert
# rule files and a host directory for data, and is pinned to nodes labelled
# environment=monitor.
#
# Uses apps/v1: Deployments are no longer served under extensions/v1beta1 on
# current Kubernetes releases.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-core
  namespace: monitoring
  labels:
    app: prometheus
    component: core
spec:
  replicas: 1
  # apps/v1 requires an explicit selector, and it must match the pod template
  # labels below.
  selector:
    matchLabels:
      app: prometheus
      component: core
  template:
    metadata:
      name: prometheus-main
      labels:
        app: prometheus
        component: core
    spec:
      serviceAccountName: prometheus-k8s
      containers:
      - name: prometheus
        image: 10.50.51.95:5000/prometheus:v2.4.3
        args:
        - --storage.tsdb.retention=15d
        - --config.file=/etc/prometheus/prometheus.yml
        # Earlier single-dash flags, kept commented out for reference:
        # - '-storage.local.retention=168h'
        # - '-storage.local.memory-chunks=1536000000'
        # - '-config.file=/etc/prometheus/prometheus.yml'
        # - '-alertmanager.url=http://alertmanager:9093/'
        ports:
        - name: webui
          containerPort: 9090
        # No requests or limits are set. Values that were commented out in the
        # original manifest, for both requests and limits:
        #   cpu: 500m
        #   memory: 200M
        resources: {}
        volumeMounts:
        # Main configuration; --config.file above points into this mount.
        - name: config-volume
          mountPath: /etc/prometheus
        - name: pdata
          mountPath: /prometheus
        # Alert rule files from the prometheus-rules ConfigMap.
        # NOTE(review): prometheus.yml is not shown here - confirm its
        # rule_files entry points at this directory.
        - name: rules-volume
          mountPath: /etc/prometheus-rules
      volumes:
      - name: config-volume
        configMap:
          name: prometheus-core
      # Data lives in a directory on the node's filesystem.
      - name: pdata
        hostPath:
          path: /opt/prometheusdata
      - name: rules-volume
        configMap:
          name: prometheus-rules
      nodeSelector:
        environment: monitor
View Code

https://blog.51cto.com/wzlinux/2331050

posted @ 2020-07-03 10:22  hanwei666  阅读(144)  评论(0)  编辑  收藏  举报
……