helm安装prometheus-operator

一、环境介绍

[root@master test]# uname -r
4.4.223-1.el7.elrepo.x86_64
[root@master test]# kubectl  get node 
NAME     STATUS   ROLES    AGE   VERSION
master   Ready    master   27d   v1.18.0
node     Ready    <none>   27d   v1.18.0
[root@master test]# helm  version
version.BuildInfo{Version:"v3.2.0", GitCommit:"e11b7ce3b12db2941e90399e874513fbd24bcb71", GitTreeState:"clean", GoVersion:"go1.13.10"}

  

二、helm添加仓库

#阿里云
helm  repo add aliyuncs https://apphub.aliyuncs.com
#官方(注意:下面这个旧地址已于2020年11月停用,stable仓库的新地址为 https://charts.helm.sh/stable)
helm  repo add stable  https://kubernetes-charts.storage.googleapis.com

 

三、helm search prometheus-operator

[root@master test]# helm search repo  prometheus-operator
NAME                            CHART VERSION    APP VERSION    DESCRIPTION                                       
aliyuncs/prometheus-operator    8.7.0            0.35.0         Provides easy monitoring definitions for Kubern...
stable/prometheus-operator      8.13.7           0.38.1         Provides easy monitoring definitions for Kubern...

四、安装

helm  install mypro aliyuncs/prometheus-operator 

五、查看

[root@master test]# helm list
NAME     NAMESPACE    REVISION    UPDATED                                    STATUS      CHART                        APP VERSION
mypro    default      1           2020-06-09 09:32:37.091220013 +0800 CST    deployed    prometheus-operator-8.7.0    0.35.0     
[root@master test]# helm  status mypro
NAME: mypro
LAST DEPLOYED: Tue Jun  9 09:32:37 2020
NAMESPACE: default
STATUS: deployed
REVISION: 1
NOTES:
The Prometheus Operator has been installed. Check its status by running:
  kubectl --namespace default get pods -l "release=mypro"

Visit https://github.com/coreos/prometheus-operator for instructions on how
to create & configure Alertmanager and Prometheus instances using the Operator.
[root@master test]# kubectl --namespace default get pods -l "release=mypro"
NAME                                                  READY   STATUS    RESTARTS   AGE
mypro-grafana-f5b868868-8ckgs                         2/2     Running   0          55m
mypro-prometheus-node-exporter-dg6w4                  1/1     Running   0          55m
mypro-prometheus-node-exporter-x9l4b                  1/1     Running   0          55m
mypro-prometheus-operator-operator-5b458d4659-p7t4l   2/2     Running   0          55m

六、配置ingress浏览器访问

[root@master test]# cat grafana-ingress.yaml 
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: ingress-grafana
spec:
  rules:
  - host: grafana.zhang.com
    http:
      paths:
      - backend:
          serviceName: mypro-grafana
          servicePort: 80
[root@master test]# cat prometheus-ingress.yaml 
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: ingress-prometheus
spec:
  rules:
  - host: prometheus.zhang.com
    http:
      paths:
      - backend:
          serviceName: mypro-prometheus-operator-prometheus
          servicePort: 9090
[root@master test]# cat alertmanager-ingress.yaml 
apiVersion: extensions/v1beta1
kind: Ingress
metadata:
  name: ingress-alertmanager
spec:
  rules:
  - host: alertmanager.zhang.com
    http:
      paths:
      - backend:
          serviceName: mypro-prometheus-operator-alertmanager
          servicePort: 9093

七、查看

[root@master test]# kubectl get ingress
NAME                   CLASS    HOSTS                    ADDRESS         PORTS   AGE
ingress-alertmanager   <none>   alertmanager.zhang.com   10.111.12.239   80      29m
ingress-grafana        <none>   grafana.zhang.com        10.111.12.239   80      32m
ingress-prometheus     <none>   prometheus.zhang.com     10.111.12.239   80      30m

八、浏览器访问

 

 

 

 

 

九、修改alertmanager的报警配置

1、创建alertmanager的配置文件

[root@master test]# cat  alertmanger_config.yaml 
global: 
  resolve_timeout: 5m # 警报在该时间内未再被更新则视为已恢复(resolved),默认为5m
  smtp_smarthost: 'smtp.163.com:465' # 邮箱smtp服务器代理
  smtp_from: 'xxxx@163.com' # 发送邮箱名称
  smtp_auth_username: 'xxxx@163.com' # 邮箱名称
  smtp_auth_password: 'xxxxxxxxx' #邮箱密码
  smtp_require_tls: false 
route:
  group_by: ['alertname'] # 报警分组名称
  group_wait: 10s # 新分组首次发送通知前的等待时间,用于把同组警报合并到一次通知中
  group_interval: 10s # 同一分组有新警报加入时,距上次通知至少间隔多久再发送
  repeat_interval: 1m # 警报持续未恢复时,重复发送通知的周期
  receiver: 'email' # 默认接收者的名称,必须与下面receivers中某一项的name一致

receivers:
  - name: 'email' # 接收者名称,供route中的receiver引用
    email_configs: # 邮箱配置
    - to: 'xxxxxx@163.com'  # 接收警报的邮箱地址

inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']

2、base64编码(写入secret的值必须是不含换行和空格的单行base64字符串;下面命令的输出默认按76列换行,需要再用xargs合并为一行并用sed删掉空格,或者直接用 base64 -w 0 生成单行输出)

cat  alertmanger_config.yaml |base64 

 

3、替换secret中的alertmanager.yaml配置(用上面base64编码的替换)

 kubectl edit secret  alertmanager-mypro-prometheus-operator-alertmanager 

4、查看邮箱

 

十、自定义监控报警项

 1、介绍

prometheus-operator可以使用PrometheusRule来动态地添加自定义监控报警规则

 

2、查看prometheus-operator项目中Prometheus的标签选择器

kubectl get  prometheus mypro-prometheus-operator-prometheus  -o jsonpath={".spec.ruleSelector"};echo 

 

3、创建自定义的PrometheusRule监控

[root@master ~]# cat  test-PrometheusRule.yaml 
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  labels:
    app: prometheus-operator   #必须与Prometheus的ruleSelector(上一步查到的标签选择器)中的标签一致;若自己创建Prometheus,则需让其ruleSelector匹配这里的labels
    release: mypro             #同上,必须与Prometheus的ruleSelector中的标签一致,否则该规则不会被加载
    prometheus: test-example
  name: test-load1-prometheusrule  
spec:
  groups:
  - name: test-load-1
    rules:
    - alert: test-load-1
      expr: node_load1 > 1
      for: 2m
      labels:
        team: node
      annotations:
        summary: "{{$labels.instance}}: load 1 >1"
        description: "{{$labels.instance}}: job {{$labels.job}} 测试测试 负载大于1"


#导入
kubectl  apply -f test-PrometheusRule.yaml 

  

4、登录pod查看

[root@master ~]# kubectl exec -it   prometheus-mypro-prometheus-operator-prometheus-0  sh 
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl kubectl exec [POD] -- [COMMAND] instead.
Defaulting container name to prometheus.
Use 'kubectl describe pod/prometheus-mypro-prometheus-operator-prometheus-0 -n default' to see all of the containers in this pod.
/prometheus $ ls /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml 
/etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml
/prometheus $ cat  /etc/prometheus/rules/prometheus-mypro-prometheus-operator-prometheus-rulefiles-0/default-test-load1-prometheusrule.yaml 
groups:
- name: test-load-1
  rules:
  - alert: test-load-1
    annotations:
      description: '{{$labels.instance}}: job {{$labels.job}} 测试测试 负载大于1'
      summary: '{{$labels.instance}}: load 1 >1'
    expr: node_load1 > 1
    for: 2m
    labels:
      team: node

5、浏览器查看prometheus

 

posted @ 2020-06-09 10:35  巽逸  阅读(1768)  评论(1)  编辑  收藏  举报