Python 监控内存溢出
- 背景
- 监控后端程序的错误日志中是否出现关键字 OutOfMemory;一旦出现,则重启对应的 Docker 容器,并通过钉钉(DingTalk)发送告警
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2023/8/1 10:23
# @File    : outofmemory.py
# @Author  : zk_linux
# @Software: PyCharm
# @Description: Poll a backend error log for "OutOfMemory"; on a hit, restart
#               the docker container and send a DingTalk alert.
# NOTE(review): readline and re are not used below; kept as in the original.
import readline
import time
import subprocess
import re
import logging
import os
from collections import deque

try:
    from temp import ail_the_alarm
except ImportError:
    # Alerting is best-effort: restarting the container must still work when
    # the project-local alert module cannot be imported.
    ail_the_alarm = None

# logging.FileHandler does not create missing parent directories, so make
# sure ./log exists before basicConfig opens the log file.
os.makedirs('./log', exist_ok=True)
logging.basicConfig(level=logging.INFO,
                    filename='./log/outofmemory.log',
                    filemode='a',
                    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s'
                    )

# Container restarted when the keyword is found.
DEFAULT_CONTAINER = 'zk-refactor-esl-business'


def wc_count(file_name) -> int:
    """
    Return the number of lines in a file.

    The file is streamed instead of being loaded with ``readlines()`` and is
    closed deterministically. Undecodable bytes are replaced rather than
    raising, because only the line count matters here.

    :param file_name: path of the file to count
    :return: number of lines in the file
    """
    with open(file_name, "r", errors="replace") as f:
        return sum(1 for _ in f)


def getLastContent(file_name, lastCount) -> list:
    """
    Return the last ``lastCount`` lines of a file.

    :param file_name: path of the file to read
    :param lastCount: maximum number of trailing lines to return
    :return: list of lines (line endings included), oldest first
    """
    with open(file_name, "r", errors="replace") as f:
        # A bounded deque keeps only the newest `lastCount` lines in memory.
        return list(deque(f, lastCount))


def filterate(list, key="OutOfMemory") -> bool:
    """
    Tell whether any line contains ``key``.

    The first parameter shadows the builtin ``list``; the name is kept so
    existing keyword callers continue to work.

    :param list: iterable of text lines
    :param key: substring to look for
    :return: True if at least one line contains ``key``, otherwise False
    """
    return any(key in item for item in list)


def restart_container(container=DEFAULT_CONTAINER):
    """
    Restart a docker container and log the outcome.

    The command is passed as an argument list without a shell. A failure to
    launch docker, or a non-zero exit status, is logged instead of being
    ignored; it never raises, so the monitoring loop keeps running.

    :param container: name of the container to restart
    :return: None
    """
    try:
        result = subprocess.run(['docker', 'restart', container],
                                stderr=subprocess.PIPE)
    except OSError:
        logging.exception("Could not execute docker to restart %s", container)
        return
    if result.returncode != 0:
        logging.error("docker restart %s failed (exit code %s): %s",
                      container, result.returncode,
                      result.stderr.decode(errors="replace").strip())


def _read_new_lines(log_file, seen_count):
    """
    Read the log once and return ``(total_line_count, new_lines)``.

    Counting and slicing happen on the same snapshot, so lines appended while
    reading can neither be skipped nor reported twice.

    :param log_file: path of the monitored log
    :param seen_count: number of lines already inspected
    :return: tuple of the current line count and the not-yet-inspected lines
    """
    try:
        with open(log_file, "r", errors="replace") as f:
            lines = f.readlines()
    except FileNotFoundError:
        # The file may be briefly absent (e.g. during rotation); retry later.
        logging.warning("Log file %s does not exist, will retry", log_file)
        return 0, []
    total = len(lines)
    if total < seen_count:
        # The file shrank: it was truncated or rotated, so all of it is new.
        return total, lines
    return total, lines[seen_count:]


def _send_alarm():
    """Send the DingTalk alert; failures are logged, never raised."""
    if ail_the_alarm is None:
        logging.error("Alert module temp.ail_the_alarm is unavailable, DingTalk alert not sent")
        return
    try:
        webhook = ail_the_alarm.DingTalkUrl('/server/scripts/config.ini').get_config()
        ding_msg = ail_the_alarm.Send_Dingding('HK-集群环境01', webhook['mobile_number'],
                                               web_url=webhook['prod_webhook_url'],
                                               secret=webhook['prod_secret'],
                                               msg="内存溢出,尝试重启esl-business")
        ding_msg.send_dingnding()
    except Exception:
        # Top-level boundary: an alerting problem must not stop the monitor.
        logging.exception("Failed to send DingTalk alert")


def monitor_log_file(log_file, interval=5):
    """
    Poll ``log_file`` forever and react to new lines containing "OutOfMemory".

    Every ``interval`` seconds the lines added since the previous poll are
    inspected; on a match the container is restarted and an alert is sent.

    NOTE(review): the first poll treats the whole existing file as new, so an
    old "OutOfMemory" entry triggers a restart right after start-up. This is
    the original behaviour and is left unchanged; confirm it is intended.

    :param log_file: path of the log file to watch
    :param interval: seconds to sleep between polls
    :return: never returns
    """
    seen_count = 0
    while True:
        seen_count, new_lines = _read_new_lines(log_file, seen_count)
        logging.info("The program is normal and under continuous monitoring, add_file_count: %s",
                     len(new_lines))
        if new_lines and filterate(new_lines):
            logging.error("Program memory overflow, attempting to restart container")
            restart_container()
            _send_alarm()
        time.sleep(interval)


if __name__ == "__main__":
    if ail_the_alarm is None:
        logging.warning("Alert module temp.ail_the_alarm could not be imported, running without DingTalk alerts")
    log_file = "/usr/local/esl/zk-refactor-esl-business/log/log_error.log"
    monitor_log_file(log_file)
生产环境调用钉钉告警即可
通过 systemd(systemctl)将脚本作为服务进行管理
[root@acs-hk-ctos7-prod-01 system]# pwd
/etc/systemd/system
[root@acs-hk-ctos7-prod-01 system]# cat omm.service
[Unit]
Description=My Python Script Service
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=/server/scripts/
ExecStart=/usr/bin/python3 outofmemory.py

[Install]
WantedBy=multi-user.target