Python 监控内存溢出
- 背景
- 监控后端程序的错误日志中是否出现 OutOfMemory;一旦出现,则重启对应容器,并通过钉钉(DingTalk)发送告警
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2023/8/1 10:23
# @File : outofmemory.py
# @Author : zk_linux
# @Software: PyCharm
# @Description: Poll a log file for "OutOfMemory"; on a hit, restart the
#               container and send a DingTalk alert.
import readline
import time
import subprocess
import re
import logging
import os
from collections import deque

from temp import ail_the_alarm

# logging.basicConfig raises FileNotFoundError when the target directory is
# missing, so create it before the file handler is opened.
os.makedirs('./log', exist_ok=True)
logging.basicConfig(level=logging.INFO,
                    filename='./log/outofmemory.log',
                    filemode='a',
                    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')


def wc_count(file_name):
    """Return the number of lines in ``file_name``.

    :param file_name: path of the file to count
    :return: line count as an int
    """
    # Stream the file instead of readlines(): no handle leak, and the whole
    # log is never held in memory at once.
    with open(file_name, encoding="utf-8", errors="replace") as f:
        return sum(1 for _ in f)


def getLastContent(file_name, lastCount):
    """Return the last ``lastCount`` lines of ``file_name``.

    :param file_name: path of the file to read
    :param lastCount: how many trailing lines to keep
    :return: list of lines (line endings preserved)
    """
    with open(file_name, "r", encoding="utf-8", errors="replace") as f:
        # A bounded deque keeps only the final ``lastCount`` lines.
        return list(deque(f, lastCount))


def filterate(list, key="OutOfMemory"):
    """Return True if any item of ``list`` contains ``key``.

    The parameter name shadows the ``list`` builtin; it is kept unchanged so
    existing keyword callers continue to work.

    :param list: iterable of strings to search
    :param key: substring to look for
    :return: True when at least one item contains ``key``, else False
    """
    return any(key in item for item in list)


def restart_container():
    """Run ``docker restart zk-refactor-esl-business``.

    Failures are logged rather than raised so that the monitoring loop keeps
    running.
    """
    cmd = ['docker', 'restart', 'zk-refactor-esl-business']
    try:
        # Argument list without a shell: nothing is re-parsed by /bin/sh.
        result = subprocess.run(cmd, stderr=subprocess.PIPE)
    except OSError:
        # Raised e.g. when the docker binary cannot be executed.
        logging.exception("Unable to execute: %s", ' '.join(cmd))
        return
    if result.returncode != 0:
        logging.error("docker restart exited with code %s: %s",
                      result.returncode,
                      result.stderr.decode('utf-8', errors='replace').strip())


def _send_alert():
    """Send the memory-overflow alert through ``ail_the_alarm``.

    Reads ``mobile_number``, ``prod_webhook_url`` and ``prod_secret`` from the
    mapping returned for ``/server/scripts/config.ini``.
    """
    webhook = ail_the_alarm.DingTalkUrl('/server/scripts/config.ini').get_config()
    ding_msg = ail_the_alarm.Send_Dingding(
        'HK-集群环境01',
        webhook['mobile_number'],
        web_url=webhook['prod_webhook_url'],
        secret=webhook['prod_secret'],
        msg="内存溢出,尝试重启esl-business",
    )
    ding_msg.send_dingnding()


def _read_lines_after(file_name, skip):
    """Return ``(total_line_count, lines_after_the_first_skip_lines)``.

    Counting and collecting happen in a single pass, so lines appended while
    reading cannot shift the window and cause new lines to be missed.
    """
    total = 0
    new_lines = []
    with open(file_name, "r", encoding="utf-8", errors="replace") as f:
        for total, line in enumerate(f, 1):
            if total > skip:
                new_lines.append(line)
    return total, new_lines


def monitor_log_file(log_file, interval=5):
    """Poll ``log_file`` forever and react to new lines containing "OutOfMemory".

    On a match the container is restarted and an alert is sent. This function
    never returns.

    :param log_file: path of the log file to watch
    :param interval: seconds to sleep between polls
    """
    # Starting at 0 means content already present in the file is scanned on
    # the first pass.
    now_max_count = 0
    while True:
        try:
            total, new_lines = _read_lines_after(log_file, now_max_count)
            if total < now_max_count:
                # The file shrank (rotated or truncated): all of its current
                # content is new to us.
                total, new_lines = _read_lines_after(log_file, 0)
        except OSError:
            # Log file missing or unreadable right now; retry on the next poll
            # instead of terminating the monitor.
            logging.exception("Unable to read %s, will retry", log_file)
            time.sleep(interval)
            continue

        logging.info("The program is normal and under continuous monitoring, add_file_count: %s",
                     len(new_lines))
        now_max_count = total

        if filterate(new_lines):
            logging.error("Program memory overflow, attempting to restart container")
            restart_container()
            try:
                _send_alert()
            except Exception:
                # Daemon boundary: a failed alert must not stop the monitoring.
                logging.exception("Failed to send DingTalk alert")
        time.sleep(interval)


if __name__ == "__main__":
    log_file = "/usr/local/esl/zk-refactor-esl-business/log/log_error.log"
    monitor_log_file(log_file)
生产环境调用钉钉告警即可
通过systemctl管理脚本
[root@acs-hk-ctos7-prod-01 system]# pwd
/etc/systemd/system
[root@acs-hk-ctos7-prod-01 system]# cat omm.service
[Unit]
Description=My Python Script Service
After=network.target

[Service]
Type=simple
User=root
WorkingDirectory=/server/scripts/
ExecStart=/usr/bin/python3 outofmemory.py

[Install]
WantedBy=multi-user.target
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 25岁的心里话
· 闲置电脑爆改个人服务器(超详细) #公网映射 #Vmware虚拟网络编辑器
· 零经验选手,Compose 一天开发一款小游戏!
· 因为Apifox不支持离线,我果断选择了Apipost!
· 通过 API 将Deepseek响应流式内容输出到前端
2020-08-03 日志清理