查询当前可用GPU的可用空闲显存并在显存空余时发邮件提醒
查询当前可用GPU的空闲显存,并在显存空余时发邮件提醒。也可以用它来监测显存的申请情况,帮助及时发现挖矿病毒或其他恶意进程。
参考博文:Pytorch:实现查询当前可用GPU的可用空闲内存并按照内存从大到小顺序打印出来
保存如下代码到 gpu_status.py:
# coding:utf-8
# Originally written for Python 2; only constructs that also run on
# Python 3 are used below.
"""Poll nvidia-smi and e-mail a notice while any GPU has spare memory.

Run as a script (``python gpu_status.py``). Every POLL_SECONDS the free
framebuffer memory of each GPU is read; if at least one GPU has more than
MIN_FREE_MIB free, a notification mail listing those values is sent.
"""
import os
import smtplib
import subprocess
import time
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

EMAIL_HOST = 'smtp.126.com'  # SMTP server address
SMTP_PORT = 25
SENDER = '***@126.com'  # sender address
# Account password; use the authorization code instead if the mail provider
# issues one. Do not commit a real secret to version control.
PASSWORD = 'your_email_password_here'
RECEIVER = '***@126.com'  # recipient address

MIN_FREE_MIB = 4000  # a GPU is reported when its free memory exceeds this
POLL_SECONDS = 60  # pause between two nvidia-smi queries
MAIL_ATTEMPTS = 5  # give up on one notification after this many failures
MAIL_RETRY_SECONDS = 10  # pause between two delivery attempts


def send_mail(subject='No-reply', message='No-reply'):
    """Send one HTML mail from SENDER to RECEIVER through EMAIL_HOST.

    Args:
        subject: Text of the Subject header.
        message: Text placed inside the ``<p>`` element of the HTML body.

    Raises:
        smtplib.SMTPException: The server rejected a step of the dialogue.
        OSError: The connection could not be established or broke
            (``socket.error`` on Python 2).
    """
    msg = MIMEMultipart()
    msg['Subject'] = subject
    msg['From'] = ''  # sender display name
    msg['To'] = ''  # recipient display name
    mail_msg = '''<p>\n\t {}</p>'''.format(message)
    msg.attach(MIMEText(mail_msg, 'html', 'utf-8'))

    smtp = smtplib.SMTP(EMAIL_HOST, SMTP_PORT)
    try:
        smtp.starttls()
        smtp.login(SENDER, PASSWORD)
        smtp.sendmail(SENDER, RECEIVER, msg.as_string())
    finally:
        # Release the connection even when a step above raised; a failing
        # QUIT on an already-dropped connection must not mask that error.
        try:
            smtp.quit()
        except smtplib.SMTPServerDisconnected:
            pass
        finally:
            smtp.close()
    print('success')


def parse_free_memory(report, min_free_mib=MIN_FREE_MIB):
    """Extract per-GPU free framebuffer memory from an nvidia-smi report.

    Only the ``Free`` row of each ``FB Memory Usage`` section is read, so
    the ``Free`` row of other sections (e.g. ``BAR1 Memory Usage``) is
    ignored no matter how many rows precede it.

    Args:
        report: Text printed by ``nvidia-smi -q -d Memory``.
        min_free_mib: Only values strictly greater than this are returned.

    Returns:
        A list of ints, in report order, one per GPU whose free memory
        exceeds ``min_free_mib``. Rows whose value is not an integer
        (e.g. ``N/A``) are skipped.
    """
    free_values = []
    in_fb_section = False
    for line in report.splitlines():
        text = line.strip()
        if not text:
            continue
        if text.startswith('GPU '):
            # A new device starts; wait for its own FB section header.
            in_fb_section = False
            continue
        if ':' not in text:
            # Rows without a colon are section headers.
            in_fb_section = text.startswith('FB Memory Usage')
            continue
        key, _, value = text.partition(':')
        if not in_fb_section or key.strip() != 'Free':
            continue
        fields = value.split()
        if not fields:
            continue
        try:
            amount = int(fields[0])
        except ValueError:
            continue
        if amount > min_free_mib:
            free_values.append(amount)
    return free_values


def get_gpu_memory(min_free_mib=MIN_FREE_MIB):
    """Query nvidia-smi and return the free memory of the idle GPUs.

    Args:
        min_free_mib: Only GPUs with strictly more free memory than this
            are reported.

    Returns:
        A list of ints as produced by parse_free_memory(); an empty list
        when nvidia-smi is missing or exits with an error.
    """
    try:
        # Argument list, no shell and no scratch file in the working dir.
        raw = subprocess.check_output(['nvidia-smi', '-q', '-d', 'Memory'])
    except (OSError, subprocess.CalledProcessError):
        print('warning: nvidia-smi query failed.')
        return []
    return parse_free_memory(raw.decode('utf-8', 'replace'), min_free_mib)


def notify(gpu_memory, attempts=MAIL_ATTEMPTS, retry_delay=MAIL_RETRY_SECONDS):
    """Mail the free-memory list, retrying a bounded number of times.

    Args:
        gpu_memory: Value to report, as returned by get_gpu_memory().
        attempts: Maximum number of delivery attempts.
        retry_delay: Seconds to wait between two attempts.

    Returns:
        True once a mail was sent, False if every attempt failed.
    """
    for attempt in range(attempts):
        try:
            send_mail("gpu(s) has changed",
                      "gpu free memory: {} ".format(gpu_memory))
            return True
        # EnvironmentError covers OSError on Python 3 and socket.error /
        # IOError on Python 2; KeyboardInterrupt is deliberately not caught.
        except (smtplib.SMTPException, EnvironmentError):
            print('warning: email not sent.')
            if attempt + 1 < attempts:
                time.sleep(retry_delay)
    return False


def main():
    """Poll forever, sending a mail on every pass that finds a free GPU."""
    while True:
        gpu_memory = get_gpu_memory()
        if gpu_memory:
            notify(gpu_memory)
        time.sleep(POLL_SECONDS)


if __name__ == '__main__':
    main()
python gpu_status.py
注:
(1)仅尝试过126邮箱,发给自己没问题。
转载:https://blog.csdn.net/qq_30125323/article/details/115419929
本文来自博客园,作者:海_纳百川,转载请注明原文链接:https://www.cnblogs.com/chentiao/p/16329369.html,如有侵权联系删除