查询当前可用GPU的可用空闲显存并在显存空余时发邮件提醒

查询当前可用GPU的空闲显存,并在显存空余时发送邮件提醒。还可以用来监测显存的申请情况,以便及时发现挖矿病毒或其他恶意进程。

参考博文:Pytorch:实现查询当前可用GPU的可用空闲内存并按照内存从大到小顺序打印出来

将如下代码保存为 'gpu_status.py':

# coding:utf-8
# python2
# import numpy as np
import os
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import smtplib
import time

def send_mail(subject='No-reply', message='No-reply'):
    """Send an HTML e-mail through the 126.com SMTP server.

    The message body is wrapped in a single ``<p>`` element and sent as
    UTF-8 encoded HTML. Server, account and recipient are hard-coded below
    and must be edited before use.

    Args:
        subject: Text placed in the ``Subject`` header.
        message: Text inserted into the HTML body.

    Raises:
        smtplib.SMTPException: If the TLS upgrade, login or delivery fails.
        socket.error / OSError: If the server cannot be reached.
    """
    email_host = 'smtp.126.com'  # SMTP server address
    sender = '***@126.com'  # sender account
    # Password; if the provider issues an authorization code, use that instead.
    password = 'your_email_password_here'
    receiver = '***@126.com'  # recipient address

    msg = MIMEMultipart()
    msg['Subject'] = subject  # mail title
    msg['From'] = ''  # sender display name
    msg['To'] = ''  # recipient display name
    mail_msg = '''<p>\n\t {}</p>'''.format(message)
    msg.attach(MIMEText(mail_msg, 'html', 'utf-8'))

    # Deliver the message. The constructor already connects to the host.
    smtp = smtplib.SMTP(email_host, 25)
    try:
        smtp.starttls()
        smtp.login(sender, password)
        smtp.sendmail(sender, receiver, msg.as_string())
        smtp.quit()
    finally:
        # Always release the socket, even when a step above raised;
        # close() is a no-op after a successful quit().
        smtp.close()
    print('success')

def get_gpu_memory(min_free_mb=4000):
    """Return the free memory of every GPU that has more than ``min_free_mb`` free.

    Runs ``nvidia-smi -q -d Memory`` and keeps the ``Free`` lines found
    within four lines after each line containing ``GPU``. The output is read
    directly from the command's pipe, so no temporary file is created in the
    working directory.

    Args:
        min_free_mb: Threshold a GPU's free-memory value must exceed to be
            included (same unit as printed by nvidia-smi). Defaults to 4000.

    Returns:
        A list of ints, one per qualifying GPU, in nvidia-smi output order.
        The list is empty when no GPU qualifies or the command yields no
        output (for example when nvidia-smi is not installed).
    """
    command = 'nvidia-smi -q -d Memory | grep -A4 GPU | grep Free'
    pipe = os.popen(command)
    try:
        lines = pipe.readlines()
    finally:
        pipe.close()

    memory_gpu = []
    for line in lines:
        fields = line.split()
        # Expected shape: "Free : <number> MiB" -> the value is the third field.
        try:
            free_mb = int(fields[2])
        except (IndexError, ValueError):
            # Skip lines without a numeric value (e.g. "N/A") instead of crashing.
            continue
        if free_mb > min_free_mb:
            memory_gpu.append(free_mb)
    return memory_gpu

# Poll the GPUs once a minute and send a mail whenever at least one GPU has
# enough free memory (as decided by get_gpu_memory()).
#
# NOTE: an earlier numpy-based check that mailed only when a card's free
# memory changed was disabled; the loop notifies on every poll that finds
# free GPUs.

MAX_SEND_ATTEMPTS = 3    # give up on a notification after this many failures
RETRY_DELAY_SECONDS = 5  # pause between failed send attempts
POLL_INTERVAL_SECONDS = 60

# Most recent reading; refreshed at the end of every poll.
flag_last = get_gpu_memory()
while True:
    gpu_memory = get_gpu_memory()

    if gpu_memory:
        # Retry a bounded number of times with a pause, so a broken mail
        # setup cannot spin the CPU forever or block Ctrl-C.
        for _attempt in range(MAX_SEND_ATTEMPTS):
            try:
                send_mail("gpu(s) has changed", "gpu free memory: {} ".format(gpu_memory))
                break
            except Exception as exc:
                print('warning: email not sent.')
                print(exc)
                time.sleep(RETRY_DELAY_SECONDS)

    flag_last = gpu_memory
    time.sleep(POLL_INTERVAL_SECONDS)

 

运行命令:
python gpu_status.py

注:
(1)仅尝试过126邮箱,发给自己没问题。

转载:https://blog.csdn.net/qq_30125323/article/details/115419929

posted @ 2022-05-31 09:48  海_纳百川  阅读(153)  评论(0)  编辑  收藏  举报
本站总访问量