Python 监控 NVIDIA 显卡温度

`# -*- coding: utf-8 -*-
# @Time    : 2024/4/22 9:55
# @Author  : Rohing
# @File    : python监控显卡.py
# @Software: PyCharm
import subprocess
import time

# Temperature alert threshold: readings strictly above this value trigger a warning.
TEMP_THRESHOLD = 75  # threshold in degrees Celsius

def get_gpu_temperature():
    """Return the current GPU temperature in degrees Celsius, or None on failure.

    Runs ``nvidia-smi --query-gpu=temperature.gpu --format=csv,noheader`` and
    parses its output. nvidia-smi prints one reading per line (one line per
    GPU), so on a multi-GPU machine the hottest reading is returned; that is
    the value relevant for an over-temperature alert.

    Returns:
        The highest reported temperature as an int, or None when nvidia-smi
        is missing, exits with a non-zero status, times out, or prints output
        that cannot be parsed as integers. The reason is printed to stdout.
    """
    command = [
        'nvidia-smi',
        '--query-gpu=temperature.gpu',
        '--format=csv,noheader',
    ]
    try:
        # check=True turns a failing nvidia-smi into an exception instead of
        # letting its error text reach int(); the timeout keeps a hung driver
        # from blocking the caller forever.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            check=True,
            timeout=10,
        )
        output = result.stdout.decode('utf-8')
        # One line per GPU; skip blank lines so trailing newlines are harmless.
        readings = [int(line) for line in output.splitlines() if line.strip()]
        if not readings:
            raise ValueError("nvidia-smi returned no temperature readings")
        return max(readings)
    except (OSError, subprocess.SubprocessError, ValueError) as e:
        # OSError: binary missing or not executable; SubprocessError: non-zero
        # exit or timeout; ValueError: undecodable or non-numeric output.
        print("无法获取显卡温度信息:", e)
        return None

def monitor_gpu_temp():
    """Poll the GPU temperature forever, printing each reading.

    Every 60 seconds the temperature is fetched with get_gpu_temperature().
    A successful reading is printed, followed by a warning when it is
    strictly greater than TEMP_THRESHOLD. A failed reading (None) prints a
    notice instead. The function never returns.
    """
    while True:
        reading = get_gpu_temperature()
        if reading is None:
            print("无法读取显卡温度。")
        else:
            print(f"当前显卡温度: {reading}°C")
            overheated = reading > TEMP_THRESHOLD
            if overheated:
                print(f"警告: 显卡温度超过{TEMP_THRESHOLD}°C!")
        # Wait one minute before the next poll.
        time.sleep(60)

# Start the endless monitoring loop only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    monitor_gpu_temp()
`
posted @ 2024-09-10 19:13  Rohin1  阅读(16)  评论(0)  编辑  收藏  举报