快速查看显卡使用情况和占用用户

这是一个转载博客

转载于:

https://zinglix.xyz/2021/11/17/gpu-user/

 

pip install xmltodict

import subprocess
import xmltodict, pwd, json

# Column indexes into the whitespace-split "Uid:" line of /proc/<pid>/status.
# Per proc(5) the fields after the label are: real, effective, saved-set and
# filesystem UID, so index 1 is the real UID and index 2 the effective UID.
UID = 1
EUID = 2


def owner(pid):
    """Return the username that owns process *pid*.

    Reads the real UID from the ``Uid:`` line of ``/proc/<pid>/status`` and
    maps it to an account name through the password database.

    Args:
        pid: Process id, either an int or a numeric string.

    Returns:
        The account name for the process's real UID. If that UID has no
        password-database entry, the UID itself as a decimal string. ``None``
        if the status file contains no ``Uid:`` line.

    Raises:
        OSError: If the status file cannot be opened, for example because
            the process has already exited.
    """
    # The context manager guarantees the file is closed on every exit path.
    with open("/proc/{}/status".format(pid)) as status:
        for ln in status:
            if ln.startswith("Uid:"):
                uid = int(ln.split()[UID])
                break
        else:
            return None
    try:
        return pwd.getpwuid(uid).pw_name
    except KeyError:
        # The UID has no account on this host; report the number instead of
        # aborting the whole listing.
        return str(uid)


def add_user(process):
    """Tag every process record with the name of the user that owns it.

    Each mapping in *process* gets a ``"user"`` key, set in place to the
    value ``owner`` returns for the record's ``"pid"``.

    Args:
        process: Iterable of process mappings, each with a ``"pid"`` entry.

    Returns:
        A new list holding the same (now tagged) mappings in input order.
    """
    tagged = []
    for entry in process:
        entry.update(user=owner(entry["pid"]))
        tagged += [entry]
    return tagged


def simplify(gpu):
    """Return a trimmed copy of one GPU record.

    Only keys on a fixed allow-list are kept, in the order they occur in
    *gpu*. Values are shared with the input rather than copied.

    Args:
        gpu: Mapping describing a single GPU.

    Returns:
        A new dict holding the allow-listed entries of *gpu*.
    """
    wanted = (
        "@id",
        "product_name",
        "fan_speed",
        "fb_memory_usage",
        "utilization",
        "temperature",
        "processes",
    )
    return {field: gpu[field] for field in gpu if field in wanted}


def get_gpu_info():
    """Query ``nvidia-smi`` and return its report as nested dicts.

    Runs ``nvidia-smi -q -x``, parses the XML output and normalises it so
    that ``result["gpu"]`` is always a list and each GPU's
    ``["processes"]["process_info"]`` is always a list. Every process record
    is passed through ``add_user`` and every GPU record through ``simplify``.

    Returns:
        The parsed ``nvidia_smi_log`` mapping, with its ``"gpu"`` entry
        replaced by the list of simplified GPU records.

    Raises:
        RuntimeError: If ``nvidia-smi`` exits with a non-zero status; the
            message carries the tool's own error output.
        OSError: If ``nvidia-smi`` cannot be started at all.
    """
    sp = subprocess.Popen(
        ["nvidia-smi", "-q", "-x"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    stdout, stderr = sp.communicate()
    if sp.returncode != 0:
        # Fail with the tool's own message instead of letting the XML parser
        # choke on empty or non-XML output further down.
        detail = (stderr or stdout).decode("utf-8", "replace").strip()
        raise RuntimeError(
            "nvidia-smi exited with status {}: {}".format(
                sp.returncode, detail
            )
        )

    report = xmltodict.parse(stdout.decode("utf-8"))["nvidia_smi_log"]
    # JSON round trip kept from the original; presumably it converts the
    # parser's mapping type into plain dicts.
    report = json.loads(json.dumps(report))

    # A single <gpu> element parses to one mapping rather than a list.
    gpus = report["gpu"]
    if not isinstance(gpus, list):
        gpus = [gpus]

    gpu_list = []
    for gpu in gpus:
        # An empty <processes/> element parses to None; treat that, a missing
        # key, or any other non-mapping value as "no processes".
        processes = gpu.get("processes")
        if not isinstance(processes, dict):
            processes = {}
        process = processes.get("process_info") or []
        # A single <process_info> element parses to one mapping, not a list.
        if not isinstance(process, list):
            process = [process]
        processes["process_info"] = add_user(process)
        gpu["processes"] = processes

        gpu_list.append(simplify(gpu))
    report["gpu"] = gpu_list
    return report


# Report: for every GPU, list the processes running on it with their owner,
# pid, memory use (absolute and as a share of the card's total) and name.
gpu = get_gpu_info()
print()
print(
    "    {: <13}\t{: <8}\t{: <20}\t{}".format(
        "user", "pid", "used_memory", "process_name"
    )
)
print(
    "---------------------------------------------------------"
)
for i, g in enumerate(gpu["gpu"]):
    # GPU header line: index, model name and current utilisation.
    print(
        "{} {} ({}):".format(
            i,
            g["product_name"],
            g["utilization"]["gpu_util"],
        )
    )
    # Memory figures are strings whose first space-separated token is the
    # number. A non-numeric total is treated as unknown (0) so the
    # percentage below degrades to "N/A" instead of crashing.
    try:
        total = int(g["fb_memory_usage"]["total"].split(" ")[0])
    except ValueError:
        total = 0
    for p in g["processes"]["process_info"]:
        try:
            used = int(p["used_memory"].split(" ")[0])
            share = "{:5.2f}%".format(100 * used / total)
        except (ValueError, ZeroDivisionError):
            # used_memory was not a number (e.g. "N/A") or the total is
            # unknown: still list the process, just without a percentage.
            share = "N/A"
        print(
            "    {: <13}\t{: <8}\t{: <20}\t{}".format(
                p["user"],
                p["pid"],
                "{: <10} ({})".format(p["used_memory"], share),
                p["process_name"],
            )
        )
    print(
        "---------------------------------------------------------"
    )
print()

 

posted @ 2024-01-12 14:18  立冬以东  阅读(46)  评论(0)  编辑  收藏  举报