返回顶部

gpu节点剩余资源统计脚本

#!/bin/bash

gpu_node=`kubectl get node --show-labels | grep cuda=11.2 | awk '{print $1}'`
real_gpu_core_free=0
real_gpu_mem_free=0
core_sum=0
mem_sum=0

for ip in $gpu_node
do
    #gpu_core 
    totle_gpu_core=`kubectl describe node $ip | grep vcuda-core | awk '{print $2}' | sed -n '1p'`
    used_gpu_core=`kubectl describe node $ip | grep vcuda-core | awk '{print $2}'| sed -n '3p' `
    core_sum=`expr $core_sum+$total_gpu_core`       
    free_core=`expr $totle_gpu_core - $used_gpu_core`
    real_gpu_core_free=`expr $real_gpu_core_free+$free_core`
    echo "$ip gpu_core[ 'All':$totle_gpu_core, 'Free':$free_core ]"

    #gpu_mem
    totle_gpu_mem=`kubectl describe node $ip | grep vcuda-mem | awk '{print $2}' | sed -n '1p'`
    used_gpu_mem=`kubectl describe node $ip | grep vcuda-mem | awk '{print $2}'| sed -n '3p' `
    mem_sum=`expr $mem_sum+$total_gpu_mem`       
    free_mem=`expr $totle_gpu_mem - $used_gpu_mem`
    real_gpu_mem_free=`expr $real_gpu_mem_free+$free_mem`
    echo "$ip gpu_mem[ 'All':$totle_gpu_mem, 'Free':$free_mem ]"

    if [ $totle_gpu_core -eq $free_core ];then
        echo "$ip should be check"
    fi
done
posted @ 2022-10-21 10:30  丨君丶陌  阅读(70)  评论(0)  编辑  收藏  举报