FastAPI--资源监管

class ResItemLiveness(BaseModel):
    """Response model for a host resource (liveness) check.

    ``status_check`` samples CPU, system memory and NVIDIA GPU memory,
    compares them against caller-supplied thresholds and records the outcome
    on this instance.
    """

    # 0 while every check passes; set to 1 by status_check on any failure.
    resCode: int = 0
    # Failure descriptions appended by status_check (empty when nothing failed).
    resMsg: str = ""
    # Resource snapshot written by status_check.
    resData: dict = dict()

    def status_check(self, cpu_thresh, memory_thresh, gpu_memory_thresh, cpu_intervel: int = 1, gpu_index: int = 0) -> None:
        """Sample CPU / memory / GPU resources and record the result on ``self``.

        Failure messages are appended to ``resMsg`` (existing content is kept),
        ``resCode`` is set to 1 if any check fails, and ``resData`` is replaced
        with the collected snapshot. Memory sizes in the snapshot are in GiB
        (bytes / 1024**3); ``memoryPercent`` is the percentage reported by
        psutil.

        Args:
            cpu_thresh: Minimum idle CPU percentage required
                (``100 - usage`` must not be below it).
            memory_thresh: Minimum system memory, in GiB, that must remain
                (computed as ``total - used``).
            gpu_memory_thresh: Minimum free GPU memory, in GiB, on the
                inspected device.
            cpu_intervel: Passed as ``interval`` to ``psutil.cpu_percent``.
                The misspelled name is kept because it is part of the
                public signature.
            gpu_index: Index of the NVIDIA device to inspect. Any index
                outside ``[0, device_count)`` falls back to device 0.
        """
        gb = 1024**3

        # --- CPU ---------------------------------------------------------
        cpu_logical_cores = psutil.cpu_count(logical=True)
        cpu_physical_cores = psutil.cpu_count(logical=False)
        cpu_usage = psutil.cpu_percent(interval=cpu_intervel)
        if (100 - cpu_usage) < cpu_thresh:
            self.resMsg += f"CPUError: CPU resources have been used by {cpu_usage}%, with an expected remaining {cpu_thresh}%."
            self.resCode = 1

        # --- System memory -----------------------------------------------
        memory_usage = psutil.virtual_memory()
        memory_able = (memory_usage.total - memory_usage.used) / gb
        if memory_able < memory_thresh:
            # NOTE: the "MemroyError" spelling is kept byte-for-byte so that
            # existing consumers matching on the message keep working.
            self.resMsg += (
                f"MemroyError: Insufficient memory resources, expected to have {memory_thresh}(GB) "
                f"of free space, with {memory_able}(GB) of remaining resources."
            )
            self.resCode = 1

        # --- GPU (NVIDIA only, via NVML) -----------------------------------
        try:
            import pynvml
            pynvml.nvmlInit()
            try:
                device_count = pynvml.nvmlDeviceGetCount()
                # Negative indices are treated like any other out-of-range
                # index instead of being handed to NVML.
                check_gpu_index = gpu_index if 0 <= gpu_index < device_count else 0
                handle = pynvml.nvmlDeviceGetHandleByIndex(check_gpu_index)
                memory_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
                usage = pynvml.nvmlDeviceGetUtilizationRates(handle)
            finally:
                # Pair every successful nvmlInit with a shutdown so repeated
                # checks do not leak NVML references. A failing shutdown must
                # not discard readings that were already collected.
                try:
                    pynvml.nvmlShutdown()
                except Exception:
                    pass
            free_memory = memory_info.free / gb
            if free_memory < gpu_memory_thresh:
                self.resMsg += (
                    f"GPUError: Insufficient gpu-memory resources, expected to have {gpu_memory_thresh}(GB) of "
                    f"free space, with {free_memory}(GB) of remaining resources."
                )
                self.resCode = 1
            gpu_resource = {
                "numGPU": device_count,
                "currentIndexGPU": check_gpu_index,
                "totalMemoryGPU": memory_info.total / gb,
                "usedMemoryGPU": memory_info.used / gb,
                "freeMemoryGPU": free_memory,
                "utilizationRateGPU": usage.gpu,
                "utilizationMemoryGPU": usage.memory,
            }
        except Exception as e:
            # Best effort: a missing pynvml package, missing driver or any
            # NVML error marks the check as failed and reports sentinel values.
            gpu_resource = {
                "numGPU": -1,
                "totalMemoryGPU": -1,
                "usedMemoryGPU": -1,
                "freeMemoryGPU": -1,
                "currentIndexGPU": -1,
                "utilizationRateGPU": 0,
                "utilizationMemoryGPU": 0,
            }
            self.resMsg += str(e)
            self.resCode = 1

        # --- Assemble the snapshot -----------------------------------------
        resource_check = {
            "logicalCores": cpu_logical_cores,
            "physicalCores": cpu_physical_cores,
            "cpuUsage": cpu_usage,
            "memoryTotal": memory_usage.total / gb,
            "memoryAvailable": memory_usage.available / gb,
            "memoryFree": memory_usage.free / gb,
            "memoryUsed": memory_usage.used / gb,
        }
        # These fields are not reported by psutil on every platform; report
        # -1 (same sentinel as the GPU fallback) instead of raising.
        platform_fields = (
            ("memoryActive", "active"),
            ("memoryInactive", "inactive"),
            ("memoryBuffers", "buffers"),
            ("memoryCached", "cached"),
            ("memoryShared", "shared"),
            ("memorySlab", "slab"),
        )
        for key, field in platform_fields:
            raw = getattr(memory_usage, field, None)
            resource_check[key] = -1 if raw is None else raw / gb
        # percent is already a percentage; it must not be scaled to GiB.
        resource_check["memoryPercent"] = memory_usage.percent

        resource_check.update(gpu_resource)
        self.resData = resource_check
posted @ 2024-07-18 17:32  巴蜀秀才  阅读(1)  评论(0编辑  收藏  举报