K8S上的GPU调度
K8S支持GPU
1.Centos安装NVIDIA驱动
查看显卡型号
yum install pciutils
lspci | grep -i vga
nvidia官网下载驱动
https://www.nvidia.com.tw/Download/index.aspx?lang=tw
安装
https://yinguobing.com/install-nvidia-driver-centos-7/
安装完成
[root@localhost ~]# nvidia-smi
Tue Apr 4 16:12:13 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.182.03 Driver Version: 470.182.03 CUDA Version: 11.4 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:17:00.0 N/A | N/A |
| 30% 34C P8 N/A / N/A | 0MiB / 2001MiB | N/A Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
驱动卸载
sh NVIDIA-Linux-x86_64-470.182.03.run --uninstall
yum remove nvidia-*
rpm -qa|grep -i nvid|sort
yum remove kmod-nvidia-*
reboot
2.安装nvidia-docker支持
docker
https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html
安装docker-ce
yum install -y tar bzip2 make automake gcc gcc-c++ vim pciutils elfutils-libelf-devel libglvnd-devel iptables
yum-config-manager --add-repo=https://download.docker.com/linux/centos/docker-ce.repo
yum repolist -v
#由于 CentOS 不支持较新版本的 Docker-CE 所需的特定版本的 containerd.io 包,因此一种选择是手动安装 containerd.io 包,然后继续安装 docker-ce 包。
yum install -y https://download.docker.com/linux/centos/7/x86_64/stable/Packages/containerd.io-1.4.3-3.1.el7.x86_64.rpm
yum install docker-ce -y
systemctl --now enable docker
安装nvidia-docker2
注:Docker 19.03以后官方不再推荐使用nvidia-docker2,推荐改用nvidia-container-toolkit
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.repo | \
sudo tee /etc/yum.repos.d/nvidia-docker.repo
sudo yum install -y nvidia-docker2
sudo pkill -SIGHUP dockerd
安装nvidia-container-toolkit
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
sudo yum clean expire-cache
sudo yum install -y nvidia-container-toolkit
#添加运行时 ##这里只是添加了运行时没有设为默认
sudo nvidia-ctk runtime configure --runtime=docker
设置默认运行时为nvidia
cat /etc/docker/daemon.json
{
...
"default-runtime": "nvidia", //这里要单独设置
"runtimes": {
"nvidia": {
"args": [],
"path": "nvidia-container-runtime"
}
}
}
重启docker
systemctl daemon-reload
systemctl restart docker
验证docker调用GPU是否正常
[root@localhost ~]# docker run --rm --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi
Tue Apr 4 08:10:33 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.182.03 Driver Version: 470.182.03 CUDA Version: 11.6 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA GeForce ... Off | 00000000:17:00.0 N/A | N/A |
| 30% 34C P8 N/A / N/A | 0MiB / 2001MiB | N/A Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
docker info
[root@localhost fangzhou]# docker info
....
Runtimes: io.containerd.runc.v2 nvidia runc
Default Runtime: nvidia #确认docker的运行时已经更改
....
containerd
安装nvidia-docker2
注:Docker 19.03以后官方不再推荐使用nvidia-docker2,推荐改用nvidia-container-toolkit
distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/nvidia-docker/$distribution/nvidia-docker.repo | \
sudo tee /etc/yum.repos.d/nvidia-docker.repo
sudo yum install -y nvidia-docker2
sudo pkill -SIGHUP dockerd
安装nvidia-container-toolkit
distribution=$(. /etc/os-release;echo $ID$VERSION_ID) \
&& curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo
sudo yum clean expire-cache
sudo yum install -y nvidia-container-toolkit
#添加运行时 ##这里只是添加了运行时没有设为默认
sudo nvidia-ctk runtime configure --runtime=docker
设置默认运行时为nvidia
vim /etc/containerd/config.toml
...
[plugins."io.containerd.grpc.v1.cri".containerd]
default_runtime_name = "nvidia"
disable_snapshot_annotations = true
discard_unpacked_layers = false
ignore_rdt_not_enabled_errors = false
no_pivot = false
snapshotter = "overlayfs"
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime]
base_runtime_spec = ""
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = ""
[plugins."io.containerd.grpc.v1.cri".containerd.default_runtime.options]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
base_runtime_spec = ""
container_annotations = []
pod_annotations = []
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
BinaryName = ""
CriuImagePath = ""
CriuPath = ""
CriuWorkPath = ""
IoGid = 0
IoUid = 0
NoNewKeyring = false
NoPivotRoot = false
Root = ""
ShimCgroup = ""
SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia]
privileged_without_host_devices = false
runtime_engine = ""
runtime_root = ""
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.nvidia.options]
BinaryName = "/usr/bin/nvidia-container-runtime"
SystemdCgroup = true
...
查看container运行时
crictl info
...
"config": {
"containerd": {
"snapshotter": "overlayfs",
"defaultRuntimeName": "nvidia",
"defaultRuntime": {
"runtimeType": "",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": [],
"ContainerAnnotations": [],
"runtimeRoot": "",
"options": {},
"privileged_without_host_devices": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0
},
"untrustedWorkloadRuntime": {
"runtimeType": "",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": [],
"ContainerAnnotations": [],
"runtimeRoot": "",
"options": {},
"privileged_without_host_devices": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0
},
"runtimes": {
"nvidia": {
"runtimeType": "io.containerd.runc.v2",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": null,
"ContainerAnnotations": null,
"runtimeRoot": "",
"options": {
"BinaryName": "/usr/bin/nvidia-container-runtime",
"SystemdCgroup": true
},
"privileged_without_host_devices": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0
},
"runc": {
"runtimeType": "io.containerd.runc.v2",
"runtimePath": "",
"runtimeEngine": "",
"PodAnnotations": [],
"ContainerAnnotations": [],
"runtimeRoot": "",
"options": {
"BinaryName": "",
"CriuImagePath": "",
"CriuPath": "",
"CriuWorkPath": "",
"IoGid": 0,
"IoUid": 0,
"NoNewKeyring": false,
"NoPivotRoot": false,
"Root": "",
"ShimCgroup": "",
"SystemdCgroup": true
},
"privileged_without_host_devices": false,
"baseRuntimeSpec": "",
"cniConfDir": "",
"cniMaxConfNum": 0
}
},
"noPivot": false,
"disableSnapshotAnnotations": true,
"discardUnpackedLayers": false,
"ignoreRdtNotEnabledErrors": false
...
3.daemonset方式安装k8s支持gpu插件(官方版本)
kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.13.0/nvidia-device-plugin.yml
# Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: nvidia-device-plugin-daemonset
namespace: kube-system
spec:
selector:
matchLabels:
name: nvidia-device-plugin-ds
updateStrategy:
type: RollingUpdate
template:
metadata:
labels:
name: nvidia-device-plugin-ds
spec:
tolerations:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
# Mark this pod as a critical add-on; when enabled, the critical add-on
# scheduler reserves resources for critical add-on pods so that they can
# be rescheduled after a failure.
# See https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/
priorityClassName: "system-node-critical"
containers:
- image: nvcr.io/nvidia/k8s-device-plugin:v0.13.0
name: nvidia-device-plugin-ctr
env:
- name: FAIL_ON_INIT_ERROR
value: "false"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop: ["ALL"]
volumeMounts:
- name: device-plugin
mountPath: /var/lib/kubelet/device-plugins
volumes:
- name: device-plugin
hostPath:
path: /var/lib/kubelet/device-plugins
查看插件日志
[root@localhost ~]# kubectl logs -f nvidia-device-plugin-daemonset-5l5pm -n kube-system
2023/04/04 09:00:53 Starting FS watcher.
2023/04/04 09:00:53 Starting OS watcher.
2023/04/04 09:00:53 Starting Plugins.
2023/04/04 09:00:53 Loading configuration.
2023/04/04 09:00:53 Updating config with default resource matching patterns.
2023/04/04 09:00:53
Running with config:
{
"version": "v1",
"flags": {
"migStrategy": "none",
"failOnInitError": false,
"nvidiaDriverRoot": "/",
"gdsEnabled": false,
"mofedEnabled": false,
"plugin": {
"passDeviceSpecs": false,
"deviceListStrategy": "envvar",
"deviceIDStrategy": "uuid"
}
},
"resources": {
"gpus": [
{
"pattern": "*",
"name": "nvidia.com/gpu"
}
]
},
"sharing": {
"timeSlicing": {}
}
}
2023/04/04 09:00:53 Retreiving plugins.
2023/04/04 09:00:53 Detected NVML platform: found NVML library
2023/04/04 09:00:53 Detected non-Tegra platform: /sys/devices/soc0/family file not found
2023/04/04 09:00:53 Starting GRPC server for 'nvidia.com/gpu'
2023/04/04 09:00:53 Starting to serve 'nvidia.com/gpu' on /var/lib/kubelet/device-plugins/nvidia-gpu.sock
2023/04/04 09:00:53 Registered device plugin for 'nvidia.com/gpu' with Kubelet
部署测试项目
apiVersion: batch/v1
kind: Job
metadata:
name: gpu-feature-discovery
labels:
app.kubernetes.io/name: gpu-feature-discovery
app.kubernetes.io/version: 0.8.0
app.kubernetes.io/part-of: nvidia-gpu
spec:
template:
metadata:
labels:
app.kubernetes.io/name: gpu-feature-discovery
app.kubernetes.io/version: 0.8.0
app.kubernetes.io/part-of: nvidia-gpu
spec:
nodeName: NODE_NAME
containers:
- image: nvcr.io/nvidia/gpu-feature-discovery:v0.8.0
name: gpu-feature-discovery
args:
- "--oneshot"
volumeMounts:
- name: output-dir
mountPath: "/etc/kubernetes/node-feature-discovery/features.d"
- name: host-sys
mountPath: "/sys"
securityContext:
privileged: true
volumes:
- name: output-dir
hostPath:
path: "/etc/kubernetes/node-feature-discovery/features.d"
- name: host-sys
hostPath:
path: "/sys"
restartPolicy: Never
查看项目日志
[root@localhost ~]# kubectl logs -f gpu-pod
[Vector addition of 50000 elements]
Copy input data from the host memory to the CUDA device
CUDA kernel launch with 196 blocks of 256 threads
Copy output data from the CUDA device to the host memory
Test PASSED
Done
4.gpu-operator(官方升级版)
官方插件完整版
需要先安装gpu驱动和nvidia-container-toolkit
centos7
注意:toolkit.version需选择与CentOS版本匹配的nvidia-container-toolkit版本(如下例的1.13.1-centos7)
helm install --wait --generate-name -n gpu-operator --create-namespace nvidia/gpu-operator --set driver.enabled=false --set toolkit.enabled=false --set toolkit.version=1.13.1-centos7
镜像替代
docker pull vk1602/node-feature-discovery:v0.12.1
5.第4范式VGPU调度插件
和官方插件选一个就行
https://github.com/4paradigm/k8s-vgpu-scheduler/blob/master/README_cn.md
VGPU必须安装nvidia-docker2,不使用nvidia-container-toolkit
给支持gpu的节点打标签
kubectl label nodes {nodeid} gpu=on
安装vgpu
#添加仓库
helm repo add vgpu-charts https://4paradigm.github.io/k8s-vgpu-scheduler
#查看K8S版本
kubectl version
#指定版本安装
helm install vgpu vgpu-charts/vgpu --set scheduler.kubeScheduler.imageTag=v1.26.4 -n kube-system
查看安装
kubectl get pods -n kube-system
[root@localhost GPU]# kubectl get pod -n kube-system
NAME READY STATUS RESTARTS AGE
....
vgpu-device-plugin-zztcz 2/2 Running 0 18s #这俩pod正常即可,建议看一下该pod下的device-plugin日志
vgpu-scheduler-7555657c58-pzlrg 2/2 Running 0 18s
显存切割测试
apiVersion: v1
kind: Pod
metadata:
name: gpu-pod
spec:
containers:
- name: ubuntu-container
image: ubuntu:18.04
command: ["bash", "-c", "sleep 86400"]
resources:
limits:
nvidia.com/gpu: 1 # 请求1个vGPU
nvidia.com/gpumem: 3000 # 每个vGPU申请3000m显存 (可选,整数类型)
nvidia.com/gpucores: 30 # 每个vGPU的算力为30%实际显卡的算力 (可选,整数类型)
[root@localhost ~]# nvidia-smi
Wed Apr 12 13:38:01 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA TITAN RTX Off | 00000000:17:00.0 Off | N/A |
| 41% 36C P8 25W / 280W | 0MiB / 24576MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
[root@localhost ~]# kubectl exec -it gpu-pod nvidia-smi
kubectl exec [POD] [COMMAND] is DEPRECATED and will be removed in a future version. Use kubectl exec [POD] -- [COMMAND] instead.
[4pdvGPU Warn(7:139721868449600:util.c:149)]: new_uuid=GPU-9539b1f3-3f8e-444a-1657-8ccb8bdb2b90 1
[4pdvGPU Msg(7:139721868449600:libvgpu.c:871)]: Initializing.....
[4pdvGPU Msg(7:139721868449600:device.c:249)]: driver version=12000
Wed Apr 12 05:38:14 2023
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|===============================+======================+======================|
| 0 NVIDIA TITAN RTX Off | 00000000:17:00.0 Off | N/A |
| 40% 36C P8 25W / 280W | 0MiB / 3000MiB | 0% Default |
| | | N/A |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: |
| GPU GI CI PID Type Process name GPU Memory |
| ID ID Usage |
|=============================================================================|
| No running processes found |
+-----------------------------------------------------------------------------+
[4pdvGPU Msg(7:139721868449600:multiprocess_memory_limit.c:457)]: Calling exit handler 7
查看监控信息
[root@localhost ~]# kubectl get svc -n kube-system
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
...
vgpu-device-plugin-monitor NodePort 10.68.75.180 <none> 31992:31992/TCP 23m
vgpu-scheduler NodePort 10.68.29.124 <none> 443:31242/TCP,31993:31993/TCP 23m
http://{nodeip}:{monitorPort}/metrics
DCGM监控
$ helm repo add gpu-helm-charts \
https://nvidia.github.io/dcgm-exporter/helm-charts
$ helm repo update
$ helm install \
--generate-name \
gpu-helm-charts/dcgm-exporter
FAQ
1.使用该插件调度GPU时,pod定义中不能包含特权相关的securityContext配置,否则插件会报错,pod会无限重建
其它
https://virtaitech.com/ (收费版)