Using cgroup1 and cgroup2 together to limit system resources
1. Edit the GRUB boot options:
root@node115:~# vim.tiny /etc/default/grub
Add (appending to any existing parameters): GRUB_CMDLINE_LINUX="cgroup_no_v1=blkio"
root@node115:~# update-grub
root@node115:~# reboot
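A note on the parameter: cgroup_no_v1= accepts a comma-separated list of controller names, or all to disable every v1 controller. A hedged sketch (the extra cpu entry is hypothetical, not part of this setup):
GRUB_CMDLINE_LINUX="cgroup_no_v1=blkio,cpu"   # hypothetical: move cpu to v2 as well
cat /proc/cmdline                             # verify the parameter after reboot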
2. Check that blkio is disabled in cgroup1 (two reboots are needed: after the first, dmesg still shows systemd cgroup warnings; after the second, the output is clean):
root@node115:~# dmesg -T|grep group
[Wed Apr 27 12:03:27 2022] Command line: BOOT_IMAGE=/boot/vmlinuz-5.3.10-1-pve root=/dev/mapper/vcl-root ro cgroup_no_v1=blkio quiet
[Wed Apr 27 12:03:27 2022] Built 1 zonelists, mobility grouping on. Total pages: 2064227
[Wed Apr 27 12:03:27 2022] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-5.3.10-1-pve root=/dev/mapper/vcl-root ro cgroup_no_v1=blkio quiet
[Wed Apr 27 12:03:27 2022] Disabling io control group subsystem in v1 mounts
[Wed Apr 27 12:03:27 2022] *** VALIDATE cgroup1 ***
[Wed Apr 27 12:03:27 2022] *** VALIDATE cgroup2 ***
[Wed Apr 27 12:03:30 2022] cgroup: systemd (1) created nested cgroup for controller "memory" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.
[Wed Apr 27 12:03:30 2022] cgroup: "memory" requires setting use_hierarchy to 1 on the root
root@node115:~# reboot
root@node115:~# dmesg -T|grep group
[Tue Apr 26 09:23:21 2022] Command line: BOOT_IMAGE=/boot/vmlinuz-5.3.10-1-pve root=/dev/mapper/vcl-root ro cgroup_no_v1=blkio quiet
[Tue Apr 26 09:23:21 2022] Built 1 zonelists, mobility grouping on. Total pages: 2064227
[Tue Apr 26 09:23:21 2022] Kernel command line: BOOT_IMAGE=/boot/vmlinuz-5.3.10-1-pve root=/dev/mapper/vcl-root ro cgroup_no_v1=blkio quiet
[Tue Apr 26 09:23:21 2022] Disabling io control group subsystem in v1 mounts
[Tue Apr 26 09:23:21 2022] *** VALIDATE cgroup1 ***
[Tue Apr 26 09:23:21 2022] *** VALIDATE cgroup2 ***
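Besides dmesg, /proc/cgroups reflects the change directly. A quick check (the hierarchy column is the v1 hierarchy ID; a controller bound to v2 reports 0 there):
grep blkio /proc/cgroups   # hierarchy column should now be 0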
3. Mount the cgroup2 hierarchy (the /sys/fs/cgroup/blkio directory is free to reuse now that the v1 blkio controller is disabled):
root@node115:~# mount -t cgroup2 -o nosuid,nodev,noexec cgroup /sys/fs/cgroup/blkio/
root@node115:~# df -aT
Filesystem Type 1K-blocks Used Available Use% Mounted on
sysfs sysfs 0 0 0 - /sys
proc proc 0 0 0 - /proc
udev devtmpfs 4051300 0 4051300 0% /dev
devpts devpts 0 0 0 - /dev/pts
tmpfs tmpfs 815176 17228 797948 3% /run
/dev/mapper/vcl-root ext4 7933384 3747364 3763316 50% /
securityfs securityfs 0 0 0 - /sys/kernel/security
tmpfs tmpfs 4075880 53040 4022840 2% /dev/shm
tmpfs tmpfs 5120 0 5120 0% /run/lock
tmpfs tmpfs 4075880 0 4075880 0% /sys/fs/cgroup
cgroup cgroup 0 0 0 - /sys/fs/cgroup/systemd
pstore pstore 0 0 0 - /sys/fs/pstore
cgroup cgroup 0 0 0 - /sys/fs/cgroup/net_cls,net_prio
cgroup cgroup 0 0 0 - /sys/fs/cgroup/memory
cgroup cgroup 0 0 0 - /sys/fs/cgroup/rdma
cgroup cgroup 0 0 0 - /sys/fs/cgroup/cpuset
cgroup cgroup 0 0 0 - /sys/fs/cgroup/cpu,cpuacct
cgroup cgroup 0 0 0 - /sys/fs/cgroup/pids
cgroup cgroup 0 0 0 - /sys/fs/cgroup/perf_event
cgroup cgroup 0 0 0 - /sys/fs/cgroup/freezer
cgroup cgroup 0 0 0 - /sys/fs/cgroup/devices
cgroup cgroup 0 0 0 - /sys/fs/cgroup/hugetlb
systemd-1 - - - - - /proc/sys/fs/binfmt_misc
mqueue mqueue 0 0 0 - /dev/mqueue
hugetlbfs hugetlbfs 0 0 0 - /dev/hugepages
debugfs debugfs 0 0 0 - /sys/kernel/debug
sunrpc rpc_pipefs 0 0 0 - /run/rpc_pipefs
configfs configfs 0 0 0 - /sys/kernel/config
fusectl fusectl 0 0 0 - /sys/fs/fuse/connections
lxcfs fuse.lxcfs 0 0 0 - /var/lib/lxcfs
/dev/fuse fuse 30720 20 30700 1% /etc/pve
tmpfs tmpfs 815176 0 815176 0% /run/user/0
binfmt_misc binfmt_misc 0 0 0 - /proc/sys/fs/binfmt_misc
cgroup cgroup2 0 0 0 - /sys/fs/cgroup/blkio
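This mount does not survive a reboot. If it should persist, one option is a matching /etc/fstab entry, sketched here under the assumption that the same mount point is kept:
cgroup2 /sys/fs/cgroup/blkio cgroup2 nosuid,nodev,noexec 0 0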
4. Enable the io controller in cgroup.subtree_control:
root@node115:~# cd /sys/fs/cgroup/blkio
root@node115:/sys/fs/cgroup/blkio# ls -l
total 0
-r--r--r-- 1 root root 0 Apr 26 09:24 cgroup.controllers
-rw-r--r-- 1 root root 0 Apr 26 09:24 cgroup.max.depth
-rw-r--r-- 1 root root 0 Apr 26 09:24 cgroup.max.descendants
-rw-r--r-- 1 root root 0 Apr 26 09:24 cgroup.procs
-r--r--r-- 1 root root 0 Apr 26 09:24 cgroup.stat
-rw-r--r-- 1 root root 0 Apr 26 09:24 cgroup.subtree_control
-rw-r--r-- 1 root root 0 Apr 26 09:24 cgroup.threads
-rw-r--r-- 1 root root 0 Apr 26 09:24 cpu.pressure
-rw-r--r-- 1 root root 0 Apr 26 09:24 io.pressure
-rw-r--r-- 1 root root 0 Apr 26 09:24 memory.pressure
root@node115:/sys/fs/cgroup/blkio# cat cgroup.subtree_control
root@node115:/sys/fs/cgroup/blkio# echo "+io" > /sys/fs/cgroup/blkio/cgroup.subtree_control
root@node115:/sys/fs/cgroup/blkio# cat cgroup.subtree_control
io
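Only controllers listed in cgroup.controllers can be enabled this way, and the "+io" write delegates the io controller to child cgroups; a "-io" write revokes it again. A minimal sketch:
cat cgroup.controllers                 # controllers available at this level
echo "+io" > cgroup.subtree_control    # enable io for children
echo "-io" > cgroup.subtree_control    # undo, if needed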
5. Limit disk bandwidth (cgroup2):
root@node115:/sys/fs/cgroup/blkio# mkdir user.slice
root@node115:/sys/fs/cgroup/blkio# cd user.slice/
root@node115:/sys/fs/cgroup/blkio/user.slice# cat io.max
root@node115:/sys/fs/cgroup/blkio/user.slice# echo "8:16 wbps=10485760" > io.max
root@node115:/sys/fs/cgroup/blkio/user.slice# echo "8:16 rbps=10485760" > io.max
root@node115:/sys/fs/cgroup/blkio/user.slice# cat io.max
8:16 rbps=10485760 wbps=10485760 riops=max wiops=max
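The 8:16 key is the major:minor number of /dev/sdb. For another disk, look the numbers up first; writing max removes a cap again. A sketch (device name assumed):
lsblk -o NAME,MAJ:MIN /dev/sdb          # find the device's major:minor
echo "8:16 rbps=max wbps=max" > io.max  # lift the bandwidth limits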
6. Test the disk read/write bandwidth. First, add the current shell to the cgroup so that the fio processes it starts inherit the limit:
root@node115:~# echo $$ > /sys/fs/cgroup/blkio/user.slice/cgroup.procs
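Membership can be confirmed from the shell itself; on the unified hierarchy the entry is prefixed with 0:: (a quick check, not part of the original transcript):
cat /proc/self/cgroup   # the v2 line should end in /user.slice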
root@node115:~# fio -filename=/mnt/sdb/testfile -direct=1 -iodepth 1 -thread -rw=read -ioengine=libaio -bs=64k -size=2G -numjobs=10 -runtime=60 -group_reporting -name=mytest
mytest: (g=0): rw=read, bs=64K-64K/64K-64K/64K-64K, ioengine=libaio, iodepth=1
...
fio-2.16
Starting 10 threads
Jobs: 10 (f=10): [R(10)] [100.0% done] [10250KB/0KB/0KB /s] [160/0/0 iops] [eta 00m:00s]
mytest: (groupid=0, jobs=10): err= 0: pid=4225: Wed Apr 27 11:25:46 2022
read : io=615040KB, bw=10249KB/s, iops=160, runt= 60009msec
slat (usec): min=4, max=3142, avg=43.23, stdev=62.91
clat (usec): min=266, max=129652, avg=62390.92, stdev=46852.06
lat (usec): min=295, max=129675, avg=62434.15, stdev=46832.50
clat percentiles (usec):
| 1.00th=[ 652], 5.00th=[ 916], 10.00th=[ 1064], 20.00th=[ 1240],
| 30.00th=[ 1832], 40.00th=[83456], 50.00th=[95744], 60.00th=[98816],
| 70.00th=[98816], 80.00th=[99840], 90.00th=[100864], 95.00th=[105984],
| 99.00th=[117248], 99.50th=[121344], 99.90th=[126464], 99.95th=[129536],
| 99.99th=[129536]
lat (usec) : 500=0.28%, 750=1.73%, 1000=5.44%
lat (msec) : 2=23.45%, 4=2.23%, 10=2.39%, 20=1.65%, 50=0.40%
lat (msec) : 100=49.81%, 250=12.61%
cpu : usr=0.04%, sys=0.11%, ctx=9792, majf=2, minf=179
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=9610/w=0/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=1
Run status group 0 (all jobs):
READ: io=615040KB, aggrb=10249KB/s, minb=10249KB/s, maxb=10249KB/s, mint=60009msec, maxt=60009msec
Disk stats (read/write):
sdb: ios=7981/2, merge=1675/0, ticks=24208/13, in_queue=10576, util=7.46%
root@node115:~# fio -filename=/mnt/sdb/testfile -direct=1 -iodepth 1 -thread -rw=write -ioengine=libaio -bs=64k -size=2G -numjobs=10 -runtime=60 -group_reporting -name=mytest
mytest: (g=0): rw=write, bs=64K-64K/64K-64K/64K-64K, ioengine=libaio, iodepth=1
...
fio-2.16
Starting 10 threads
Jobs: 10 (f=10): [W(10)] [100.0% done] [0KB/10240KB/0KB /s] [0/160/0 iops] [eta 00m:00s]
mytest: (groupid=0, jobs=10): err= 0: pid=2917: Wed Apr 27 11:23:05 2022
write: io=616064KB, bw=10254KB/s, iops=160, runt= 60078msec
slat (usec): min=7, max=6874, avg=56.35, stdev=77.24
clat (usec): min=867, max=113648, avg=62344.94, stdev=46731.61
lat (usec): min=952, max=113681, avg=62401.29, stdev=46709.69
clat percentiles (usec):
| 1.00th=[ 1464], 5.00th=[ 1720], 10.00th=[ 1832], 20.00th=[ 2008],
| 30.00th=[ 2256], 40.00th=[95744], 50.00th=[97792], 60.00th=[98816],
| 70.00th=[98816], 80.00th=[98816], 90.00th=[98816], 95.00th=[99840],
| 99.00th=[102912], 99.50th=[104960], 99.90th=[111104], 99.95th=[112128],
| 99.99th=[113152]
lat (usec) : 1000=0.10%
lat (msec) : 2=19.67%, 4=17.38%, 10=0.19%, 20=0.06%, 50=0.17%
lat (msec) : 100=59.38%, 250=3.05%
cpu : usr=0.06%, sys=0.10%, ctx=9897, majf=0, minf=12
IO depths : 1=100.0%, 2=0.0%, 4=0.0%, 8=0.0%, 16=0.0%, 32=0.0%, >=64=0.0%
submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0%
issued : total=r=0/w=9626/d=0, short=r=0/w=0/d=0, drop=r=0/w=0/d=0
latency : target=0, window=0, percentile=100.00%, depth=1
Run status group 0 (all jobs):
WRITE: io=616064KB, aggrb=10254KB/s, minb=10254KB/s, maxb=10254KB/s, mint=60078msec, maxt=60078msec
Disk stats (read/write):
sdb: ios=72/8527, merge=0/1075, ticks=29/22506, in_queue=2244, util=8.25%
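With the io controller enabled, per-device counters in the cgroup give a second way to confirm that the throttled traffic was accounted there (same paths as above):
cat /sys/fs/cgroup/blkio/user.slice/io.stat   # rbytes/wbytes per major:minor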
7. Limit the maximum memory available to a program (cgroup1)
Write a program that allocates 600 MB of memory in a loop, 100 MB at a time:
root@node115:~/inode_test# vim.tiny memtest.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>   /* for sleep() */

#define CHUNK_SIZE (1024 * 1024 * 100)   /* 100 MB per allocation */

int main(void)
{
    char *p;
    int i;

    /* Allocate 600 MB in total, 100 MB every 10 seconds. */
    for (i = 0; i < 6; i++)
    {
        p = malloc(CHUNK_SIZE);
        if (p == NULL)
        {
            printf("malloc error!\n");
            return 0;
        }
        /* Touch every page so the memory is actually committed. */
        memset(p, 0, CHUNK_SIZE);
        printf("malloc memory %d MB\n", (i + 1) * 100);
        sleep(10);
    }
    /* Keep the process alive so it can be inspected. */
    while (1)
    {
        sleep(1);
    }
    return 0;
}
root@node115:~/inode_test# gcc memtest.c -o memtest
Open a new terminal window and set the cgroup's swappiness to 0, so the limit cannot be evaded by swapping pages out:
root@node115:/sys/fs/cgroup/memory/user.slice# echo 0 > memory.swappiness
Limit the maximum memory to 200 MB (209715200 bytes):
root@node115:/sys/fs/cgroup/memory/user.slice# echo 209715200 > memory.limit_in_bytes
Run the program:
root@node115:~/inode_test# ./memtest
Add the program's PID to the cgroup's tasks file:
root@node115:/sys/fs/cgroup/memory/user.slice# echo `ps -ef | grep memtest | grep -v grep | awk '{print $2}'` > tasks
Watch the terminal where the program is running: the process is killed automatically once it exceeds the limit:
root@node115:~/inode_test# ./memtest
malloc memory 100 MB
malloc memory 200 MB
Killed
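The kill comes from the memory cgroup's OOM killer, which can be confirmed after the fact (exact message wording varies by kernel version):
dmesg | grep -i oom                                   # memory-cgroup OOM record
cat /sys/fs/cgroup/memory/user.slice/memory.failcnt   # times the limit was hit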
This shows that the cgroup2 (io) and cgroup1 (memory) limits are in effect at the same time.