ulimit功能以及如何在内核中生效
关键词:ulimit、getrlimit、setrlimit、RLIMIT_CPU、RLIMIT_CORE等等。
内核资源限制通过ulimit进行读取和设置;ulimit进行资源设置之后,简单分析内核中是如何对系统行为进行限制的。
1. 了解ulimit(busybox)
以busybox中的ulimit为例,主要通过调用getrlimit()/setrlimit()设置系统的各种资源。
ulimit设置和获取的资源主要有如下几种:
/* Resource identifiers passed as the 'resource' argument of getrlimit()/setrlimit(). */
#define RLIMIT_CPU 0 /* CPU time in sec */
#define RLIMIT_FSIZE 1 /* Maximum filesize */
#define RLIMIT_DATA 2 /* max data size */
#define RLIMIT_STACK 3 /* max stack size */
#define RLIMIT_CORE 4 /* max core file size */
#define RLIMIT_RSS 5 /* max resident set size */
#define RLIMIT_NPROC 6 /* max number of processes */
#define RLIMIT_NOFILE 7 /* max number of open files */
#define RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
#define RLIMIT_AS 9 /* address space limit */
#define RLIMIT_LOCKS 10 /* maximum file locks held */
#define RLIMIT_SIGPENDING 11 /* max number of pending signals */
#define RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */
#define RLIMIT_NICE 13 /* max nice prio allowed to raise to 0-39 for nice level 19 .. -20 */
#define RLIMIT_RTPRIO 14 /* maximum realtime priority */
#define RLIMIT_RTTIME 15 /* timeout for RT tasks in us */
#define RLIM_NLIMITS 16 /* number of resource types; do_prlimit() returns -EINVAL for resource >= RLIM_NLIMITS */
用户空间对内核资源的限制通过getrlimit()/setrlimit()两个函数进行。
其中resource就是如上的宏定义,struct rlimit是用户输入的阈值。
/* Limit pair for one resource; do_prlimit() rejects a new value with -EINVAL when rlim_cur > rlim_max. */
struct rlimit {
rlim_t rlim_cur; /* Soft limit */
rlim_t rlim_max; /* Hard limit (ceiling for rlim_cur) */
};
#include <sys/time.h>
#include <sys/resource.h>
int getrlimit(int resource, struct rlimit *rlim);
int setrlimit(int resource, const struct rlimit *rlim);
int prlimit(pid_t pid, int resource, const struct rlimit *new_limit, struct rlimit *old_limit);
内核resource类型和ulimit命令选项的对应关系,通过busybox中的limits_tbl[]关联起来。
/*
 * Maps each ulimit option letter to the resource it controls.
 * struct limits is declared elsewhere; judging by the initializers and by
 * how shell_builtin_ulimit()/printlim() use the entries, the fields are
 * presumably, in order:
 *   cmd          - RLIMIT_* value handed to getrlimit()/setrlimit()
 *   factor_shift - the user-supplied number is shifted left by this amount
 *                  before setrlimit() and shifted right again before printing
 *   option       - option character matched against getopt()'s result
 *   name         - description printed by "ulimit -a"
 */
static const struct limits limits_tbl[] = {
{ RLIMIT_FSIZE, 9, 'f', "file size (blocks)" },
{ RLIMIT_CPU, 0, 't', "cpu time (seconds)" },
{ RLIMIT_DATA, 10, 'd', "data seg size (kb)" },
{ RLIMIT_STACK, 10, 's', "stack size (kb)" },
{ RLIMIT_CORE, 9, 'c', "core file size (blocks)" },
{ RLIMIT_RSS, 10, 'm', "resident set size (kb)" },
{ RLIMIT_MEMLOCK, 10, 'l', "locked memory (kb)" },
{ RLIMIT_NPROC, 0, 'p', "processes" },
{ RLIMIT_NOFILE, 0, 'n', "file descriptors" },
{ RLIMIT_AS, 10, 'v', "address space (kb)" },
{ RLIMIT_LOCKS, 0, 'w', "locks" },
{ RLIMIT_NICE, 0, 'e', "scheduling priority" },
{ RLIMIT_RTPRIO, 0, 'r', "real-time priority" },
};
下面看看ulimit工具如何通过getrlimit()/setrlimit()对内核资源进行限制。
int FAST_FUNC
shell_builtin_ulimit(char **argv)
{
unsigned opts;
unsigned argc;
...
argc = string_array_len(argv);
opts = 0;
while (1) {
struct rlimit limit;
const struct limits *l;
int opt_char = getopt(argc, argv, ulimit_opt_string);
if (opt_char == -1)
break;
if (opt_char == 'H') {
opts |= OPT_hard;
continue;
}
if (opt_char == 'S') {
opts |= OPT_soft;
continue;
}
if (opt_char == 'a') {
for (l = limits_tbl; l != &limits_tbl[ARRAY_SIZE(limits_tbl)]; l++) {
getrlimit(l->cmd, &limit);
printf("-%c: %-30s ", l->option, l->name);
printlim(opts, &limit, l);
}
continue;
}
if (opt_char == 1)
opt_char = 'f';
for (l = limits_tbl; l != &limits_tbl[ARRAY_SIZE(limits_tbl)]; l++) {----------------------------limits_tbl[]中是struct limits结构体的数组,对应每一个resource资源。
if (opt_char == l->option) {-----------------------------------------------------------------选择和当前opt_char一致的limits_tbl[]成员,然后进行显示或者设置。
char *val_str;
getrlimit(l->cmd, &limit);---------------------------------------------------------------首先获取当前类型的resource。
val_str = optarg;
if (!val_str && argv[optind] && argv[optind][0] != '-')
val_str = argv[optind++]; /* ++ skips NN in "-c NN" case */
if (val_str) {---------------------------------------------------------------------------后面跟上参数的表示是设置,否则就是读取。
rlim_t val;
if (strcmp(val_str, "unlimited") == 0)
val = RLIM_INFINITY;-------------------------------------------------------------参数是unlimited类型。
else {
if (sizeof(val) == sizeof(int))
val = bb_strtou(val_str, NULL, 10);
else if (sizeof(val) == sizeof(long))
val = bb_strtoul(val_str, NULL, 10);
else
val = bb_strtoull(val_str, NULL, 10);
if (errno) {
bb_error_msg("invalid number '%s'", val_str);
return EXIT_FAILURE;
}
val <<= l->factor_shift;---------------------------------------------------------将参数转换成内核识别的值,这里面注意不同参数有factor_shift的区别,这是工具和内核之间的一个转换。
}
//bb_error_msg("opt %c val_str:'%s' val:%lld", opt_char, val_str, (long long)val);
/* from man bash: "If neither -H nor -S
* is specified, both the soft and hard
* limits are set. */
if (!opts)---------------------------------------------------------------------------不指定-H/-S则两个都设置,否则单独设置。
opts = OPT_hard + OPT_soft;
if (opts & OPT_hard)
limit.rlim_max = val;
if (opts & OPT_soft)
limit.rlim_cur = val;
//bb_error_msg("setrlimit(%d, %lld, %lld)", l->cmd, (long long)limit.rlim_cur, (long long)limit.rlim_max);
if (setrlimit(l->cmd, &limit) < 0) {-------------------------------------------------将指定类型的resource阈值设置到内核中。
bb_perror_msg("error setting limit");
return EXIT_FAILURE;
}
} else {
printlim(opts, &limit, l);
}
break;
}
} /* for (every possible opt) */
if (l == &limits_tbl[ARRAY_SIZE(limits_tbl)]) {
/* bad option. getopt already complained. */
break;
}
} /* while (there are options) */
return 0;
}
/*
 * Print one resource limit followed by a newline.
 *
 * opts:  option bitmask; when OPT_hard is set the hard limit (rlim_max)
 *        is printed, otherwise the soft limit (rlim_cur).
 * limit: limit pair already fetched for the resource.
 * l:     table entry whose factor_shift scales the raw value back to
 *        the unit shown to the user.
 *
 * RLIM_INFINITY is printed as "unlimited".
 */
static void printlim(unsigned opts, const struct rlimit *limit,
			const struct limits *l)
{
	rlim_t val;

	val = limit->rlim_max;
	if (!(opts & OPT_hard))
		val = limit->rlim_cur;

	if (val == RLIM_INFINITY)
		puts("unlimited");
	else {
		val >>= l->factor_shift;
		/* %llu takes an unsigned long long; keep the cast in sync with the conversion */
		printf("%llu\n", (unsigned long long) val);
	}
}
至此可以了解到ulimit是如何对内核resource产生影响的。
2. getrlimit()/setrlimit()内核调用
getrlimit()/setrlimit()与内核中对应的系统调用同名,实现如下;两者的核心都是do_prlimit()。
/*
 * getrlimit: fetch the limits of @resource for the current task through
 * do_prlimit() and copy them to the user buffer @rlim.
 *
 * Returns 0 on success, the non-zero result of do_prlimit() if the lookup
 * fails, or -EFAULT if copy_to_user() reports a failure.
 */
SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit cur;
	int err = do_prlimit(current, resource, NULL, &cur);

	if (err)
		return err;
	if (copy_to_user(rlim, &cur, sizeof(*rlim)))
		return -EFAULT;
	return 0;
}
/*
 * setrlimit: copy the requested limits from the user buffer @rlim and
 * apply them to the current task through do_prlimit().
 *
 * Returns -EFAULT if copy_from_user() reports a failure, otherwise the
 * result of do_prlimit().
 */
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit requested;

	if (!copy_from_user(&requested, rlim, sizeof(*rlim)))
		return do_prlimit(current, resource, &requested, NULL);
	return -EFAULT;
}
int do_prlimit(struct task_struct *tsk, unsigned int resource,
struct rlimit *new_rlim, struct rlimit *old_rlim)
{
struct rlimit *rlim;
int retval = 0;
if (resource >= RLIM_NLIMITS)
return -EINVAL;
if (new_rlim) {
if (new_rlim->rlim_cur > new_rlim->rlim_max)
return -EINVAL;
if (resource == RLIMIT_NOFILE &&
new_rlim->rlim_max > sysctl_nr_open)----------------------RLIMIT_NOFILE不能超过sysctl_nr_open数目。
return -EPERM;
}
read_lock(&tasklist_lock);
if (!tsk->sighand) {
retval = -ESRCH;
goto out;
}
rlim = tsk->signal->rlim + resource;
task_lock(tsk->group_leader);
if (new_rlim) {
if (new_rlim->rlim_max > rlim->rlim_max &&
!capable(CAP_SYS_RESOURCE))
retval = -EPERM;
if (!retval)
retval = security_task_setrlimit(tsk->group_leader,
resource, new_rlim);
if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
new_rlim->rlim_cur = 1;
}
}
...
task_unlock(tsk->group_leader);
if (!retval && new_rlim && resource == RLIMIT_CPU &&
new_rlim->rlim_cur != RLIM_INFINITY)
update_rlimit_cpu(tsk, new_rlim->rlim_cur);--------------------------设置RLIMIT_CPU需要更新CPU相关信息。
out:
read_unlock(&tasklist_lock);
return retval;
}
rlimit的设置比较简单,使用则分散在内核各处。
获取当前系统resource限制接口有:
static inline unsigned long task_rlimit(const struct task_struct *tsk,
unsigned int limit)
{
return READ_ONCE(tsk->signal->rlim[limit].rlim_cur);
}
static inline unsigned long task_rlimit_max(const struct task_struct *tsk,
unsigned int limit)
{
return READ_ONCE(tsk->signal->rlim[limit].rlim_max);
}
/* Soft limit of resource @limit for the current task. */
static inline unsigned long rlimit(unsigned int limit)
{
	const struct task_struct *self = current;

	return task_rlimit(self, limit);
}
/* Hard limit of resource @limit for the current task. */
static inline unsigned long rlimit_max(unsigned int limit)
{
	const struct task_struct *self = current;

	return task_rlimit_max(self, limit);
}
新创建进程/线程的rlimit继承自父进程的rlimit。
3. 资源分类
下面对各种类型资源在内核中是如何进行限制简单分析。
3.1 RLIMIT_CPU 0 /* CPU time in sec */
RLIMIT_CPU表示进程CPU运行时间的最大值,单位是秒。
RLIMIT_CPU规定了进程所使用的最大CPU时间,超过soft发送SIGXCPU信号,超过hard发送SIGKILL信号。
static void check_process_timers(struct task_struct *tsk, struct list_head *firing) { ... soft = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);----------------------------------获取系统资源soft值。 if (soft != RLIM_INFINITY) { unsigned long psecs = cputime_to_secs(ptime);------------------------------表示当前进程所占用的CPU时间。 unsigned long hard = READ_ONCE(sig->rlim[RLIMIT_CPU].rlim_max);-------------------------获取系统资源hard值。 cputime_t x; if (psecs >= hard) { /* * At the hard limit, we just die. * No need to calculate anything else now. */ __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);----------------如果进程CPU时间超过hard,则向进程发送SIGKILL信号,杀死进程。 return; } if (psecs >= soft) { /* * At the soft limit, send a SIGXCPU every second. */ __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);----------------如果进程CPU时间超过soft,则向进程发送SIGXCPU信号。 if (soft < hard) { soft++; sig->rlim[RLIMIT_CPU].rlim_cur = soft; } } x = secs_to_cputime(soft); if (!prof_expires || x < prof_expires) { prof_expires = x; } } ... }
3.2 RLIMIT_FSIZE 1 /* Maximum filesize */
RLIMIT_FSIZE表示创建文件大小的最大值,超过此大小则发送SIGXFSZ。
int inode_newsize_ok(const struct inode *inode, loff_t offset) { if (inode->i_size < offset) { unsigned long limit; limit = rlimit(RLIMIT_FSIZE);---------------------------------获取系统RLIMIT_FSIZE大小。 if (limit != RLIM_INFINITY && offset > limit) goto out_sig; if (offset > inode->i_sb->s_maxbytes) goto out_big; } else { /* * truncation of in-use swapfiles is disallowed - it would * cause subsequent swapout to scribble on the now-freed * blocks. */ if (IS_SWAPFILE(inode)) return -ETXTBSY; } return 0; out_sig: send_sig(SIGXFSZ, current, 0);------------------------------------发送SIGXFSZ信号。 out_big: return -EFBIG; }
3.3 RLIMIT_DATA 2 /* max data size */
RLIMIT_DATA用于限制数据段大小的最大值。
may_expand_vm()用于判断是否允许进程扩大自己的vm空间,返回true表示允许,false表示禁止。
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) { if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)------------------------首先检查进程的total_vm+pages是否大于RLIMIT_AS,如果超过则返回false,表示不允许扩大vm空间。 return false; if (is_data_mapping(flags) && mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) { /* Workaround for Valgrind */ if (rlimit(RLIMIT_DATA) == 0 && mm->data_vm + npages <= rlimit_max(RLIMIT_DATA) >> PAGE_SHIFT) return true; if (!ignore_rlimit_data) { pr_warn_once("%s (%d): VmData %lu exceed data ulimit %lu. Update limits or use boot option ignore_rlimit_data.\n", current->comm, current->pid, (mm->data_vm + npages) << PAGE_SHIFT, rlimit(RLIMIT_DATA)); return false;---------------------------------------------------------------如果区域大于RLIMIT_DATA,并且没有ignore_rlimit_data,返回false。 } } return true; }
3.4 RLIMIT_STACK 3 /* max stack size */
RLIMIT_STACK表示一个线程/进程栈的最大尺寸。
expand_stack()会对增加后的尺寸进行检查,确保符合RLIMIT_STACK等一系列限制。
int expand_stack(struct vm_area_struct *vma, unsigned long address) { return expand_downwards(vma, address); } int expand_downwards(struct vm_area_struct *vma, unsigned long address) { ... /* Somebody else might have raced and expanded it already */ if (address < vma->vm_start) { unsigned long size, grow; size = vma->vm_end - address; grow = (vma->vm_start - address) >> PAGE_SHIFT; error = -ENOMEM; if (grow <= vma->vm_pgoff) { error = acct_stack_growth(vma, size, grow); if (!error) { ... } } } anon_vma_unlock_write(vma->anon_vma); khugepaged_enter_vma_merge(vma, vma->vm_flags); validate_mm(mm); return error; } static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, unsigned long grow) { struct mm_struct *mm = vma->vm_mm; struct rlimit *rlim = current->signal->rlim; unsigned long new_start; /* address space limit tests */ if (!may_expand_vm(mm, vma->vm_flags, grow))-------------首先检查内存空间是否够用。 return -ENOMEM; /* Stack limit test */ if (size > READ_ONCE(rlim[RLIMIT_STACK].rlim_cur)) return -ENOMEM;--------------------------------------检查申请栈size是否超过栈空间限制。 /* mlock limit tests */ if (vma->vm_flags & VM_LOCKED) { unsigned long locked; unsigned long limit; locked = mm->locked_vm + grow; limit = READ_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur); limit >>= PAGE_SHIFT; if (locked > limit && !capable(CAP_IPC_LOCK)) return -ENOMEM; } /* Check to ensure the stack will not grow into a hugetlb-only region */ new_start = (vma->vm_flags & VM_GROWSUP) ? vma->vm_start : vma->vm_end - size; if (is_hugepage_only_range(vma->vm_mm, new_start, size)) return -EFAULT; /* * Overcommit.. This must be the final test, as it will * update security statistics. */ if (security_vm_enough_memory_mm(mm, grow)) return -ENOMEM; return 0; }
3.5 RLIMIT_CORE 4 /* max core file size */
RLIMIT_CORE限制了coredump产生文件尺寸的最大值,如果为0说明不允许创建core文件。
void do_coredump(const siginfo_t *siginfo) { ... struct coredump_params cprm = { .siginfo = siginfo, .regs = signal_pt_regs(), .limit = rlimit(RLIMIT_CORE),--------------------------------cprm中包含了对coredump文件大小的限制,在具体格式进行coredump过程中会检查coredump文件是否超过此值。 .mm_flags = mm->flags, }; ... }
3.6 RLIMIT_RSS 5 /* max resident set size */
RLIMIT_RSS限制了进程最大实际内存使用量,未起作用。
3.7 RLIMIT_NPROC 6 /* max number of processes */
RLIMIT_NPROC规定了每个real user id的子进程数量的最大值.
do_execveat_common()执行新程序的时候检查current_user()->processes,如果超过RLIMIT_NPROC则返回EAGAIN,表示资源不够使用。
copy_process()创建新进程/线程的时候同样会进行检查。
static int do_execveat_common(int fd, struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp, int flags) { ... if ((current->flags & PF_NPROC_EXCEEDED) && atomic_read(&current_user()->processes) > rlimit(RLIMIT_NPROC)) { retval = -EAGAIN; goto out_ret; } ... } static __latent_entropy struct task_struct *copy_process( unsigned long clone_flags, unsigned long stack_start, unsigned long stack_size, int __user *child_tidptr, struct pid *pid, int trace, unsigned long tls, int node) { ... if (atomic_read(&p->real_cred->user->processes) >= task_rlimit(p, RLIMIT_NPROC)) { if (p->real_cred->user != INIT_USER && !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) { goto bad_fork_free; } } ... }
max_threads如何计算?
max_threads的大小是由set_max_threads()计算出来的。
在系统启动的时候fork_init()设置max_threads,之后也可以通过sysctl_max_threads()进行修改。
可以通过/proc/sys/kernel/threads-max获取当前系统的max_threads。
void __init fork_init(void) {
... set_max_threads(MAX_THREADS); ... } int sysctl_max_threads(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int ret; int threads = max_threads; int min = MIN_THREADS; int max = MAX_THREADS; t = *table; t.data = &threads; t.extra1 = &min; t.extra2 = &max; ret = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (ret || !write) return ret; set_max_threads(threads); return 0; } static void set_max_threads(unsigned int max_threads_suggested) { u64 threads; /* * The number of threads shall be limited such that the thread * structures may only consume a small part of the available memory. */ if (fls64(totalram_pages) + fls64(PAGE_SIZE) > 64) threads = MAX_THREADS; else threads = div64_u64((u64) totalram_pages * (u64) PAGE_SIZE, (u64) THREAD_SIZE * 8UL); if (threads > max_threads_suggested) threads = max_threads_suggested; max_threads = clamp_t(u64, threads, MIN_THREADS, MAX_THREADS); }
THREAD_SIZE为2个页面:
#define THREAD_SIZE (PAGE_SIZE * 2)
#define MIN_THREADS 20
#define FUTEX_TID_MASK 0x3fffffff
#define MAX_THREADS FUTEX_TID_MASK
所以max_threads数量为max_threads=totalram_pages*PAGE_SIZE/(THREAD_SIZE*8)。
在totalram_pages为100556的情况下,max_threads=100556/16=6284(整数除法,舍去小数部分),实际的RLIMIT_NPROC=max_threads/2,即为3142。
可以通过ulimit -p验证。
3.8 RLIMIT_NOFILE 7 /* max number of open files */
RLIMIT_NOFILE限制进程打开文件数量最大值。
比如alloc_fd()申请文件句柄号,end对应的就是RLIMIT_NOFILE。
static int alloc_fd(unsigned start, unsigned flags) { return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);-----current->files是当前进程的打开文件列表。 } int get_unused_fd_flags(unsigned flags) { return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags); } int __alloc_fd(struct files_struct *files, unsigned start, unsigned end, unsigned flags) { ... error = -EMFILE; if (fd >= end)---------------------------------------------------------------如果找到的fd超过RLIMIT_NOFILE则返回错误。 goto out; ... }
3.9 RLIMIT_MEMLOCK 8 /* max locked-in-memory address space */
RLIMIT_MEMLOCK用于限制使用mlock()锁定的locked_vm内存最大使用量。
static __must_check int do_mlock(unsigned long start, size_t len, vm_flags_t flags) { unsigned long locked; unsigned long lock_limit; int error = -ENOMEM; if (!can_do_mlock()) return -EPERM; lru_add_drain_all(); /* flush pagevec */ len = PAGE_ALIGN(len + (offset_in_page(start))); start &= PAGE_MASK; lock_limit = rlimit(RLIMIT_MEMLOCK);-----------------------------------------系统对RLIMIT_MEMLOCK的阈值。 lock_limit >>= PAGE_SHIFT; locked = len >> PAGE_SHIFT;--------------------------------------------------本次mlock内存大小。 if (down_write_killable(&current->mm->mmap_sem)) return -EINTR; locked += current->mm->locked_vm;--------------------------------------------进程中已经mlock内存大小。 if ((locked > lock_limit) && (!capable(CAP_IPC_LOCK))) { locked -= count_mm_mlocked_page_nr(current->mm, start, len); } if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))-------------------------进行mlock内存检查,如有错误返回错误类型。 error = apply_vma_lock_flags(start, len, flags); up_write(&current->mm->mmap_sem); if (error) return error; error = __mm_populate(start, len, 0); if (error) return __mlock_posix_error_return(error); return 0; }
3.10 RLIMIT_AS 9 /* address space limit */
RLIMIT_AS表示进程可使用的最大虚拟内存大小,超过后则不允许继续申请内存。
bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages) { if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)----------------------total_vm加上将要新增内存,如果超过RLIMIT_AS则返回错误。 return false; ... return true; }
3.11 RLIMIT_LOCKS 10 /* maximum file locks held */
RLIMIT_LOCKS表示进程可建立的文件锁数量最大值,未使用。
3.12 RLIMIT_SIGPENDING 11 /* max number of pending signals */
RLIMIT_SIGPENDING表示进程信号等待队列最大大小,初始值等于RLIMIT_NPROC。
void __init fork_init(void) { ... set_max_threads(MAX_THREADS); init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2; init_task.signal->rlim[RLIMIT_SIGPENDING] = init_task.signal->rlim[RLIMIT_NPROC]; ... }
在__sigqueue_alloc()中,只有设置了override_rlimit,或者当前用户的sigpending没有超过RLIMIT_SIGPENDING时,才会为pending信号分配sigqueue;否则信号被丢弃。
static struct sigqueue * __sigqueue_alloc(int sig, struct task_struct *t, gfp_t flags, int override_rlimit) { ... if (override_rlimit || atomic_read(&user->sigpending) <= task_rlimit(t, RLIMIT_SIGPENDING)) {-----------------------------------在设置了override_rlimit,或者当前用户sigpending不超过RLIMIT_SIGPENDING的条件下,才可以申请sigqueue。 q = kmem_cache_alloc(sigqueue_cachep, flags); } else { print_dropped_signal(sig);-------------------------------------------------否则信号将被丢弃。 } if (unlikely(q == NULL)) { atomic_dec(&user->sigpending); free_uid(user); } else { INIT_LIST_HEAD(&q->list); q->flags = 0; q->user = user; } return q; }
3.13 RLIMIT_MSGQUEUE 12 /* maximum bytes in POSIX mqueues */
RLIMIT_MSGQUEUE限制了进程可为POSIX消息队列分配的最大字节数,超过限制后返回EMFILE错误。
static struct inode *mqueue_get_inode(struct super_block *sb, struct ipc_namespace *ipc_ns, umode_t mode, struct mq_attr *attr) { ... if (S_ISREG(mode)) { ... if (u->mq_bytes + mq_bytes < u->mq_bytes || u->mq_bytes + mq_bytes > rlimit(RLIMIT_MSGQUEUE)) { spin_unlock(&mq_lock); /* mqueue_evict_inode() releases info->messages */ ret = -EMFILE; goto out_inode; } ... } else if (S_ISDIR(mode)) { ... } return inode; out_inode: iput(inode); err: return ERR_PTR(ret); }
3.14 RLIMIT_NICE 13 /* max nice prio allowed to raise to 0-39 for nice level 19 .. -20 */
RLIMIT_NICE限制了进程可通过setpriority()或者nice()调用设置的最大nice值。
static void binder_set_nice(long nice) { long min_nice; if (can_nice(current, nice)) { set_user_nice(current, nice); return; } min_nice = rlimit_to_nice(current->signal->rlim[RLIMIT_NICE].rlim_cur); binder_debug(BINDER_DEBUG_PRIORITY_CAP, "%d: nice value %ld not allowed use %ld instead\n", current->pid, nice, min_nice); set_user_nice(current, min_nice); if (min_nice <= MAX_NICE) return; binder_user_error("%d RLIMIT_NICE not set\n", current->pid); } int can_nice(const struct task_struct *p, const int nice) { /* convert nice value [19,-20] to rlimit style value [1,40] */ int nice_rlim = nice_to_rlimit(nice); return (nice_rlim <= task_rlimit(p, RLIMIT_NICE) || capable(CAP_SYS_NICE));----------------------------------------------只要申请的nice值换算成rlimit形式后不超过RLIMIT_NICE,或者进程具备CAP_SYS_NICE,就允许修改nice值。 }
3.15 RLIMIT_RTPRIO 14 /* maximum realtime priority */
RLIMIT_RTPRIO限制进程通过sched_setscheduler()和sched_setparam()可设置的最大实时优先级。
对于RT线程,超过RLIMIT_RTPRIO则返回EPERM错误。
static int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi) { ... if (user && !capable(CAP_SYS_NICE)) { if (fair_policy(policy)) { if (attr->sched_nice < task_nice(p) && !can_nice(p, attr->sched_nice)) return -EPERM; } if (rt_policy(policy)) { unsigned long rlim_rtprio = task_rlimit(p, RLIMIT_RTPRIO); /* can't set/change the rt policy */ if (policy != p->policy && !rlim_rtprio) return -EPERM; /* can't increase priority */ if (attr->sched_priority > p->rt_priority && attr->sched_priority > rlim_rtprio) return -EPERM; } ... } ... }
3.16 RLIMIT_RTTIME 15 /* timeout for RT tasks in us */
RLIMIT_RTTIME限制了实时进程timer最大超时时间。
在check_thread_timers()中会把实时任务的rt.timeout与RLIMIT_RTTIME的soft/hard值进行比较;watchdog()则负责累加rt.timeout(每个jiffy最多累加一次),并在超过阈值后设置cputime_expires.sched_exp。
static void check_thread_timers(struct task_struct *tsk, struct list_head *firing) { ... soft = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_cur); if (soft != RLIM_INFINITY) { unsigned long hard = READ_ONCE(sig->rlim[RLIMIT_RTTIME].rlim_max); if (hard != RLIM_INFINITY && tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) { /* * At the hard limit, we just die. * No need to calculate anything else now. */ __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);--------------------------如果实时线程的超时值,超过RLIMIT_RTTIME的rlim_max之后发送SIGKILL信号。 return; } if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) { /* * At the soft limit, send a SIGXCPU every second. */ if (soft < hard) { soft += USEC_PER_SEC; sig->rlim[RLIMIT_RTTIME].rlim_cur = soft; } printk(KERN_INFO "RT Watchdog Timeout: %s[%d]\n", tsk->comm, task_pid_nr(tsk)); __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);-------------------------如果实时线程的超时值,超过RLIMIT_RTTIME的rlim_cur之后发送SIGXCPU信号。 } } if (task_cputime_zero(tsk_expires)) tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER); } static void watchdog(struct rq *rq, struct task_struct *p) { unsigned long soft, hard; /* max may change after cur was read, this will be fixed next tick */ soft = task_rlimit(p, RLIMIT_RTTIME); hard = task_rlimit_max(p, RLIMIT_RTTIME); if (soft != RLIM_INFINITY) { unsigned long next; if (p->rt.watchdog_stamp != jiffies) { p->rt.timeout++; p->rt.watchdog_stamp = jiffies; } next = DIV_ROUND_UP(min(soft, hard), USEC_PER_SEC/HZ); if (p->rt.timeout > next) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; } }