kernel——proc
1. 简介
proc文件系统:一种和内核交互的接口,最早专用于读写进程信息。
特点:
proc文件系统的文件节点只能从内核层创建,且这些节点的 ops 和 设备节点类似,是由创建者定义的。
2. proc文件系统的注册和挂载
start_kernel
--vfs_caches_init 挂载rootfs为根文件系统
--proc_root_init 初始化proc
/*
 * Filesystem type descriptor for procfs (name "proc").
 * proc_root_init() passes it to register_filesystem().
 */
static struct file_system_type proc_fs_type = {
.name = "proc",
.init_fs_context = proc_init_fs_context,
.parameters = proc_fs_parameters,
.kill_sb = proc_kill_sb,
.fs_flags = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
};
/*
 * Boot-time initialisation of procfs.
 *
 * Runs the proc-internal init helpers, creates a set of built-in entries
 * (the "mounts" symlink, the "fs", "driver" and "bus" directories, and the
 * mount points) and, as the last step, registers proc_fs_type.
 */
void __init proc_root_init(void)
{
proc_init_kmemcache();
set_proc_pid_nlink();
proc_self_init();
proc_thread_self_init();
proc_symlink("mounts", NULL, "self/mounts");
proc_net_init();
proc_mkdir("fs", NULL);
proc_mkdir("driver", NULL);
proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
/* just give it a mountpoint */
proc_create_mount_point("openprom");
#endif
proc_tty_init();
proc_mkdir("bus", NULL);
proc_sys_init();
register_filesystem(&proc_fs_type); // register the proc filesystem type
}
在注册proc文件系统前还创建了一些proc节点
proc的挂载
在init程序运行后,会读/etc/fstab文件,根据配置将proc文件系统挂载到/proc目录。
3. 创建一个proc文件节点
关键点:
- 使用 proc接口 创建/删除 文件节点
proc_create
- 定义struct proc_ops,实现该节点的read,write等回调
/*
 * proc_create - create a file node in procfs.
 * @name:     file name
 * @mode:     access permissions
 * @parent:   parent directory (the examples in this note pass NULL to
 *            create the node directly under /proc)
 * @proc_ops: operations (ops) of the node
 *
 * The examples in this note treat a NULL return value as failure.
 */
struct proc_dir_entry *proc_create(const char *name, umode_t mode,
struct proc_dir_entry *parent,
const struct proc_ops *proc_ops);
/*
 * Remove a /proc entry and free it if it's not currently in use.
 *
 * @name:   name of the entry to remove
 * @parent: directory entry that contains it (the examples in this note
 *          pass NULL for entries created directly under /proc)
 */
void remove_proc_entry(const char *name, struct proc_dir_entry *parent);
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>
#define FILE_NAME "hello"
static char gbuf[60];
ssize_t hello_read(struct file *, char __user *, size_t, loff_t *);
ssize_t hello_write(struct file *, const char __user *, size_t, loff_t *);
struct proc_ops hello_ops = {
.proc_read = hello_read,
.proc_write = hello_write,
};
ssize_t hello_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
return simple_read_from_buffer(buf, size, ppos, gbuf, sizeof(gbuf));
}
ssize_t hello_write(struct file *filp, const char __user *buf, size_t size, loff_t *ppos)
{
return simple_write_to_buffer(gbuf, sizeof(gbuf), ppos, buf, size);
}
static void __exit hello_exit(void)
{
remove_proc_entry(FILE_NAME, NULL);
printk("Remove /proc/%s succeed\n", FILE_NAME);
}
static int __init hello_init(void)
{
if (proc_create(FILE_NAME, 0666, NULL, &hello_ops) == NULL)
printk("Create /proc/%s failed\n", FILE_NAME);
else
printk("Create /proc/%s succeed\n", FILE_NAME);
return 0;
}
module_init(hello_init);
module_exit(hello_exit);
MODULE_LICENSE("GPL");
4. 创建proc目录
proc_mkdir
proc_mkdir_mode
proc_create
proc_create_data
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>

/*
 * Backing store for /proc/hello_dir0/hello_dir1/hello.  It is static,
 * hence zero-initialised; reads and writes operate directly on it.
 */
static char gbuf[60];

/* Entries created by hello_init(): /proc/hello_dir0/hello_dir1/hello. */
static struct proc_dir_entry *hello_dir0;
static struct proc_dir_entry *hello_dir1;
static struct proc_dir_entry *hello_file;

/*
 * Read handler: copies up to @size bytes of gbuf, starting at *@ppos, to
 * the user buffer @buf.  The whole 60-byte buffer is exposed.
 */
static ssize_t hello_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
	return simple_read_from_buffer(buf, size, ppos, gbuf, sizeof(gbuf));
}

/*
 * Write handler: copies up to @size bytes from the user buffer @buf into
 * gbuf at *@ppos; sizeof(gbuf) is passed as the destination capacity.
 */
static ssize_t hello_write(struct file *filp, const char __user *buf, size_t size, loff_t *ppos)
{
	return simple_write_to_buffer(gbuf, sizeof(gbuf), ppos, buf, size);
}

/* Callbacks of the file node; never modified after definition, hence const. */
static const struct proc_ops hello_ops = {
	.proc_read = hello_read,
	.proc_write = hello_write,
};

/* Module unload: remove the entries innermost-first (file, then directories). */
static void __exit hello_exit(void)
{
	remove_proc_entry("hello", hello_dir1);
	pr_info("Remove /proc/hello_dir0/hello_dir1/hello succeed\n");
	remove_proc_entry("hello_dir1", hello_dir0);
	pr_info("Remove /proc/hello_dir0/hello_dir1 succeed\n");
	remove_proc_entry("hello_dir0", NULL);
	pr_info("Remove /proc/hello_dir0 succeed\n");
}

/*
 * Module load: build /proc/hello_dir0/hello_dir1/hello.
 *
 * Returns 0 on success or -ENOMEM when any entry cannot be created.
 * hello_exit() is not run after a failed init, so every error path removes
 * the entries created so far instead of leaving them behind in /proc.
 */
static int __init hello_init(void)
{
	hello_dir0 = proc_mkdir("hello_dir0", NULL);
	if (!hello_dir0) {
		pr_err("Create /proc/hello_dir0 failed\n");
		return -ENOMEM;
	}
	pr_info("Create /proc/hello_dir0 succeed\n");

	/*
	 * A directory needs the search (x) bits to be traversable; 0666 has
	 * none of them, so use r-xr-xr-x.
	 */
	hello_dir1 = proc_mkdir_data("hello_dir1", 0555, hello_dir0, NULL);
	if (!hello_dir1) {
		pr_err("Create /proc/hello_dir0/hello_dir1 failed\n");
		goto err_remove_dir0;
	}
	pr_info("Create /proc/hello_dir0/hello_dir1 succeed\n");

	hello_file = proc_create("hello", 0666, hello_dir1, &hello_ops);
	if (!hello_file) {
		pr_err("Create /proc/hello_dir0/hello_dir1/hello failed\n");
		goto err_remove_dir1;
	}
	pr_info("Create /proc/hello_dir0/hello_dir1/hello succeed\n");

	return 0;

err_remove_dir1:
	remove_proc_entry("hello_dir1", hello_dir0);
err_remove_dir0:
	remove_proc_entry("hello_dir0", NULL);
	return -ENOMEM;
}

module_init(hello_init);
module_exit(hello_exit);
MODULE_LICENSE("GPL");
5. seq_file
在 proc 的 read 回调中,通常需要把零散的数据拼接成一段完整的文本再输出,因此需要一块堆上的缓冲区来暂存;为了简化这一操作,内核提供了 seq_file 来完成数据的序列化。
seq_file 的接口较多,下面分析两个使用方法
5.0 start
5.1 单元素
比如 /proc/filesystems
/*
 * show() callback registered for the "filesystems" proc entry.
 *
 * Walks the file_systems chain (linked through ->next) with
 * file_systems_lock held for reading and prints one line per filesystem
 * type into @m: "nodev" or an empty first column depending on whether
 * FS_REQUIRES_DEV is set, a tab, then the type name.
 * @v is not used.  Always returns 0.
 */
static int filesystems_proc_show(struct seq_file *m, void *v)
{
struct file_system_type * tmp;
read_lock(&file_systems_lock);
tmp = file_systems;
while (tmp) {
seq_printf(m, "%s\t%s\n",
(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
tmp->name);
tmp = tmp->next;
}
read_unlock(&file_systems_lock);
return 0;
}
/*
 * Creates the "filesystems" proc entry as a single-show file whose content
 * is produced by filesystems_proc_show().  The return value of
 * proc_create_single() is not checked; this function always returns 0.
 */
static int __init proc_filesystems_init(void)
{
proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
return 0;
}
module_init(proc_filesystems_init);
深入分析
/* Shorthand for proc_create_single_data() with a NULL data pointer. */
#define proc_create_single(name, mode, parent, show) \
proc_create_single_data(name, mode, parent, show, NULL)
/*
 * proc_ops installed by proc_create_single_data(): open goes through
 * proc_single_open(), read and lseek use the generic seq_file helpers,
 * release uses single_release().
 */
static const struct proc_ops proc_single_ops = {
/* not permanent -- can call into arbitrary ->single_show */
.proc_open = proc_single_open,
.proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = single_release,
};
/*
 * Create a proc file whose whole content is produced by one @show call.
 *
 * @name, @mode, @parent and @data are handed to proc_create_reg(); @show is
 * stored in the new entry.  Returns NULL when proc_create_reg() fails,
 * otherwise whatever proc_register() returns.
 */
struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
		struct proc_dir_entry *parent,
		int (*show)(struct seq_file *, void *), void *data)
{
	struct proc_dir_entry *entry = proc_create_reg(name, mode, &parent, data);

	if (entry == NULL)
		return NULL;

	/* every proc_ops callback of this entry is implemented with seq_xxx helpers */
	entry->proc_ops = &proc_single_ops;

	/*
	 * single_show lives in a union with seq_ops:
	 *
	 *	struct proc_dir_entry {
	 *		...
	 *		union {
	 *			const struct seq_operations *seq_ops;
	 *			int (*single_show)(struct seq_file *, void *);
	 *		};
	 *		...
	 *	};
	 */
	entry->single_show = show;

	return proc_register(parent, entry);
}
上面执行后,创建了proc文件
当用户open文件
int seq_open(struct file *file, const struct seq_operations *op)
{
struct seq_file *p;
WARN_ON(file->private_data);
p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
if (!p)
return -ENOMEM;
file->private_data = p;
mutex_init(&p->lock);
p->op = op;
// No refcounting: the lifetime of 'p' is constrained
// to the lifetime of the file.
p->file = file;
file->f_mode &= ~FMODE_PWRITE;
return 0;
}
int single_open(struct file *file, int (*show)(struct seq_file *, void *),
void *data)
{
struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT);
int res = -ENOMEM;
if (op) {
op->start = single_start;
op->next = single_next;
op->stop = single_stop;
op->show = show;
res = seq_open(file, op);
if (!res)
((struct seq_file *)file->private_data)->private = data;
else
kfree(op);
}
return res;
}
/*
 * ->proc_open for single-show proc files: looks up the proc_dir_entry
 * behind @inode and opens @file with that entry's single_show callback and
 * data pointer.
 */
static int proc_single_open(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *entry = PDE(inode);

	return single_open(file, entry->single_show, entry->data);
}
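可见 open 后创建了如下结构(原文此处为示意图):file->private_data 指向新分配的 seq_file;seq_file->op 指向 kmalloc 出来的 seq_operations(start/next/stop 分别为 single_start/single_next/single_stop,show 为 de->single_show);seq_file->private 保存 de->data。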
当readv时
/*
 * Abridged excerpt of seq_read_iter(); the "..." lines stand for code that
 * is omitted here (including the declarations and the Done/Enomem labels).
 *
 * Visible flow: allocate m->buf on first use; get one non-empty record into
 * it via start()/show()/next(), doubling the buffer and restarting whenever
 * a record overflows it; then keep appending further records while the
 * caller still has room; finally call stop() and copy m->buf to @iter.
 */
ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
{
struct seq_file *m = iocb->ki_filp->private_data;
...
// seq_file keeps a buffer (m->buf) into which the records are serialized
if (!m->buf) {
m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
if (!m->buf)
goto Enomem;
}
...
// get a non-empty record in the buffer
m->from = 0;
p = m->op->start(m, &m->index); // start() returns a pointer to the element to show; NULL means the traversal is over
while (1) {
err = PTR_ERR(p);
if (!p || IS_ERR(p)) // EOF or an error
break;
err = m->op->show(m, p); // hand that element to show(), which is expected to write into m->buf
if (err < 0) // hard error
break;
if (unlikely(err)) // ->show() says "skip it"
m->count = 0;
if (unlikely(!m->count)) { // empty record; m->count is the amount of valid data currently in the buffer
p = m->op->next(m, p, &m->index);
continue;
}
if (!seq_has_overflowed(m)) // got it: m->count != m->size, i.e. the record was written and the buffer was big enough
goto Fill;
// need a bigger buffer (the current one was too small)
m->op->stop(m, p);
kvfree(m->buf);
m->count = 0;
m->buf = seq_buf_alloc(m->size <<= 1);
if (!m->buf)
goto Enomem;
p = m->op->start(m, &m->index);
}
// EOF or an error
m->op->stop(m, p);
m->count = 0;
goto Done;
Fill:
// one non-empty record is in the buffer; if they want more,
// try to fit more in, but in any case we need to advance
// the iterator once for every record shown.
while (1) {
size_t offs = m->count;
loff_t pos = m->index;
p = m->op->next(m, p, &m->index); // m->index starts at 0; next() should advance the index and return the next element to show
if (pos == m->index) {
pr_info_ratelimited("buggy .next function %ps did not update position index\n",
m->op->next);
m->index++;
}
if (!p || IS_ERR(p)) // no next record for us: next() returning NULL means the traversal is finished
break;
if (m->count >= iov_iter_count(iter)) // the caller's buffer is already full, stop iterating
break;
err = m->op->show(m, p); // render element p into m->buf
if (err > 0) { // ->show() says "skip it"
m->count = offs;
} else if (err || seq_has_overflowed(m)) {
m->count = offs;
break;
}
}
m->op->stop(m, p); // end-of-traversal callback
n = copy_to_iter(m->buf, m->count, iter); // copy the serialized data in m->buf out to the caller's iter
}
可以看出 seq 只是给了序列化的框架,并解决了 缓存的问题,我们当前的回调是这些
/*
 * start() callback for single-record files.
 *
 * Returns a non-NULL token when *@pos is 0, so that show() runs exactly
 * once, and NULL for any other position to signal the end of the sequence.
 * The token is never dereferenced by the single_* callbacks shown here.
 *
 * The original "NULL + (*pos == 0)" relied on arithmetic on a null void
 * pointer, which is a GNU extension and not valid ISO C; the explicit cast
 * yields the same value ((void *)1).
 * NOTE(review): the kernel's <linux/seq_file.h> provides SEQ_START_TOKEN
 * for this purpose; use it instead if that header is in scope here.
 */
static void *single_start(struct seq_file *p, loff_t *pos)
{
	return *pos == 0 ? (void *)1 : NULL;
}
/*
 * next() callback for single-record files: advances *@pos by one and
 * always returns NULL, i.e. there is never a second record.
 */
static void *single_next(struct seq_file *p, void *v, loff_t *pos)
{
	*pos += 1;
	return NULL;
}
/* stop() callback for single-record files; the body is empty. */
static void single_stop(struct seq_file *p, void *v)
{
}
/*
 * show() callback registered for the "filesystems" proc entry (same code
 * as quoted earlier in this note).
 *
 * Walks the file_systems chain (linked through ->next) with
 * file_systems_lock held for reading and prints one line per filesystem
 * type into @m: "nodev" or an empty first column depending on whether
 * FS_REQUIRES_DEV is set, a tab, then the type name.
 * @v is not used.  Always returns 0.
 */
static int filesystems_proc_show(struct seq_file *m, void *v)
{
struct file_system_type * tmp;
read_lock(&file_systems_lock);
tmp = file_systems;
while (tmp) {
seq_printf(m, "%s\t%s\n",
(tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
tmp->name);
tmp = tmp->next;
}
read_unlock(&file_systems_lock);
return 0;
}
可见 next 一定返回NULL,所以调用过程是
start -> show -> next返回NULL 结束 -> stop -> copy_to_iter
所以调用了一次filesystems_proc_show 将数据序列化到m->buf,之后被拷贝到iter
当用户close时
int single_release(struct inode *inode, struct file *file)
{
const struct seq_operations *op = ((struct seq_file *)file->private_data)->op ;
int res = seq_release(inode, file);
kfree(op);
return res;
}
/*
 * Free the seq_file attached to @file: first its data buffer, then the
 * seq_file object itself (back to seq_file_cache).  Always returns 0.
 */
int seq_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;

	kvfree(seq->buf);
	kmem_cache_free(seq_file_cache, seq);

	return 0;
}
可见close时,seq会释放 m->buf,并释放 seq_file,释放 seq_operations
5.2 多元素
看 /proc/interrupts
/*
 * seq_file iterator used for the "interrupts" proc entry: position handling
 * is done by int_seq_start()/int_seq_next()/int_seq_stop(), each record is
 * rendered by show_interrupts().
 */
static const struct seq_operations int_seq_ops = {
.start = int_seq_start,
.next = int_seq_next,
.stop = int_seq_stop,
.show = show_interrupts
};
/*
 * Creates the "interrupts" proc entry backed by int_seq_ops.  The return
 * value of proc_create_seq() is not checked; this function always returns 0.
 */
static int __init proc_interrupts_init(void)
{
proc_create_seq("interrupts", 0, NULL, &int_seq_ops);
return 0;
}
/* Shorthand for proc_create_seq_private() with state_size 0 and a NULL data pointer. */
#define proc_create_seq(name, mode, parent, ops) \
proc_create_seq_private(name, mode, parent, ops, 0, NULL)
/*
 * proc_ops installed by proc_create_seq_private(): open goes through
 * proc_seq_open(), read and lseek use the generic seq_file helpers,
 * release uses proc_seq_release().
 */
static const struct proc_ops proc_seq_ops = {
/* not permanent -- can call into arbitrary seq_operations */
.proc_open = proc_seq_open,
.proc_read_iter = seq_read_iter,
.proc_lseek = seq_lseek,
.proc_release = proc_seq_release,
};
/*
 * Create a proc file driven by a caller-supplied seq_operations table.
 *
 * @name, @mode, @parent and @data are handed to proc_create_reg(); @ops and
 * @state_size are stored in the new entry.  Returns NULL when
 * proc_create_reg() fails, otherwise whatever proc_register() returns.
 */
struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
		struct proc_dir_entry *parent, const struct seq_operations *ops,
		unsigned int state_size, void *data)
{
	struct proc_dir_entry *entry = proc_create_reg(name, mode, &parent, data);

	if (entry == NULL)
		return NULL;

	entry->proc_ops = &proc_seq_ops;
	entry->seq_ops = ops;
	entry->state_size = state_size;

	return proc_register(parent, entry);
}
当创建proc目录项后,
p->proc_ops = &proc_seq_ops;
p->seq_ops = ops;
当用户open时
/*
 * ->proc_open for seq_operations-based proc files: opens @file with the
 * entry's seq_ops, going through seq_open_private() when the entry has a
 * non-zero state_size and through plain seq_open() otherwise.
 */
static int proc_seq_open(struct inode *inode, struct file *file)
{
	struct proc_dir_entry *entry = PDE(inode);

	if (!entry->state_size)
		return seq_open(file, entry->seq_ops);

	return seq_open_private(file, entry->seq_ops, entry->state_size);
}
与单元素的差别是 ((struct seq_file *)file->private_data)->op 为 int_seq_ops
当用户readv时
/*
 * start() callback: the record handle is the position pointer itself.
 * Returns @pos while *@pos is not greater than nr_irqs, NULL otherwise.
 * The index starts at 0, so the first call returns the pointer to the index.
 */
static void *int_seq_start(struct seq_file *f, loff_t *pos)
{
	if (*pos > nr_irqs)
		return NULL;

	return pos;
}
/*
 * next() callback: increments the index and returns @pos again, or NULL
 * once the index has moved past nr_irqs.
 */
static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
{
	++*pos;	/* advance the index */

	return (*pos <= nr_irqs) ? pos : NULL;
}
/* stop() callback of int_seq_ops; there is nothing to undo. */
static void int_seq_stop(struct seq_file *f, void *v)
{
/* Nothing to do */
}
/*
 * Abridged excerpt of show_interrupts(), the show() callback of
 * int_seq_ops; the "..." lines stand for code omitted here.
 *
 * Visible flow: read the index that @v points at, look up the descriptor
 * for that index, and print into @p either the descriptor's hwirq number
 * (when it has an irq domain) or padding of the same width.
 */
int show_interrupts(struct seq_file *p, void *v)
{
static int prec;
unsigned long flags, any_count = 0;
int i = *(loff_t *) v, j; // v points at the position index; read the index from it
struct irqaction *action;
struct irq_desc *desc;
...
desc = irq_to_desc(i); // look up the descriptor for this index
...
if (desc->irq_data.domain)
seq_printf(p, " %*lu", prec, desc->irq_data.hwirq); // serialized into p->buf
else
seq_printf(p, " %*s", prec, "");
...
}
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?