kernel——proc

1. 简介

proc文件系统：一种和内核交互的接口，最早专用于读写进程信息。
特点：
proc文件系统的文件节点只能从内核层创建，且这些节点的 ops 和设备节点类似，是由创建者定义的。

2. proc文件系统的注册和挂载

start_kernel
--vfs_caches_init 挂载rootfs为根文件系统
--proc_root_init 初始化proc

  /*
   * Filesystem type descriptor for procfs. Its name is "proc"; the remaining
   * fields point at the procfs-specific context/parameter/teardown hooks.
   */
  static struct file_system_type proc_fs_type = {
      .name           = "proc",
      .init_fs_context    = proc_init_fs_context,
      .parameters     = proc_fs_parameters,
      .kill_sb        = proc_kill_sb,
      .fs_flags       = FS_USERNS_MOUNT | FS_DISALLOW_NOTIFY_PERM,
  };

  /*
   * Boot-time initialisation of procfs: runs the procfs init helpers, creates
   * a number of well-known entries (mounts, fs, driver, bus, ...), and only
   * then registers the filesystem type.
   */
  void __init proc_root_init(void)
  {
      proc_init_kmemcache();
      set_proc_pid_nlink();
      proc_self_init();
      proc_thread_self_init();
      proc_symlink("mounts", NULL, "self/mounts");

      proc_net_init();
      proc_mkdir("fs", NULL);
      proc_mkdir("driver", NULL);
      proc_create_mount_point("fs/nfsd"); /* somewhere for the nfsd filesystem to be mounted */
  #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
      /* just give it a mountpoint */
      proc_create_mount_point("openprom");
  #endif
      proc_tty_init();
      proc_mkdir("bus", NULL);
      proc_sys_init();

      register_filesystem(&proc_fs_type); // register the proc filesystem type
  }

在注册proc文件系统前还创建了一些proc节点

proc的挂载
在init程序运行后，会读/etc/fstab文件，根据配置将proc文件系统挂载到/proc目录。

3. 创建一个proc文件节点

关键点：

使用 proc 接口创建/删除文件节点：
proc_create / remove_proc_entry

定义struct proc_ops，实现该节点的read,write等回调

  // name     : file name of the entry
  // mode     : access permission bits
  // parent   : parent directory entry (the examples below pass NULL to create the entry directly under /proc)
  // proc_ops : operations table for the entry
  struct proc_dir_entry *proc_create(const char *name, umode_t mode,
                     struct proc_dir_entry *parent,
                     const struct proc_ops *proc_ops);

  /*
   * Remove a /proc entry and free it if it's not currently in use.
   */
  void remove_proc_entry(const char *name, struct proc_dir_entry *parent);

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>

#define FILE_NAME "hello"

static char gbuf[60];

ssize_t hello_read(struct file *, char __user *, size_t, loff_t *);
ssize_t hello_write(struct file *, const char __user *, size_t, loff_t *);

struct proc_ops hello_ops = {
        .proc_read = hello_read,
        .proc_write = hello_write,
};

ssize_t hello_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
        return simple_read_from_buffer(buf, size, ppos, gbuf, sizeof(gbuf));
}

ssize_t hello_write(struct file *filp, const char __user *buf, size_t size, loff_t *ppos)
{
        return simple_write_to_buffer(gbuf, sizeof(gbuf), ppos, buf, size);
}

/*
 * hello_exit - module exit point: remove /proc/<FILE_NAME>.
 *
 * remove_proc_entry() returns nothing, so the message only records that the
 * removal was requested. It is logged at an explicit KERN_INFO level via
 * pr_info() instead of the default printk level.
 */
static void __exit hello_exit(void)
{
        remove_proc_entry(FILE_NAME, NULL);
        pr_info("Remove /proc/%s succeed\n", FILE_NAME);
}

/*
 * hello_init - module entry point: create /proc/<FILE_NAME>.
 *
 * Returns 0 on success, or -ENOMEM when the entry cannot be created. The
 * failure is propagated so that the module load fails; otherwise the module
 * would stay loaded without its proc entry and the exit handler would later
 * try to remove an entry that does not exist.
 */
static int __init hello_init(void)
{
        if (!proc_create(FILE_NAME, 0666, NULL, &hello_ops)) {
                pr_err("Create /proc/%s failed\n", FILE_NAME);
                return -ENOMEM;
        }

        pr_info("Create /proc/%s succeed\n", FILE_NAME);
        return 0;
}

/* Register hello_init()/hello_exit() as the module's load and unload hooks. */
module_init(hello_init);
module_exit(hello_exit);

/* Declare the module's license as GPL. */
MODULE_LICENSE("GPL");

4. 创建proc目录

proc_mkdir
proc_mkdir_mode
proc_mkdir_data（下面的示例使用该接口，可同时指定权限和私有数据）
proc_create
proc_create_data

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/proc_fs.h>
#include <linux/fs.h>

static char gbuf[60];

ssize_t hello_read(struct file *, char __user *, size_t, loff_t *);
ssize_t hello_write(struct file *, const char __user *, size_t, loff_t *);

struct proc_ops hello_ops = {
        .proc_read = hello_read,
        .proc_write = hello_write,
};

static struct  proc_dir_entry *hello_dir0;
static struct  proc_dir_entry *hello_dir1;
static struct  proc_dir_entry *hello_file;

ssize_t hello_read(struct file *filp, char __user *buf, size_t size, loff_t *ppos)
{
        return simple_read_from_buffer(buf, size, ppos, gbuf, sizeof(gbuf));
}

ssize_t hello_write(struct file *filp, const char __user *buf, size_t size, loff_t *ppos)
{
        return simple_write_to_buffer(gbuf, sizeof(gbuf), ppos, buf, size);
}

/*
 * hello_exit - module exit point: tear down /proc/hello_dir0 and its contents.
 *
 * Entries are removed innermost-first (file, then hello_dir1, then
 * hello_dir0) so that each directory is already empty when it is removed.
 * Messages are logged at an explicit KERN_INFO level via pr_info().
 */
static void __exit hello_exit(void)
{
        remove_proc_entry("hello", hello_dir1);
        pr_info("Remove /proc/hello_dir0/hello_dir1/hello succeed\n");

        remove_proc_entry("hello_dir1", hello_dir0);
        pr_info("Remove /proc/hello_dir0/hello_dir1 succeed\n");

        remove_proc_entry("hello_dir0", NULL);
        pr_info("Remove /proc/hello_dir0 succeed\n");
}


/*
 * hello_init - module entry point: build /proc/hello_dir0/hello_dir1/hello.
 *
 * Creates the two nested directories and then the file. If a later step
 * fails, everything created by the earlier steps is removed again so a
 * failed load leaves nothing behind in /proc.
 *
 * Returns 0 on success, or -ENOMEM if any entry cannot be created.
 */
static int __init hello_init(void)
{
        hello_dir0 = proc_mkdir("hello_dir0", NULL);
        if (!hello_dir0) {
                pr_err("Create /proc/hello_dir0 failed\n");
                return -ENOMEM;
        }
        pr_info("Create /proc/hello_dir0 succeed\n");

        /*
         * A directory needs the execute (search) bits to be traversable.
         * With 0666 unprivileged users could not reach the world-accessible
         * file created below, so use r-x for everyone instead.
         */
        hello_dir1 = proc_mkdir_data("hello_dir1", 0555, hello_dir0, NULL);
        if (!hello_dir1) {
                pr_err("Create /proc/hello_dir0/hello_dir1 failed\n");
                goto err_remove_dir0;
        }
        pr_info("Create /proc/hello_dir0/hello_dir1 succeed\n");

        hello_file = proc_create("hello", 0666, hello_dir1, &hello_ops);
        if (!hello_file) {
                pr_err("Create /proc/hello_dir0/hello_dir1/hello failed\n");
                goto err_remove_dir1;
        }
        pr_info("Create /proc/hello_dir0/hello_dir1/hello succeed\n");

        return 0;

err_remove_dir1:
        remove_proc_entry("hello_dir1", hello_dir0);
err_remove_dir0:
        remove_proc_entry("hello_dir0", NULL);
        return -ENOMEM;
}

/* Register hello_init()/hello_exit() as the module's load and unload hooks. */
module_init(hello_init);
module_exit(hello_exit);

/* Declare the module's license as GPL. */
MODULE_LICENSE("GPL");

5. seq_file

在 proc 的 read 回调中，通常需要把零散的数据整合后再输出，因此需要一块堆上的缓冲区来暂存。为了简化这类操作，内核提供了 seq_file 用于数据的序列化输出。
seq_file 的接口较多，下面分析两种使用方法：单元素（single_open 系列）和多元素（seq_operations 迭代）。

5.0 start

5.1 单元素

比如 /proc/filesystems

  /*
   * seq_file show callback: walks the file_systems list while holding
   * file_systems_lock for reading and prints one line per entry into m.
   * Always returns 0.
   */
  static int filesystems_proc_show(struct seq_file *m, void *v)
  {
      struct file_system_type * tmp;

      read_lock(&file_systems_lock);
      tmp = file_systems;
      while (tmp) {
          /* First column is empty when FS_REQUIRES_DEV is set, "nodev" otherwise. */
          seq_printf(m, "%s\t%s\n",
              (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
              tmp->name);
          tmp = tmp->next;
      }
      read_unlock(&file_systems_lock);
      return 0;
  }

  /*
   * Creates the "filesystems" proc entry (NULL parent, mode 0) with
   * filesystems_proc_show() as its single show callback. The result of
   * proc_create_single() is not checked; the function always returns 0.
   */
  static int __init proc_filesystems_init(void)
  {
      proc_create_single("filesystems", 0, NULL, filesystems_proc_show);
      return 0;
  }
  module_init(proc_filesystems_init);

深入分析

  /* Convenience wrapper: proc_create_single_data() with a NULL data pointer. */
  #define proc_create_single(name, mode, parent, show) \
      proc_create_single_data(name, mode, parent, show, NULL)

  /*
   * proc_ops installed on entries created by proc_create_single_data():
   * open goes through proc_single_open(), read and lseek through the generic
   * seq_file helpers, release through single_release().
   */
  static const struct proc_ops proc_single_ops = {
      /* not permanent -- can call into arbitrary ->single_show */
      .proc_open  = proc_single_open,
      .proc_read_iter = seq_read_iter,
      .proc_lseek = seq_lseek,
      .proc_release   = single_release,
  };

  /*
   * Create a regular proc entry whose content is produced by one show()
   * callback.
   *
   * name/mode/parent/data are handed to proc_create_reg(); show is stored in
   * the entry's single_show member. Returns NULL if proc_create_reg() fails,
   * otherwise the result of proc_register().
   */
  struct proc_dir_entry *proc_create_single_data(const char *name, umode_t mode,
          struct proc_dir_entry *parent,
          int (*show)(struct seq_file *, void *), void *data)
  {
      struct proc_dir_entry *p;

      p = proc_create_reg(name, mode, &parent, data);
      if (!p)
          return NULL;
      p->proc_ops = &proc_single_ops;  // these proc_ops are all implemented with the seq_xxx helpers
      
/*
   struct proc_dir_entry {
      ...
      union {
          const struct seq_operations *seq_ops;
          int (*single_show)(struct seq_file *, void *);
      };
      ...
    };
 */
      p->single_show = show;  
      return proc_register(parent, p);
  }

上面执行后，创建了proc文件
当用户open文件

  /*
   * Attach a freshly allocated, zeroed struct seq_file to an open file.
   *
   * file: file being opened; its private_data is expected to be unset
   *       (a warning is raised otherwise) and is pointed at the new seq_file.
   * op:   iterator operations stored in the seq_file.
   *
   * Returns 0 on success or -ENOMEM if the seq_file cannot be allocated.
   */
  int seq_open(struct file *file, const struct seq_operations *op)
  {
      struct seq_file *p;

      WARN_ON(file->private_data);

      p = kmem_cache_zalloc(seq_file_cache, GFP_KERNEL);
      if (!p)
          return -ENOMEM;

      file->private_data = p;

      mutex_init(&p->lock);
      p->op = op;

      // No refcounting: the lifetime of 'p' is constrained
      // to the lifetime of the file.
      p->file = file;
      /* Clear FMODE_PWRITE on the file's mode. */
      file->f_mode &= ~FMODE_PWRITE;
      return 0;
  }

  /*
   * Open a seq_file that produces a single record.
   *
   * Allocates a private seq_operations, fills it with the single_start /
   * single_next / single_stop iterators plus the caller's show callback, and
   * passes it to seq_open(). On success, data is stored in the seq_file's
   * private member; on failure the seq_operations is freed again.
   *
   * Returns the seq_open() result, or -ENOMEM if the seq_operations cannot
   * be allocated.
   */
  int single_open(struct file *file, int (*show)(struct seq_file *, void *),
          void *data)
  {
      struct seq_operations *op = kmalloc(sizeof(*op), GFP_KERNEL_ACCOUNT);
      int res = -ENOMEM;

      if (op) {
          op->start = single_start;
          op->next = single_next;
          op->stop = single_stop;
          op->show = show;
          res = seq_open(file, op);
          if (!res)
              ((struct seq_file *)file->private_data)->private = data;
          else
              kfree(op);
      }
      return res;
  }

  /*
   * open handler of proc_single_ops: looks up the proc_dir_entry behind the
   * inode and opens a single-record seq_file using the entry's single_show
   * callback and data pointer.
   */
  static int proc_single_open(struct inode *inode, struct file *file)
  {
      struct proc_dir_entry *de = PDE(inode);

      return single_open(file, de->single_show, de->data);
  }

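可见 open 后创建了如下结构：file->private_data 指向新分配的 struct seq_file；seq_file->op 指向 kmalloc 出来的 seq_operations（start/next/stop 分别为 single_start/single_next/single_stop，show 为 de->single_show）；seq_file->private 保存 de->data。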

当readv时

  /*
   * Abridged excerpt of the seq_file read path ("..." marks omitted code).
   * It first gets one non-empty record into m->buf (growing the buffer when
   * a record does not fit), then tries to append further records, and
   * finally copies the buffered bytes to the caller's iterator.
   */
  ssize_t seq_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  {
      struct seq_file *m = iocb->ki_filp->private_data;

      ...
      // seq_file keeps a buffer that the serialized output is written into
      if (!m->buf) {
          m->buf = seq_buf_alloc(m->size = PAGE_SIZE);
          if (!m->buf)
              goto Enomem;
      }
    
      ...

      // get a non-empty record in the buffer
      m->from = 0;
      p = m->op->start(m, &m->index);   // start returns a pointer to the element to show; NULL means the traversal is finished
      while (1) {
          err = PTR_ERR(p);
          if (!p || IS_ERR(p))    // EOF or an error
              break;
          err = m->op->show(m, p);    // hand the element pointer to show, which is expected to write into m->buf
          if (err < 0)        // hard error
              break;
          if (unlikely(err))  // ->show() says "skip it"
              m->count = 0;
          if (unlikely(!m->count)) { // empty record; m->count is the number of valid bytes currently in the buffer
              p = m->op->next(m, p, &m->index);   
              continue;
          }
          if (!seq_has_overflowed(m)) // got it: m->count != m->size, i.e. the record was written and the buffer was large enough
              goto Fill;
          // need a bigger buffer: the current one is too small
          m->op->stop(m, p);
          kvfree(m->buf);
          m->count = 0;
          m->buf = seq_buf_alloc(m->size <<= 1);
          if (!m->buf)
              goto Enomem;
          p = m->op->start(m, &m->index);
      }
      // EOF or an error
      m->op->stop(m, p);
      m->count = 0;
      goto Done;
  Fill:
      // one non-empty record is in the buffer; if they want more,
      // try to fit more in, but in any case we need to advance
      // the iterator once for every record shown.
      while (1) {
          size_t offs = m->count;
          loff_t pos = m->index;

          p = m->op->next(m, p, &m->index);   // m->index starts at 0; next should advance the index and return the next element to show
          if (pos == m->index) {
              pr_info_ratelimited("buggy .next function %ps did not update position   index\n",
                          m->op->next);
              m->index++;
          }
          if (!p || IS_ERR(p))    // no next record for us: next returned NULL (or an error), so the traversal is complete
              break;
          if (m->count >= iov_iter_count(iter))  // stop once the user's buffer would be full
              break;
          err = m->op->show(m, p);   // render element p into m->buf
          if (err > 0) {      // ->show() says "skip it"
              m->count = offs;
          } else if (err || seq_has_overflowed(m)) {
              m->count = offs;
              break;
          }
      }
      m->op->stop(m, p); // callback marking the end of the traversal
      n = copy_to_iter(m->buf, m->count, iter); // copy the serialized data in m->buf to the user's buffer iter

    }

可以看出 seq 只是给了序列化的框架，并解决了缓存的问题，我们当前的回调是这些

  /*
   * start callback of the single-record iterator: returns a non-NULL cookie
   * (NULL + 1) when *pos is 0 and NULL for any other position.
   */
  static void *single_start(struct seq_file *p, loff_t *pos)
  {
      return NULL + (*pos == 0);
  }

  /*
   * next callback of the single-record iterator: advances *pos and always
   * returns NULL, i.e. there is never a second record.
   */
  static void *single_next(struct seq_file *p, void *v, loff_t *pos)
  {
      ++*pos;
      return NULL;
  }

  /* stop callback of the single-record iterator: a no-op. */
  static void single_stop(struct seq_file *p, void *v)
  {
  }

  /*
   * show callback used with the single-record iterator: one call emits the
   * complete listing by walking the file_systems list under
   * file_systems_lock and printing one line per entry into m.
   */
  static int filesystems_proc_show(struct seq_file *m, void *v)
  {
      struct file_system_type * tmp;

      read_lock(&file_systems_lock);
      tmp = file_systems;
      while (tmp) {
          /* First column is empty when FS_REQUIRES_DEV is set, "nodev" otherwise. */
          seq_printf(m, "%s\t%s\n",
              (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
              tmp->name);
          tmp = tmp->next;
      }
      read_unlock(&file_systems_lock);
      return 0;
  }

可见 next 一定返回NULL，所以调用过程是
start -> show -> next返回NULL 结束 -> stop -> copy_to_iter
所以调用了一次filesystems_proc_show 将数据序列化到m->buf，之后被拷贝到iter

当用户close时

  /*
   * Release a seq_file opened for a single record. The seq_operations pointer
   * is read before seq_release() frees the seq_file that holds it, and is
   * freed afterwards. Returns the seq_release() result.
   */
  int single_release(struct inode *inode, struct file *file)
  {
      const struct seq_operations *op = ((struct seq_file *)file->private_data)->op  ;
      int res = seq_release(inode, file);
      kfree(op);
      return res;
  }

  /*
   * Free the seq_file attached to the file: first its output buffer, then
   * the seq_file object itself. Always returns 0.
   */
  int seq_release(struct inode *inode, struct file *file)
  {
      struct seq_file *m = file->private_data;
      kvfree(m->buf);
      kmem_cache_free(seq_file_cache, m);
      return 0;
  }

可见close时，seq会释放 m->buf，并释放 seq_file，释放 seq_operations

5.2 多元素

看 /proc/interrupts

  /* Iterator operations for the "interrupts" entry: one show_interrupts() call per position. */
  static const struct seq_operations int_seq_ops = {
      .start = int_seq_start,
      .next  = int_seq_next,
      .stop  = int_seq_stop,
      .show  = show_interrupts
  };

  /*
   * Creates the "interrupts" proc entry (NULL parent, mode 0) driven by
   * int_seq_ops. The result of proc_create_seq() is not checked; the function
   * always returns 0.
   */
  static int __init proc_interrupts_init(void)
  {
      proc_create_seq("interrupts", 0, NULL, &int_seq_ops);
      return 0;
  }

  /* Convenience wrapper: proc_create_seq_private() with state_size 0 and a NULL data pointer. */
  #define proc_create_seq(name, mode, parent, ops) \
      proc_create_seq_private(name, mode, parent, ops, 0, NULL)

  /*
   * proc_ops installed on entries created by proc_create_seq_private():
   * open goes through proc_seq_open(), read and lseek through the generic
   * seq_file helpers, release through proc_seq_release().
   */
  static const struct proc_ops proc_seq_ops = {
      /* not permanent -- can call into arbitrary seq_operations */
      .proc_open  = proc_seq_open,
      .proc_read_iter = seq_read_iter,
      .proc_lseek = seq_lseek,
      .proc_release   = proc_seq_release,
  };

  /*
   * Create a regular proc entry backed by a caller-supplied seq_operations.
   *
   * name/mode/parent/data are handed to proc_create_reg(); ops and state_size
   * are stored in the entry for use at open time. Returns NULL if
   * proc_create_reg() fails, otherwise the result of proc_register().
   */
  struct proc_dir_entry *proc_create_seq_private(const char *name, umode_t mode,
          struct proc_dir_entry *parent, const struct seq_operations *ops,
          unsigned int state_size, void *data)
  {
      struct proc_dir_entry *p;

      p = proc_create_reg(name, mode, &parent, data);
      if (!p)
          return NULL;
      p->proc_ops = &proc_seq_ops;
      p->seq_ops = ops;
      p->state_size = state_size;
      return proc_register(parent, p);
  }

当创建proc目录项后，
p->proc_ops = &proc_seq_ops;
p->seq_ops = ops;

当用户open时

  /*
   * open handler of proc_seq_ops: opens a seq_file with the entry's
   * seq_operations. When the entry has a non-zero state_size,
   * seq_open_private() is used with that size; otherwise plain seq_open().
   */
  static int proc_seq_open(struct inode *inode, struct file *file)
  {
      struct proc_dir_entry *de = PDE(inode);

      if (de->state_size)
          return seq_open_private(file, de->seq_ops, de->state_size);
      return seq_open(file, de->seq_ops);
  }

与单元素的差别是 ((struct seq_file *)file->private_data)->op 为 int_seq_ops
当用户readv时

  /*
   * start callback: positions 0..nr_irqs (inclusive) are valid. The position
   * pointer itself is used as the record cookie; NULL ends the traversal.
   */
  static void *int_seq_start(struct seq_file *f, loff_t *pos)
  {
      return (*pos <= nr_irqs) ? pos : NULL; // the index starts at 0, so a pointer to the index itself is returned
  }

  /*
   * next callback: advances the position by one and returns the position
   * pointer as the next cookie, or NULL once it has moved past nr_irqs.
   */
  static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos)
  {
      (*pos)++;  // advance the index
      if (*pos > nr_irqs)
          return NULL;
      return pos;
  }

  /* stop callback: a no-op. */
  static void int_seq_stop(struct seq_file *f, void *v)
  {
      /* Nothing to do */
  }

  /*
   * Abridged excerpt of the show callback ("..." marks omitted code): v is
   * the position pointer handed out by int_seq_start()/int_seq_next(); the
   * descriptor for that position is looked up and printed into p.
   */
  int show_interrupts(struct seq_file *p, void *v)
  {
      static int prec;

      unsigned long flags, any_count = 0;
      int i = *(loff_t *) v, j;  // v points at the index; read the index back from it
      struct irqaction *action;
      struct irq_desc *desc;

      ...
      desc = irq_to_desc(i);  // look up the descriptor for this index
      ...
      if (desc->irq_data.domain)
          seq_printf(p, " %*lu", prec, desc->irq_data.hwirq); // serialized into p->buf
      else
          seq_printf(p, " %*s", prec, "");
      ...
   }

posted on 2022-11-25 22:54 开心种树阅读(367) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

一切有为法，应作如是观