linux 2.6.32文件系统的dentry父子关系
我们知道,linux文件系统,inode和dentry是有对应关系的,dentry是文件名或者目录的一个管理结构,2.6内核中:
struct dentry { atomic_t d_count; unsigned int d_flags; spinlock_t d_lock; int d_mounted; struct inode *d_inode; struct hlist_node d_hash; struct dentry *d_parent; struct qstr d_name; struct list_head d_lru; union { struct list_head d_child; struct rcu_head d_rcu; } d_u; struct list_head d_subdirs; struct list_head d_alias; unsigned long d_time; const struct dentry_operations *d_op; struct super_block *d_sb; void *d_fsdata; unsigned char d_iname[32]; } SIZE: 192
3.10的内核中如下:
struct dentry { unsigned int d_flags; seqcount_t d_seq; struct hlist_bl_node d_hash; struct dentry *d_parent; struct qstr d_name; struct inode *d_inode; unsigned char d_iname[32]; struct lockref d_lockref; const struct dentry_operations *d_op; struct super_block *d_sb; unsigned long d_time; void *d_fsdata; struct list_head d_lru; union { struct list_head d_child; struct callback_head d_rcu; } d_u; struct list_head d_subdirs; struct hlist_node d_alias; }
对比两个结构,其实主要成员变化不大,比如d_parent,d_name,d_iname,下面以2.6为例来描述proc文件系统的父子关系,而父子关系中,主要就是由d_parent,d_u中的
d_child,d_subdirs这三个成员来维护的。
给定一个dentry:
crash> struct dentry 0xffff8818014b1540 struct dentry { d_count = { counter = 1 }, d_flags = 0, d_lock = { raw_lock = { slock = 196611 } }, d_mounted = 0, d_inode = 0xffff88100a8117f8,-----------指向这个dentry对应的inode d_hash = { next = 0x0, pprev = 0xffffc90000b13890 }, d_parent = 0xffff8818118002c0,----------指向父节点,也是一个dentry d_name = { hash = 3255717505, len = 8, name = 0xffff8818014b15e0 "slabinfo"------------------------文件名 }, d_lru = { next = 0xffff8818014b1580, prev = 0xffff8818014b1580 }, d_u = { d_child = { next = 0xffff880c117eb710, prev = 0xffff881811800320 }, d_rcu = { next = 0xffff880c117eb710, func = 0xffff881811800320 } }, d_subdirs = { next = 0xffff8818014b15a0, prev = 0xffff8818014b15a0 }, d_alias = { next = 0xffff88100a811828, prev = 0xffff88100a811828 }, d_time = 0, d_op = 0xffffffff81622b00 <proc_file_inode_operations+160>, d_sb = 0xffff880c1188cc00, d_fsdata = 0x0, d_iname = "slabinfo\000_offset\000ndler.py\000m\000\000\000\000"
可以看出:当前节点的文件名是slabinfo, 0xffff8818118002c0就是这个slabinfo的父节点,
crash> dentry 0xffff8818118002c0 struct dentry { d_count = { counter = 725 }, d_flags = 16, d_lock = { raw_lock = { slock = 797912975 } }, d_mounted = 0, d_inode = 0xffff880c11402cf8, d_hash = { next = 0x0, pprev = 0x0 }, d_parent = 0xffff8818118002c0,--------------对应的parent是自身,也就是当前是顶级节点。 d_name = { hash = 0, len = 1, name = 0xffff881811800360 "/"--------------当前节点名字,已经是根目录名字了 }, d_lru = { next = 0xffff881811800300, prev = 0xffff881811800300 }, d_u = { d_child = { next = 0xffff881811800310,-------d_u到dentry的偏移是0x50,也就是d_u的本身地址是310,然后这个地址的prev和next是自己本身,说明是根节点 prev = 0xffff881811800310 }, d_rcu = { next = 0xffff881811800310, func = 0xffff881811800310 } }, d_subdirs = {--------------------------这个链表中,存放的就是子节点的d_u的地址 next = 0xffff8818014b1590, prev = 0xffff880c0dd74590 }, d_alias = { next = 0xffff880c11402d28, prev = 0xffff880c11402d28 }, d_time = 0, d_op = 0x0, d_sb = 0xffff880c1188cc00, d_fsdata = 0x0, d_iname = "/\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" }
虽然我们知道slabinfo的目录是/proc/slabinfo,那为什么slabinfo的父目录就已经是“/”,而不是proc呢?因为进入/proc,则意味着切换了文件系统,/proc是挂载点。
我们来看它父目录的子目录/文件的目录项链表,也就是d_subdirs 成员。我们知道,dentry可以通过d_parent来寻找自己的父dentry,比较有意思的是,同一父目录下各个dentry的d_u(d_child),会形成一条双向循环链表,按道理,双向循环链表是没有头的,但由于它们的父dentry的d_subdirs 也嵌入在这个链表中,所以就可以把父dentry的d_subdirs 的地址作为list的头来遍历:
crash> struct -xo dentry.d_subdirs ffff8818118002c0 struct dentry { [ffff881811800320] struct list_head d_subdirs; } crash> list ffff881811800320 ffff881811800320 ffff8818014b1590-----------------------这个就是slabinfo这个文件的d_u的地址 ffff880c117eb710 ffff8804ee7510d0 ffff880c0df82f50 ffff880db397a850 ffff88180c6359d0 ffff880c0de13110 ffff880a28539910 ffff880c0dc589d0 ffff880bfeee9e50 ffff8805cbccde90 ffff880bfed53410 ffff8812ac422650 ffff8810794cecd0 ffff881811812c10 ffff8812ac661190 ffff88181180cf10 ffff881801543c10 ffff880797d60550 ffff88100aa00890 ffff88100aa004d0 ffff88180140da90 ffff880a28569590 ffff880c11648b90 ffff880430752d90 ffff8804305dddd0 ffff8804b1f3ef50 ffff88126055f310 ffff88126055fcd0 ffff88126055f6d0 ffff881801561610 ffff881052e7fe90 ffff88126055f550 ffff880c1167ead0 ffff880c0a046850 ffff880c0dfd5710 ffff880c0dd40310 ffff880c0dd40610 ffff8804b1f2c550 ffff8810ead120d0 ffff880ffc7633d0 ffff8808af56ef10 ffff880bfed2d550 ffff880a284baf50 ffff880c0ddccf50
在这个list出来的地址中,都是dentry的d_u成员的地址,也就是都可以获取到对应的dentry,我们找到slabinfo这个文件的d_u的地址为:
crash> struct -xo dentry.d_u 0xffff8818014b1540 struct dentry { union { [ffff8818014b1590] } d_u; }
也就是说,dentry可以通过d_parent成员直接找到父节点的dentry的地址,而父节点的dentry,可以通过遍历d_subdirs 找到所有子文件或者子目录的地址,这个地址偏移之后,就可以获取到
对应的dentry,也就是父子关系形成。当然,并不是所有的子文件或者子目录都能遍历到,因为如果没人打开,则不会有dentry,毕竟内存有限。
有个地方需要注意:对于该文件系统的根目录,它的d_child按道理要嵌入到它父dentry的d_subdirs 链表中,但由于它的父dentry就是它本身,所以它的d_child指针还是指向自己,也就是:
crash> struct -xo dentry.d_u ffff8818118002c0 struct dentry { union { [ffff881811800310] } d_u; } crash> crash> crash> struct -x dentry.d_u.d_child ffff8818118002c0 d_u.d_child = { next = 0xffff881811800310, prev = 0xffff881811800310 },
内核中同样用两个成员来表示分级关系的还有:task_struct 中的children,sibling成员,父进程的children成员和子进程的sibling成员串成一个双向循环链表。我们自己设计层级结构的时候,可以参照这种设计方法。
注:用一个3.10的例子描述下:经常需要用到的一个结构是name,比如我需要知道某个inode对应的文件名,由于一个inode可以对应多个文件名,所以在inode结构中并没有文件名的直接对应,这个文件名是放在dentry中的,
crash> struct dentry.d_parent,d_name,d_iname ffff885221936780 d_parent = 0xffff88579984b140----------------对应的父dentry d_name = { { { hash = 1776607972, len = 7 }, hash_len = 31841379044 }, name = 0xffff8852219367b8 "vmlinux" } d_iname = "vmlinux\000nity_list\000.wants\000\064\000\000\000\000\000" crash> struct dentry.d_parent,d_name,d_iname 0xffff88579984b140------查看其父dentry的文件名 d_parent = 0xffff88579984ac00 d_name = { { { hash = 4190344536, len = 26 }, hash_len = 115859494232 }, name = 0xffff88579984b178 "3.10.0-693.21.1.el7.x86_64" } d_iname = "3.10.0-693.21.1.el7.x86_64\000\000\000\000\000"
通过获取d_name结构中的name,可以获取文件名,依次往上遍历d_parent,就可以获取整个路径名。
一个比较特殊的是根目录,这个根目录是指挂载的根目录,它的d_parent是自身,而且其名字是"/"
所以要获取完整的路径,可以在内核模块中参照如下的写法,具体的函数也可以参照内核的 dentry_path_raw,下面是手写的简化版:
/*
 * get_inode_filename - build the path of @dentry inside its own mount.
 * @dentry:   dentry to start from; may be NULL (yields an empty string).
 * @filename: output buffer; receives a NUL-terminated path.
 *
 * Walks d_parent upwards until it reaches a dentry whose d_parent is itself
 * (or NULL), i.e. the root of the mount the dentry belongs to, and then
 * joins the collected names root-first with "/" separators.  The result is
 * therefore relative to that mount's root, not to the system root.
 *
 * If the chain is deeper than MAX_DEPTH components the function leaves
 * @filename empty instead of returning a silently truncated path.
 *
 * NOTE(review): the signature carries no buffer length, so the caller must
 * supply a buffer large enough for the whole path (presumably PATH_MAX
 * bytes) -- confirm at the call sites.
 * NOTE(review): no lock is taken while following d_parent / reading d_name
 * here; whether that is safe depends on the calling context -- confirm.
 */
void get_inode_filename(struct dentry *dentry, char *filename)
{
	enum { MAX_DEPTH = 128 };
	const char *item[MAX_DEPTH];
	struct dentry *temp = dentry;
	int reached_root = 0;
	int i = 0;

	if (!filename)
		return;
	filename[0] = '\0';
	if (!dentry)
		return;

	/* Collect the component names leaf-first, never past the array end. */
	while (i < MAX_DEPTH) {
		item[i++] = (const char *)temp->d_name.name;
		if (temp == temp->d_parent || !temp->d_parent) {
			reached_root = 1;
			break;
		}
		temp = temp->d_parent;
	}

	/* Chain deeper than we can hold: report nothing rather than garbage. */
	if (!reached_root)
		return;

	/* Emit root-first; a "/" component already acts as its own separator. */
	while (i > 0) {
		i--;
		strcat(filename, item[i]);
		if (i > 0 && strcmp(item[i], "/") != 0)
			strcat(filename, "/");
	}
}
很明显的,如上的方法有一种硬伤,就是它遍历到的根目录,就是这个文件归属的挂载点,而我们知道,一个文件的多级目录,可能属于多个挂载点,那怎么获取进一步的全路径呢?
我们来看下面的例子:
crash> files 317933 PID: 317933 TASK: ffff8853a6c10fd0 CPU: 3 COMMAND: "ZMSSdu" ROOT: / CWD: /tmp FD FILE DENTRY INODE TYPE PATH 0 ffff882536c4c000 ffff88268e6bc480 ffff8827d88d73a0 CHR /dev/pts/32 1 ffff8853d5859500 ffff8828b700a240 ffff8857bf568850 CHR /dev/null 2 ffff8853d5859500 ffff8828b700a240 ffff8857bf568850 CHR /dev/null 3 ffff8816b12a8100 ffff8834653c26c0 ffff88518d4ef9c0 REG /var/log/zmsscmd/ZMSSdu.log 4 ffff88268f550200 ffff8811cadccd80 ffff880505b992f0 REG /mnt/ZMSS/ZMSSDu.log--------------------作为例子的文件 5 ffff8829a0cff000 ffff882759336fc0 ffff8817d73f9ae8 REG /mnt/ZMSS/ZMSSMultiPath_Du.log 6 ffff8829a0cfc800 ffff884e79e68780 ffff885412896da8 REG /ZMSS/etc/ZMSSDu/etc/SemLock.sys 7 ffff8829a0cfe900 ffff8852e44d1b00 ffff884962453130 REG /var/log/ZMSS/backtrace/ZMSSDu.log 9 ffff885353dd8c00 ffff8823a3c61740 ffff88135486ad28 REG /ZMSS/etc/ZMSSRRIProcessd/etc/du_info 10 ffff885353ddae00 ffff8823a3c600c0 ffff88135486b130 REG /ZMSS/etc/ZMSSRRIProcessd/etc/df_info 11 ffff885353dda800 ffff8823a3c60900 ffff88135486b538 REG /ZMSS/etc/ZMSSRRIProcessd/etc/iostat_info
根据打印,可以知道文件名为/mnt/ZMSS/ZMSSDu.log,那么假设根据给出的inode:ffff880505b992f0 ,怎么获取到它的全路径呢?
crash> inode.i_dentry ffff880505b992f0 i_dentry = { first = 0xffff8811cadcce30 } crash> struct -xo dentry.d_alias struct dentry { [0xb0] struct hlist_node d_alias; } crash> px 0xffff8811cadcce30-0xb0 $1 = 0xffff8811cadccd80-----------------这个就是inode对应的其中一个dentry的地址,和直接列出来的 0xffff8811cadccd80 是对得上的。
crash> dentry.d_inode ffff8811cadccd80--------可以再验证一下:
d_inode = 0xffff880505b992f0
开始往parent遍历啦:
crash> dentry.d_name,d_iname,d_parent ffff8811cadccd80 d_name = { { { hash = 3146792703, len = 10 }, hash_len = 46096465663 }, name = 0xffff8811cadccdb8 "ZMSSDu.log" } d_iname = "ZMSSDu.log\000e\000ion.confrr3FQB\000\000\000\000" d_parent = 0xffff8857b90d3ec0
遍历到的最后一层文件名是:ZMSSDu.log,然后根据d_parent 来遍历:
crash> dentry.d_name,d_iname,d_parent 0xffff8857b90d3ec0 d_name = { { { hash = 0, len = 1 }, hash_len = 4294967296 }, name = 0xffff8857b90d3ef8 "/" } d_iname = "/\000lockevents:clockevent61\000\000\000\000\000\000"------------这里面后面都是乱码,前面的/是对的,说明到了该挂载点的根目录 d_parent = 0xffff8857b90d3ec0----------d_parent 就是本身,说明到了挂载点的根目录
再往上就不能遍历了,这个时候,我们需要回到inode去,找到它的挂载路径:
crash> inode.i_sb ffff880505b992f0 i_sb = 0xffff8857a1faa800 crash> super_block.s_mounts 0xffff8857a1faa800 s_mounts = { next = 0xffff8857aefbc070, prev = 0xffff8857aefbc370 } crash> struct -xo mount struct mount { [0x0] struct hlist_node mnt_hash; [0x10] struct mount *mnt_parent; [0x18] struct dentry *mnt_mountpoint;------------挂载点,其实就是一个dentry,又可以遍历啦 crash> struct mount 0xffff8857aefbc000 struct mount { mnt_hash = { next = 0x0, pprev = 0xffffc900305fc858 }, mnt_parent = 0xffff88587fe82b80, mnt_mountpoint = 0xffff8800354d8c00,--------对应挂载点的dentry crash> dentry.d_iname,d_parent 0xffff8800354d8c00------------继续遍历 d_iname = "ZMSS\000\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314" d_parent = 0xffff8800354d8cc0
我们找到了它的挂载点,文件名是ZMSS,绝对路径需要进一步往上遍历:
crash> dentry.d_name,d_iname,d_parent 0xffff8800354d8cc0 d_name = { { { hash = 7630445, len = 3 }, hash_len = 12892532333 }, name = 0xffff8800354d8cf8 "mnt" } d_iname = "mnt\000\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314\314" d_parent = 0xffff8857b08de9c0------------------------------继续遍历 crash> dentry.d_name,d_iname,d_parent 0xffff8857b08de9c0 d_name = { { { hash = 0, len = 1 }, hash_len = 4294967296 }, name = 0xffff8857b08de9f8 "/" } d_iname = "/\000h13\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" d_parent = 0xffff8857b08de9c0---------------d_parent和自身相等,说明又遇到了挂载的根目录了
至此,我们反向遍历完了/mnt/ZMSS/ZMSSDu.log,但我们还没法确定找到的/就是最终的系统根目录,所以还需要再确定一下:
crash> dentry.d_sb 0xffff8857b08de9c0 d_sb = 0xffff8857b8cfd800 crash> super_block.s_mounts 0xffff8857b8cfd800 s_mounts = { next = 0xffff88587fe82bf0, prev = 0xffff8857bb960f70 } crash> struct mount 0xffff8857bb960f00 struct mount { mnt_hash = { next = 0x0, pprev = 0xffffc900301bf318 }, mnt_parent = 0xffff8857bb960480, crash> struct mount.mnt_devname,mnt_parent 0xffff8857bb960480 mnt_devname = 0xffff8857bed630c0 "rootfs" mnt_parent = 0xffff8857bb960480
发现mnt_parent指向自己,说明肯定是根节点了。可以看出整个遍历过程很长很无趣。
对文件系统熟悉的人应该知道,内核中提供了一个函数来实现类似的过程,就是d_path:
char *d_path(const struct path *path, char *buf, int buflen) { char *res = buf + buflen; struct path root; int error; /* * We have various synthetic filesystems that never get mounted. On * these filesystems dentries are never used for lookup purposes, and * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: * * Some pseudo inodes are mountable. When they are mounted * path->dentry == path->mnt->mnt_root. In that case don't call d_dname * and instead have d_path return the mounted path. */ if (path->dentry->d_op && path->dentry->d_op->d_dname && (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); rcu_read_lock(); get_fs_root_rcu(current->fs, &root); error = path_with_deleted(path, &root, &res, &buflen); rcu_read_unlock(); if (error < 0) res = ERR_PTR(error); return res; }
给定一个dentry,给定对应的mnt,就能唯一确定一个路径。