EXT文件系统
1、ext2_add_link的执行过程,函数的定义如下:
/*
 * ext2_add_link - add a directory entry named by @dentry that points at @inode.
 *
 * @dentry: dentry of the new name; its parent's inode is the directory
 *          that receives the entry.
 * @inode:  inode the new entry refers to (only i_ino and the type derived
 *          by ext2_set_de_type() are recorded).
 *
 * Scans the directory page by page looking for either an unused entry that
 * is large enough, or a live entry with enough slack behind its name to be
 * split.  If the scan reaches the end of the directory data, a fresh chunk
 * is started there.
 *
 * Returns the result of ext2_commit_chunk() on success, -EEXIST if the name
 * is already present, -EIO on a zero-length on-disk entry, or the error from
 * ext2_get_page() / __ext2_write_begin().
 */
int ext2_add_link (struct dentry *dentry, struct inode *inode)
{
	struct inode *dir = dentry->d_parent->d_inode;	/* parent directory */
	const char *name = dentry->d_name.name;		/* name to insert */
	int namelen = dentry->d_name.len;
	unsigned chunk_size = ext2_chunk_size(dir);
	unsigned reclen = EXT2_DIR_REC_LEN(namelen);	/* space the new entry needs */
	/*
	 * Kept as full-width unsigned: these hold chunk_size and the value of
	 * ext2_rec_len_from_disk(), which would be silently truncated by an
	 * unsigned short if a record length ever reaches 65536.
	 */
	unsigned rec_len, name_len;
	struct page *page = NULL;
	ext2_dirent * de;
	unsigned long npages = dir_pages(dir);
	unsigned long n;
	char *kaddr;
	loff_t pos;
	int err;

	/*
	 * Note "<=": the loop deliberately visits one page index past the
	 * current page count, presumably so the directory can be extended
	 * when no slot is found in the existing pages.
	 */
	for (n = 0; n <= npages; n++) {
		char *dir_end;

		page = ext2_get_page(dir, n);
		err = PTR_ERR(page);
		if (IS_ERR(page))
			goto out;
		lock_page(page);
		kaddr = page_address(page);
		/* End of valid directory data in this page. */
		dir_end = kaddr + ext2_last_byte(dir, n);
		de = (ext2_dirent *)kaddr;
		/* Last position in the page where the new entry could still start. */
		kaddr += PAGE_CACHE_SIZE - reclen;
		while ((char *)de <= kaddr) {
			if ((char *)de == dir_end) {
				/* Reached the end of the data: start a new empty chunk. */
				name_len = 0;
				rec_len = chunk_size;
				de->rec_len = ext2_rec_len_to_disk(chunk_size);
				de->inode = 0;
				goto got_it;
			}
			if (de->rec_len == 0) {
				ext2_error(dir->i_sb, __func__,
					"zero-length directory entry");
				err = -EIO;
				goto out_unlock;
			}
			/* Refuse to add a name that already exists. */
			err = -EEXIST;
			if (ext2_match (namelen, name, de))
				goto out_unlock;
			name_len = EXT2_DIR_REC_LEN(de->name_len);
			rec_len = ext2_rec_len_from_disk(de->rec_len);
			/* An unused entry that is big enough. */
			if (!de->inode && rec_len >= reclen)
				goto got_it;
			/* A live entry with enough slack after it to be split. */
			if (rec_len >= name_len + reclen)
				goto got_it;
			/* Nothing usable here, step to the next entry. */
			de = (ext2_dirent *) ((char *) de + rec_len);
		}
		unlock_page(page);
		ext2_put_page(page);
	}
	BUG();
	return -EINVAL;

got_it:
	/*
	 * de is either an unused entry, or a live entry whose trailing slack
	 * will hold the new one.  The page is still locked here.
	 */
	pos = page_offset(page) +
		(char*)de - (char*)page_address(page);
	err = __ext2_write_begin(NULL, page->mapping, pos, rec_len, 0,
							&page, NULL);
	if (err)
		goto out_unlock;
	if (de->inode) {
		/* Split: de1 is where the new entry starts, right after de's name. */
		ext2_dirent *de1 = (ext2_dirent *) ((char *) de + name_len);
		de1->rec_len = ext2_rec_len_to_disk(rec_len - name_len);
		/* Shrink the existing entry to exactly what it needs. */
		de->rec_len = ext2_rec_len_to_disk(name_len);
		de = de1;
	}
	de->name_len = namelen;
	memcpy(de->name, name, namelen);
	de->inode = cpu_to_le32(inode->i_ino);
	ext2_set_de_type (de, inode);
	err = ext2_commit_chunk(page, pos, rec_len);
	dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC;
	EXT2_I(dir)->i_flags &= ~EXT2_BTREE_FL;
	/* Mark the directory inode dirty so it gets written back. */
	mark_inode_dirty(dir);
out_put:
	ext2_put_page(page);
out:
	return err;
out_unlock:
	unlock_page(page);
	goto out_put;
}
大体的过程就是逐页遍历目录的数据块,从中查找一个废弃的(inode为0且足够大的)目录项,或者某个目录项后面足够大的空闲间隔;如果一直遍历到目录数据的末尾都没有找到,就在末尾新开一个块来存放新的目录项。具体的执行过程可以对照代码中的注释来理解。
2、索引节点的分配过程ext2_new_inode,函数的具体定义如下:
/*
 * ext2_new_inode - allocate a new inode for a file created in directory @dir.
 *
 * @dir:  parent directory; supplies the superblock, the starting point for
 *        the block group search, inherited flags and (optionally) the gid.
 * @mode: file mode of the new inode; S_ISGID may be added for directories
 *        created inside a setgid directory.
 *
 * Allocates the VFS inode, picks a block group, claims a free bit in that
 * group's inode bitmap, updates the free-inode/directory accounting and
 * initialises the in-memory inode.
 *
 * Returns the new inode, or ERR_PTR(): -ENOMEM if the VFS inode cannot be
 * allocated, -ENOSPC if no free inode is found, -EIO if a bitmap cannot be
 * read or the computed inode number is out of range, -EDQUOT on quota
 * failure, or the error from ext2_init_acl() / ext2_init_security().
 */
struct inode *ext2_new_inode(struct inode *dir, int mode)
{
	struct super_block *sb;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
	int group, i;
	ino_t ino = 0;
	struct inode * inode;
	struct ext2_group_desc *gdp;
	struct ext2_super_block *es;
	struct ext2_inode_info *ei;
	struct ext2_sb_info *sbi;
	int err;

	sb = dir->i_sb;
	/* Allocate the VFS-level inode first. */
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);

	ei = EXT2_I(inode);
	sbi = EXT2_SB(sb);
	es = sbi->s_es;

	/*
	 * Pick a suitable block group (policy as described by the original
	 * notes accompanying this code):
	 * 1. find_group_dir forces the old-style allocation.
	 * 2. find_group_orlov tries to keep the inodes of the regular files
	 *    in a directory in the same block group:
	 *    a. directories whose parent is the filesystem root should be
	 *       spread across the block groups;
	 *    b. a nested directory goes into its parent's group if that group
	 *       does not hold too many directories, has enough free inodes,
	 *       has enough free blocks and carries only a small "debt";
	 *    c. otherwise, starting from the parent's group, take the first
	 *       group whose free inode count is above the per-group average.
	 * 3. For anything that is not a directory, find_group_other:
	 *    a. starting from the group of @dir, do a quick logarithmic search;
	 *    b. if that finds no group with a free inode, fall back to an
	 *       exhaustive linear search starting from the group of @dir.
	 */
	if (S_ISDIR(mode)) {
		if (test_opt(sb, OLDALLOC))
			group = find_group_dir(sb, dir);
		else
			group = find_group_orlov(sb, dir);
	} else
		group = find_group_other(sb, dir);

	if (group == -1) {
		err = -ENOSPC;
		goto fail;
	}

	/*
	 * Claim a bit in the inode bitmap.  The loop is bounded by the number
	 * of block groups, so each group is tried at most once.
	 */
	for (i = 0; i < sbi->s_groups_count; i++) {
		gdp = ext2_get_group_desc(sb, group, &bh2);
		if (!gdp) {
			/*
			 * Defensive: gdp is dereferenced below, so skip a group
			 * whose descriptor could not be obtained.
			 */
			if (++group == sbi->s_groups_count)
				group = 0;
			continue;
		}
		/* Drop the bitmap of the previously tried group, if any. */
		brelse(bitmap_bh);
		bitmap_bh = read_inode_bitmap(sb, group);
		if (!bitmap_bh) {
			err = -EIO;
			goto fail;
		}
		ino = 0;

repeat_in_this_group:
		/* Look for a clear bit, starting at ino. */
		ino = ext2_find_next_zero_bit((unsigned long *)bitmap_bh->b_data,
					      EXT2_INODES_PER_GROUP(sb), ino);
		if (ino >= EXT2_INODES_PER_GROUP(sb)) {
			/*
			 * No free bit in this group: move on to the next one,
			 * wrapping from the last group back to group 0.
			 */
			if (++group == sbi->s_groups_count)
				group = 0;
			continue;
		}
		if (ext2_set_bit_atomic(sb_bgl_lock(sbi, group),
						ino, bitmap_bh->b_data)) {
			/* The bit was already set when we tried to take it. */
			if (++ino >= EXT2_INODES_PER_GROUP(sb)) {
				/* Group exhausted, try the next group. */
				if (++group == sbi->s_groups_count)
					group = 0;
				continue;
			}
			/* Keep searching within the same group. */
			goto repeat_in_this_group;
		}
		goto got;
	}

	/*
	 * Every block group was tried without success.  Release the bitmap
	 * buffer still held from the last iteration before bailing out;
	 * brelse() is already called on a NULL bitmap_bh at loop entry, so
	 * this is safe even if no bitmap was ever read.
	 */
	brelse(bitmap_bh);
	err = -ENOSPC;
	goto fail;
got:
	/* The bitmap buffer was modified: mark it dirty. */
	mark_buffer_dirty(bitmap_bh);
	if (sb->s_flags & MS_SYNCHRONOUS)
		sync_dirty_buffer(bitmap_bh);
	brelse(bitmap_bh);

	/* Convert the bit index within the group to an inode number. */
	ino += group * EXT2_INODES_PER_GROUP(sb) + 1;
	if (ino < EXT2_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
		ext2_error (sb, "ext2_new_inode",
			    "reserved inode or inode > inodes count - "
			    "block_group = %d,inode=%lu", group,
			    (unsigned long) ino);
		err = -EIO;
		goto fail;
	}

	/* Global counters. */
	percpu_counter_add(&sbi->s_freeinodes_counter, -1);
	if (S_ISDIR(mode))
		percpu_counter_inc(&sbi->s_dirs_counter);

	/* Per-group accounting, under the block group lock. */
	spin_lock(sb_bgl_lock(sbi, group));
	gdp->bg_free_inodes_count =
		cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
	if (S_ISDIR(mode)) {
		/* The debt counter saturates at 255. */
		if (sbi->s_debts[group] < 255)
			sbi->s_debts[group]++;
		gdp->bg_used_dirs_count =
			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
	} else {
		if (sbi->s_debts[group])
			sbi->s_debts[group]--;
	}
	spin_unlock(sb_bgl_lock(sbi, group));

	sb->s_dirt = 1;
	mark_buffer_dirty(bh2);

	/* Ownership: gid comes from the directory for GRPID mounts or setgid dirs. */
	inode->i_uid = current->fsuid;
	if (test_opt (sb, GRPID))
		inode->i_gid = dir->i_gid;
	else if (dir->i_mode & S_ISGID) {
		inode->i_gid = dir->i_gid;
		if (S_ISDIR(mode))
			mode |= S_ISGID;
	} else
		inode->i_gid = current->fsgid;

	/* Fill in the inode attributes. */
	inode->i_mode = mode;
	inode->i_ino = ino;
	inode->i_blocks = 0;
	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
	memset(ei->i_data, 0, sizeof(ei->i_data));
	/* Inherit the parent's flags, minus the ones that must not propagate. */
	ei->i_flags = EXT2_I(dir)->i_flags & ~EXT2_BTREE_FL;
	if (S_ISLNK(mode))
		ei->i_flags &= ~(EXT2_IMMUTABLE_FL|EXT2_APPEND_FL);
	if (!S_ISDIR(mode))
		ei->i_flags &= ~EXT2_DIRSYNC_FL;
	ei->i_faddr = 0;
	ei->i_frag_no = 0;
	ei->i_frag_size = 0;
	ei->i_file_acl = 0;
	ei->i_dir_acl = 0;
	ei->i_dtime = 0;
	ei->i_block_alloc_info = NULL;
	ei->i_block_group = group;
	ei->i_dir_start_lookup = 0;
	ei->i_state = EXT2_STATE_NEW;
	ext2_set_inode_flags(inode);
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);
	insert_inode_hash(inode);

	if (DQUOT_ALLOC_INODE(inode)) {
		err = -EDQUOT;
		goto fail_drop;
	}

	err = ext2_init_acl(inode, dir);
	if (err)
		goto fail_free_drop;

	err = ext2_init_security(inode,dir);
	if (err)
		goto fail_free_drop;

	mark_inode_dirty(inode);
	ext2_debug("allocating inode %lu\n", inode->i_ino);
	ext2_preread_inode(inode);
	return inode;

fail_free_drop:
	DQUOT_FREE_INODE(inode);

fail_drop:
	DQUOT_DROP(inode);
	inode->i_flags |= S_NOQUOTA;
	inode->i_nlink = 0;
	iput(inode);
	return ERR_PTR(err);

fail:
	make_bad_inode(inode);
	iput(inode);
	return ERR_PTR(err);
}
这个函数的总体过程还是很简单的:先分配VFS中的索引节点(这是最容易的一步),然后找到合适的块组,并从该块组的索引节点位图中找到一个空闲位;接着初始化索引节点的各项属性;最后把位图缓冲区、块组描述符所在的缓冲区以及索引节点本身都标记成脏的,以便它们被写回硬盘。其中寻找块组的过程还是挺有意思的。