xv6——文件系统:FS的布局和inode的读写操作
作者:殷某人
更新时间:2022/07/08
框架图
文件系统在磁盘上的布局
block块管理: free bitmap区
使用每一个比特表示磁盘上的一个block块是否可用。
inode保存数据的结构
使用两层的结构进行数据存储。
数据结构定义
磁盘上保存的数据结构
超级块对象
每一个文件系统都必须实现超级块,用于保存文件系统的全局信息。
// On-disk superblock: global layout information for the file system.
// readsb() copies it from block 1 of the device into memory.
struct superblock {
  uint size;         // Size of file system image (blocks)
  uint nblocks;      // Number of data blocks
  uint ninodes;      // Number of inodes.
  uint nlog;         // Number of log blocks
  uint logstart;     // Block number of first log block
  uint inodestart;   // Block number of first inode block
  uint bmapstart;    // Block number of first free map block
};
dinode 结构
保存磁盘上的inode对应的结构体:
// On-disk inode structure.
struct dinode {
short type; // File type (e.g. T_DIR, T_DEV); 0 marks a free on-disk inode (see ialloc)
short major; // Major device number (T_DEV only)
short minor; // Minor device number (T_DEV only)
short nlink; // Number of links to inode in file system
uint size; // Size in bytes of the data belonging to this inode
uint addrs[NDIRECT+1]; // Block numbers of the inode's data. The first NDIRECT entries point directly at data blocks;
// the last entry points at an indirect block that holds NINDIRECT further block numbers (see bmap),
// so the largest file is (NDIRECT + NINDIRECT) * BSIZE bytes. NOTE(review): with stock xv6 values
// (NDIRECT = 12, BSIZE = 512, NINDIRECT = BSIZE/sizeof(uint) = 128 -- not defined in this excerpt, confirm) that is 70KB.
};
目录项的结构
在linux操作系统上,目录也是一种文件,它里面保存的不是用户数据,而是目录项列表,即它包含了哪些文件或目录。目录项(directory entry)由 inum 和名字组成。
// One entry in a directory's data: an inode number plus a name.
struct dirent {
ushort inum; // Index of the file's or directory's inode on disk; 0 marks an unused entry (see dirlookup/dirlink)
char name[DIRSIZ]; // Entry name; written with strncpy(..., DIRSIZ) in dirlink, so a DIRSIZ-long name has no terminating NUL
};
内存中的数据结构
inode 结构
inode数据结构是磁盘上dinode结构在内存中的映射, inode的结构如下所示:
// In-memory copy of an on-disk inode (struct dinode) plus cache bookkeeping.
struct inode {
uint dev; // Device number
uint inum; // Index of the corresponding on-disk dinode
int ref; // Reference count; changed under icache.lock (see iget, idup, iput)
struct sleeplock lock; // protects everything below here
int valid; // inode has been read from disk? (set by ilock, cleared by iget/iput)
short type; // copy of disk inode
short major;
short minor;
short nlink;
uint size;
uint addrs[NDIRECT+1];
};
inode在内存中的缓存块定义如下:
// Table of in-memory inodes, searched and recycled by iget().
struct {
struct spinlock lock; // held while scanning the table and while changing an entry's ref
struct inode inode[NINODE]; // fixed pool of NINODE cache slots; ref == 0 means the slot is free
} icache;
函数接口
对block块的操作
读取超级块到内存中
// Copy the superblock stored in block 1 of device dev into *sb.
void
readsb(int dev, struct superblock *sb)
{
  struct buf *sbuf = bread(dev, 1);

  memmove(sb, sbuf->data, sizeof(*sb));
  brelse(sbuf);
}
从磁盘上申请一个空闲的block块
- 读取bitmap区,查找到空闲的block块的索引
- 更新bitmap区
- 零值初始化block块,返回block块的索引号。
// Allocate a zeroed disk block on dev and return its block number.
// Scans the free bitmap one bitmap block (BPB bits) at a time, claims the
// first clear bit, logs the bitmap change and zeroes the new block.
// Panics when no free block is found.
static uint
balloc(uint dev)
{
  int base, idx, mask;
  struct buf *map;

  for(base = 0; base < sb.size; base += BPB){
    map = bread(dev, BBLOCK(base, sb));
    for(idx = 0; idx < BPB && base + idx < sb.size; idx++){
      mask = 1 << (idx % 8);
      if(map->data[idx/8] & mask)
        continue;                 // Block already in use.
      map->data[idx/8] |= mask;   // Claim it.
      log_write(map);
      brelse(map);
      bzero(dev, base + idx);
      return base + idx;
    }
    brelse(map);
  }
  panic("balloc: out of blocks");
}
释放磁盘上的一个block块
- 与申请过程正好相反。
// Mark disk block b on dev as free in the bitmap.
// Panics if the block's bit is already clear.
static void
bfree(int dev, uint b)
{
  struct buf *map = bread(dev, BBLOCK(b, sb));
  int idx = b % BPB;
  int mask = 1 << (idx % 8);

  if(!(map->data[idx/8] & mask))
    panic("freeing free block");
  map->data[idx/8] &= ~mask;
  log_write(map);
  brelse(map);
}
对inode的操作
初始化inode 缓存区
- 初始化inode缓存区的锁
- 读取超级块到内存中,并打印其内容
// Initialize the inode cache and load the superblock of device dev.
// Sets up icache.lock and every cache slot's sleeplock, reads the superblock
// into the global sb, then prints its fields.
void
iinit(int dev)
{
  int i;

  initlock(&icache.lock, "icache");
  for(i = 0; i < NINODE; i++)
    initsleeplock(&icache.inode[i].lock, "inode");

  readsb(dev, &sb);
  // The format string is split with adjacent-literal concatenation rather
  // than a backslash continuation, so the space before "inodestart" does not
  // depend on how the continuation line happens to be indented.
  cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d"
          " inodestart %d bmap start %d\n", sb.size, sb.nblocks,
          sb.ninodes, sb.nlog, sb.logstart, sb.inodestart,
          sb.bmapstart);
}
在内存的inode缓存区中查找指定的inode;如果尚未缓存,则占用一个空闲的缓存项(此时还没有从磁盘读取inode的内容)
// Return the in-memory inode for (dev, inum), taking a reference on it.
// If the inode is already cached its ref is bumped; otherwise the first free
// slot (ref == 0) is claimed with valid = 0, i.e. nothing is read from disk
// here. Panics when the cache has no free slot.
static struct inode*
iget(uint dev, uint inum)
{
  struct inode *cur, *slot;
  int i;

  acquire(&icache.lock);

  slot = 0;
  for(i = 0; i < NINODE; i++){
    cur = &icache.inode[i];
    // Cache hit: share the existing entry.
    if(cur->ref > 0 && cur->dev == dev && cur->inum == inum){
      cur->ref++;
      release(&icache.lock);
      return cur;
    }
    // Remember the first unused slot in case of a miss.
    if(slot == 0 && cur->ref == 0)
      slot = cur;
  }

  if(slot == 0)
    panic("iget: no inodes");

  // Cache miss: recycle the free slot.
  slot->dev = dev;
  slot->inum = inum;
  slot->ref = 1;
  slot->valid = 0;
  release(&icache.lock);
  return slot;
}
从磁盘上申请一个空闲的dinode, 返回inode指针
- 在磁盘的inode区进行查找,如果inode的type == 0, 表示未使用。
- 通过调用iget()函数获取到空闲inode在内存中的映射。
// Allocate an on-disk inode of the given type on dev.
// Scans inode numbers 1..sb.ninodes-1 for a dinode whose type is 0, zeroes it,
// records the new type through the log, and returns the matching in-memory
// inode from iget(). Panics when every dinode is in use.
struct inode*
ialloc(uint dev, short type)
{
  int inum;
  struct buf *blk;
  struct dinode *slot;

  for(inum = 1; inum < sb.ninodes; inum++){
    blk = bread(dev, IBLOCK(inum, sb));
    slot = (struct dinode*)blk->data + inum%IPB;
    if(slot->type != 0){   // in use, keep looking
      brelse(blk);
      continue;
    }
    memset(slot, 0, sizeof(*slot));
    slot->type = type;
    log_write(blk);        // mark it allocated on the disk
    brelse(blk);
    return iget(dev, inum);
  }
  panic("ialloc: no inodes");
}
把内存中的inode信息写入到磁盘中
- 读取inode在磁盘中所在的block块
- 更新内容并写入磁盘。
// Copy the in-memory inode ip into its on-disk dinode and log the block.
void
iupdate(struct inode *ip)
{
  struct buf *blk = bread(ip->dev, IBLOCK(ip->inum, sb));
  struct dinode *disk = (struct dinode*)blk->data + ip->inum%IPB;

  disk->type = ip->type;
  disk->major = ip->major;
  disk->minor = ip->minor;
  disk->nlink = ip->nlink;
  disk->size = ip->size;
  memmove(disk->addrs, ip->addrs, sizeof(ip->addrs));

  log_write(blk);
  brelse(blk);
}
把磁盘上的inode同步到内存中并加锁
// Lock ip and, if it has not been loaded yet, fill it in from disk.
// Panics on a null or unreferenced inode, and when the loaded dinode has
// type 0.
void
ilock(struct inode *ip)
{
  struct buf *blk;
  struct dinode *disk;

  if(ip == 0 || ip->ref < 1)
    panic("ilock");

  acquiresleep(&ip->lock);

  // Already loaded: the lock is all the caller needs.
  if(ip->valid)
    return;

  blk = bread(ip->dev, IBLOCK(ip->inum, sb));
  disk = (struct dinode*)blk->data + ip->inum%IPB;
  ip->type = disk->type;
  ip->major = disk->major;
  ip->minor = disk->minor;
  ip->nlink = disk->nlink;
  ip->size = disk->size;
  memmove(ip->addrs, disk->addrs, sizeof(ip->addrs));
  brelse(blk);
  ip->valid = 1;
  if(ip->type == 0)
    panic("ilock: no type");
}
inode节点,增加内存中的引用次数
- 唯一的操作就是++ref.
// Take one more reference on ip (under icache.lock) and hand ip back,
// so callers can write ip = idup(ip1).
struct inode*
idup(struct inode *ip)
{
  acquire(&icache.lock);
  ip->ref += 1;
  release(&icache.lock);

  return ip;
}
释放不使用的inode, 如果inode不再被使用,要释放掉inode关联的数据块。
- 删除inode关联的数据块是通过 itrunc 函数完成的。
// itrunc
// Discard the contents of ip: free every direct block, every block listed in
// the indirect block, and the indirect block itself; then set size to 0 and
// write the inode back with iupdate().
static void
itrunc(struct inode *ip)
{
  int k;
  struct buf *ind;
  uint *entries;

  // Direct blocks.
  for(k = 0; k < NDIRECT; k++){
    if(ip->addrs[k] == 0)
      continue;
    bfree(ip->dev, ip->addrs[k]);
    ip->addrs[k] = 0;
  }

  // Blocks reachable through the indirect block, then the block itself.
  if(ip->addrs[NDIRECT] != 0){
    ind = bread(ip->dev, ip->addrs[NDIRECT]);
    entries = (uint*)ind->data;
    for(k = 0; k < NINDIRECT; k++){
      if(entries[k] != 0)
        bfree(ip->dev, entries[k]);
    }
    brelse(ind);
    bfree(ip->dev, ip->addrs[NDIRECT]);
    ip->addrs[NDIRECT] = 0;
  }

  ip->size = 0;
  iupdate(ip);
}
// Drop one in-memory reference to ip.
// With ip->lock held: if the inode is valid, has no links (nlink == 0) and the
// reference being dropped is the only one (ref == 1), its data blocks are
// released with itrunc() and the inode is marked free (type = 0) and written
// out with iupdate(). The reference count is decremented afterwards under
// icache.lock.
void
iput(struct inode *ip)
{
acquiresleep(&ip->lock);
if(ip->valid && ip->nlink == 0){
// ref is read under icache.lock, the lock iget()/idup() hold when changing it.
acquire(&icache.lock);
int r = ip->ref;
release(&icache.lock);
if(r == 1){
// inode has no links and no other references: truncate and free.
itrunc(ip);
ip->type = 0;
iupdate(ip);
// Clear valid so ilock() re-reads the dinode the next time it locks this slot.
ip->valid = 0;
}
}
releasesleep(&ip->lock);
acquire(&icache.lock);
ip->ref--;
release(&icache.lock);
}
把inode对应的数据区的第m个block块映射到对应的磁盘的第n个block块。
特别注意: 如果inode对应的数据的block块不存在时,会向磁盘申请一个block块。也就是说,它存在扩容的情况。
// Return the disk block number backing the bn-th block of ip's data.
// A missing block (address 0) is allocated on demand with balloc(), and so is
// the indirect block when bn falls beyond the NDIRECT direct slots.
// Panics when bn is past NDIRECT + NINDIRECT.
static uint
bmap(struct inode *ip, uint bn)
{
  uint blockno, *table;
  struct buf *ind;

  if(bn < NDIRECT){
    blockno = ip->addrs[bn];
    if(blockno == 0){
      blockno = balloc(ip->dev);
      ip->addrs[bn] = blockno;
    }
    return blockno;
  }
  bn -= NDIRECT;

  if(bn < NINDIRECT){
    // Load indirect block, allocating if necessary.
    blockno = ip->addrs[NDIRECT];
    if(blockno == 0){
      blockno = balloc(ip->dev);
      ip->addrs[NDIRECT] = blockno;
    }
    ind = bread(ip->dev, blockno);
    table = (uint*)ind->data;
    blockno = table[bn];
    if(blockno == 0){
      blockno = balloc(ip->dev);
      table[bn] = blockno;
      log_write(ind);   // the indirect block itself changed
    }
    brelse(ind);
    return blockno;
  }

  panic("bmap: out of range");
}
读inode的数据区
- off, 表示读取的位置相对于起始位置的偏移。
- n, 要读取的字节数
- dst, 目的缓存区
// Read up to n bytes of ip's data, starting at byte offset off, into dst.
// T_DEV inodes are forwarded to the device's read routine in devsw.
// Returns the number of bytes read (clamped to the end of the file), or -1 if
// off lies beyond the file, off + n wraps around, or the device has no read
// routine.
int
readi(struct inode *ip, char *dst, uint off, uint n)
{
  uint done, chunk;
  struct buf *blk;

  if(ip->type == T_DEV){
    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
      return -1;
    return devsw[ip->major].read(ip, dst, n);
  }

  if(off > ip->size || off + n < off)
    return -1;
  if(off + n > ip->size)
    n = ip->size - off;

  done = 0;
  while(done < n){
    blk = bread(ip->dev, bmap(ip, off/BSIZE));
    // Copy at most up to the end of the current block.
    chunk = min(n - done, BSIZE - off%BSIZE);
    memmove(dst, blk->data + off%BSIZE, chunk);
    brelse(blk);
    done += chunk;
    off += chunk;
    dst += chunk;
  }
  return n;
}
写inode的数据区
- 当写入的范围超过原有数据的末尾时,该操作会增大inode的数据大小(size),并调用iupdate写回磁盘。
// Write n bytes from src into ip's data, starting at byte offset off.
// T_DEV inodes are forwarded to the device's write routine in devsw.
// Blocks that do not exist yet are allocated by bmap(), so the file can grow;
// when the write ends past the old end of file, size is updated and the inode
// is written back with iupdate().
// Returns n on success, or -1 if off lies beyond the current size, off + n
// wraps around, the result would exceed MAXFILE*BSIZE bytes, or the device
// has no write routine.
int
writei(struct inode *ip, char *src, uint off, uint n)
{
  uint tot, m;
  struct buf *bp;

  if(ip->type == T_DEV){
    if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
      return -1;
    return devsw[ip->major].write(ip, src, n);
  }

  if(off > ip->size || off + n < off)
    return -1;
  if(off + n > MAXFILE*BSIZE)
    return -1;

  for(tot=0; tot<n; tot+=m, off+=m, src+=m){
    bp = bread(ip->dev, bmap(ip, off/BSIZE));
    // Copy at most up to the end of the current block.
    m = min(n - tot, BSIZE - off%BSIZE);
    memmove(bp->data + off%BSIZE, src, m);
    log_write(bp);
    brelse(bp);
  }

  // off now points just past the last byte written.
  if(n > 0 && off > ip->size){
    ip->size = off;
    iupdate(ip);
  }
  return n;
}
对目录项的操作
在给定的目录的inode下, 查找指定的目录项。
- 代码中认为, dirent.inum = 0 , 为空目录项,跳过。
- 通过比较目录项的name 来判断是否相同。
// Search directory dp for an entry called name.
// On a match, store the entry's byte offset in *poff (when poff is non-null)
// and return the entry's inode via iget(); return 0 if there is no match.
// Entries with inum == 0 are unused and skipped.
// Panics if dp is not a directory or an entry cannot be read in full.
struct inode*
dirlookup(struct inode *dp, char *name, uint *poff)
{
  uint pos;
  struct dirent ent;

  if(dp->type != T_DIR)
    panic("dirlookup not DIR");

  for(pos = 0; pos < dp->size; pos += sizeof(ent)){
    if(readi(dp, (char*)&ent, pos, sizeof(ent)) != sizeof(ent))
      panic("dirlookup read");
    if(ent.inum == 0)
      continue;
    if(namecmp(name, ent.name) != 0)
      continue;
    // entry matches path element
    if(poff)
      *poff = pos;
    return iget(dp->dev, ent.inum);
  }

  return 0;
}
在给定的目录下,增加一个新的目录项。
// Add the entry (name, inum) to directory dp.
// Returns -1 if name already exists in dp, 0 on success. The entry goes into
// the first unused slot (inum == 0), or is appended at the end of the
// directory when every slot is taken.
// Panics if an entry cannot be read or the new one cannot be written in full.
int
dirlink(struct inode *dp, char *name, uint inum)
{
  int pos;
  struct dirent ent;
  struct inode *existing;

  // Check that name is not present.
  existing = dirlookup(dp, name, 0);
  if(existing != 0){
    iput(existing);
    return -1;
  }

  // Look for an empty dirent.
  pos = 0;
  while(pos < dp->size){
    if(readi(dp, (char*)&ent, pos, sizeof(ent)) != sizeof(ent))
      panic("dirlink read");
    if(ent.inum == 0)
      break;
    pos += sizeof(ent);
  }

  strncpy(ent.name, name, DIRSIZ);
  ent.inum = inum;
  if(writei(dp, (char*)&ent, pos, sizeof(ent)) != sizeof(ent))
    panic("dirlink");

  return 0;
}
对文件路径的操作
路径字符串操作,选出字符串第一个element, 返回其余的。
// Split the first element off path.
// The element is copied into name: NUL-terminated when shorter than DIRSIZ,
// otherwise cut to exactly DIRSIZ bytes with no terminator.
// Returns a pointer to what follows the element, with leading slashes
// removed, so *result == '\0' means the copied element was the last one.
// Returns 0 when path holds no element at all.
//
// Examples:
// skipelem("a/bb/c", name) = "bb/c", setting name = "a"
// skipelem("///a//bb", name) = "bb", setting name = "a"
// skipelem("a", name) = "", setting name = "a"
// skipelem("", name) = skipelem("////", name) = 0
//
static char*
skipelem(char *path, char *name)
{
  char *start;
  int n;

  for(; *path == '/'; path++)
    ;
  if(*path == 0)
    return 0;

  start = path;
  for(; *path != 0 && *path != '/'; path++)
    ;
  n = path - start;

  if(n < DIRSIZ){
    memmove(name, start, n);
    name[n] = 0;
  } else {
    memmove(name, start, DIRSIZ);
  }

  for(; *path == '/'; path++)
    ;
  return path;
}
返回给定路径的inode
- 调用 namex 完成真正的工作。
// Look up path and return its inode; the work is done by namex().
struct inode*
namei(char *path)
{
  char elem[DIRSIZ];   // scratch buffer required by namex; its contents are not used here

  return namex(path, 0, elem);
}
返回给定路径的inode的上一层inode.
- 调用 namex 完成真正的工作。
// Look up the parent directory of path and return its inode; namex() copies
// the final path element into name.
struct inode*
nameiparent(char *path, char *name)
{
  struct inode *parent = namex(path, 1, name);

  return parent;
}
返回给定路径的inode或者该inode的上一层inode.
- 至于返回inode还是inode的上一层inode, 由参数nameiparent决定。
// Walk path one element at a time and return the inode it names.
// The walk starts at the root inode for a path beginning with '/', otherwise
// at the current process's cwd. When nameiparent is non-zero the walk stops
// one level early: the parent directory's inode is returned (unlocked) and
// name holds the final element.
// Returns 0 if an intermediate element is not a directory, if an element is
// not found, or if nameiparent is set but path contains no element.
static struct inode*
namex(char *path, int nameiparent, char *name)
{
  struct inode *cur, *child;

  cur = (*path == '/') ? iget(ROOTDEV, ROOTINO) : idup(myproc()->cwd);

  for(;;){
    path = skipelem(path, name);
    if(path == 0)
      break;

    ilock(cur);
    if(cur->type != T_DIR){
      iunlockput(cur);
      return 0;
    }
    if(nameiparent && *path == '\0'){
      // Stop one level early.
      iunlock(cur);
      return cur;
    }
    child = dirlookup(cur, name, 0);
    if(child == 0){
      iunlockput(cur);
      return 0;
    }
    iunlockput(cur);
    cur = child;
  }

  if(nameiparent){
    iput(cur);
    return 0;
  }
  return cur;
}