XV6学习(15)Lab mmap: Mmap
代码在Github上。
这个实验要实现最基础的`mmap`功能。`mmap`即内存映射文件:将一个文件直接映射到内存当中,之后对文件的读写就可以直接通过读写内存来完成,而文件的同步则由操作系统负责。使用`mmap`可以避免大量`read`和`write`操作带来的内核缓冲区与用户缓冲区之间的频繁数据拷贝。在Kafka消息队列等软件中,就借助`mmap`来实现零拷贝(zero-copy)。
首先定义vma
结构体用于保存内存映射信息,并在proc
结构体中加入struct vma *vma
指针:
// Number of entries in the global vma_list pool that vma_alloc() hands out.
#define NVMA 16
// Virtual address given to the first mapping of a process; later mappings
// are placed after the end of the last one in the list (see sys_mmap).
#define VMA_START (MAXVA / 2)
// Describes one memory-mapped file region belonging to a process.
struct vma{
// First virtual address of the region (inclusive).
uint64 start;
// End of the region (exclusive): lookups test start <= va < end.
uint64 end;
uint64 length; // 0 means vma not used
// File offset that corresponds to 'start'.
uint64 off;
// PTE_* bits (PTE_U plus PTE_R / PTE_W) used when a page of the region is mapped.
int permission;
// mmap() flags as passed by the caller (checked against MAP_PRIVATE).
int flags;
// Backing file; sys_mmap and fork take a reference with filedup().
struct file *file;
// Next region in the owning process's list, or 0 at the tail.
struct vma *next;
// Taken by vma_alloc() while scanning; returned held for the claimed slot.
struct spinlock lock;
};
// Per-process state
struct proc {
...
// Head of this process's singly linked list of mmap regions; 0 when the
// process has no mappings.
struct vma *vma;
...
};
之后实现对vma
分配的代码:
struct vma vma_list[NVMA];
struct vma* vma_alloc(){
for(int i = 0; i < NVMA; i++){
acquire(&vma_list[i].lock);
if(vma_list[i].length == 0){
return &vma_list[i];
}else{
release(&vma_list[i].lock);
}
}
panic("no enough vma");
}
实现mmap
系统调用,这个函数主要就是申请一个vma
,之后查找一块空闲内存,填入相关信息,将vma
插入到进程的vma
链表中去:
// mmap(addr, length, prot, flags, fd, offset) system call.
//
// Records a new file-backed region at the end of the calling process's vma
// list and returns its start address. No pages are mapped here.
//
// Only addr == 0 (kernel picks the address) and offset == 0 are supported.
// Returns (uint64)-1 on bad arguments, on an invalid or closed fd, or when
// the requested protection is incompatible with how the file was opened.
uint64
sys_mmap(void)
{
  uint64 addr;
  int length, prot, flags, fd, offset;

  if(argaddr(0, &addr) < 0 || argint(1, &length) < 0 || argint(2, &prot) < 0 ||
     argint(3, &flags) < 0 || argint(4, &fd) < 0 || argint(5, &offset) < 0){
    return -1;
  }

  // Unsupported requests come straight from user space: fail the call
  // instead of panicking the kernel.
  if(addr != 0 || offset != 0)
    return -1;

  // length == 0 is the "slot is free" marker in the vma pool, so an empty
  // (or negative) mapping must never be linked into the list.
  if(length <= 0)
    return -1;

  struct proc *p = myproc();

  // fd is user-controlled: bound it by the size of the open-file table and
  // make sure the slot actually holds an open file before dereferencing it.
  if(fd < 0 || fd >= (int)(sizeof(p->ofile) / sizeof(p->ofile[0])))
    return -1;
  struct file *f = p->ofile[fd];
  if(f == 0)
    return -1;

  int pte_flag = PTE_U;
  if(prot & PROT_WRITE){
    // A shared writable mapping needs a writable file; a private one does not.
    if(!f->writable && !(flags & MAP_PRIVATE))
      return -1;
    pte_flag |= PTE_W;
  }
  if(prot & PROT_READ){
    if(!f->readable)
      return -1;
    pte_flag |= PTE_R;
  }

  struct vma *v = vma_alloc();   // returned with v->lock held
  v->permission = pte_flag;
  v->length = length;
  v->off = offset;
  v->file = f;
  v->flags = flags;
  v->next = 0;                   // a recycled slot may carry a stale link
  filedup(f);                    // the mapping keeps its own file reference

  struct vma *pv = p->vma;
  if(pv == 0){
    v->start = VMA_START;
    p->vma = v;
  }else{
    // Place the new region right after the last one, page aligned.
    while(pv->next)
      pv = pv->next;
    v->start = PGROUNDUP(pv->end);
    pv->next = v;
  }
  v->end = v->start + length;

  addr = v->start;
  printf("mmap: [%p, %p)\n", addr, v->end);
  release(&v->lock);
  return addr;
}
接下来就可以在usertrap
中对缺页中断进行处理:查找进程的vma
链表,判断该地址是否为映射地址,如果不是就说明出错,直接返回;如果在vma
链表中,就可以申请并映射一个页面,之后根据vma
从对应的文件中读取数据:
// Handle a page fault at 'va' that may belong to an mmap region.
//
// Looks up the region containing va in the current process's vma list,
// allocates a zeroed page, maps it with the region's permissions and fills
// it from the backing file.
//
// scause is the trap cause; 13 and 15 are the RISC-V load and store page
// fault codes, checked against the region's PTE_R / PTE_W bits.
//
// Returns 0 on success, or:
//   -1  va is not inside any mmap region
//   -2  load fault on a region without PTE_R
//   -3  store fault on a region without PTE_W
//   -4  kalloc() failed
//   -5  mappages() failed
int
mmap_handler(uint64 va, int scause)
{
  struct proc *p = myproc();
  struct vma *region;

  for(region = p->vma; region != 0; region = region->next){
    if(region->start <= va && va < region->end)
      break;
  }
  if(region == 0)
    return -1;

  if(scause == 13 && (region->permission & PTE_R) == 0)
    return -2;
  if(scause == 15 && (region->permission & PTE_W) == 0)
    return -3;

  // Bring in the whole page that contains the faulting address.
  uint64 page_va = PGROUNDDOWN(va);
  char *page = kalloc();
  if(page == 0)
    return -4;
  // Zero first so any bytes readi() does not fill stay zero.
  memset(page, 0, PGSIZE);

  if(mappages(p->pagetable, page_va, PGSIZE, (uint64)page, region->permission) != 0){
    kfree(page);
    return -5;
  }

  struct file *backing = region->file;
  ilock(backing->ip);
  readi(backing->ip, 0, (uint64)page, region->off + page_va - region->start, PGSIZE);
  iunlock(backing->ip);
  return 0;
}
之后就是munmap
的实现,同样先从链表中找到对应的vma
结构体,之后根据三种不同情况(头部、尾部、整个)来写回并释放对应的页面并更新vma
信息,如果整个区域都被释放就将vma
和文件释放。
uint64
sys_munmap(void)
{
uint64 addr;
int length;
if(argaddr(0, &addr) < 0 || argint(1, &length) < 0){
return -1;
}
struct proc *p = myproc();
struct vma *v = p->vma;
struct vma *pre = 0;
while(v != 0){
if(addr >= v->start && addr < v->end) break; // found
pre = v;
v = v->next;
}
if(v == 0) return -1; // not mapped
printf("munmap: %p %d\n", addr, length);
if(addr != v->start && addr + length != v->end) panic("munmap middle of vma");
if(addr == v->start){
writeback(v, addr, length);
uvmunmap(p->pagetable, addr, length / PGSIZE, 1);
if(length == v->length){
// free all
fileclose(v->file);
if(pre == 0){
p->vma = v->next; // head
}else{
pre->next = v->next;
v->next = 0;
}
acquire(&v->lock);
v->length = 0;
release(&v->lock);
}else{
// free head
v->start -= length;
v->off += length;
v->length -= length;
}
}else{
// free tail
v->length -= length;
v->end -= length;
}
return 0;
}
写回函数先判断是否需要写回,当需要写回时就仿照filewrite
的实现,将数据写回到对应的文件当中去,这里的实现是直接写回所有页面,但实际可以根据PTE_D
来判断内存是否被写入,如果没有写入就不用写回:
// Write n bytes of the mapped range starting at user address addr back to
// the file behind v.
//
// Does nothing for regions that are not writable or are MAP_PRIVATE.
// addr must be page aligned (panics otherwise). The data is written in
// chunks small enough for one log transaction each, the same bound
// filewrite uses. Stops early if writei() fails or makes no progress.
void
writeback(struct vma* v, uint64 addr, int n)
{
  if(!(v->permission & PTE_W) || (v->flags & MAP_PRIVATE)) // no need to writeback
    return;
  if((addr % PGSIZE) != 0)
    panic("unmap: not aligned");
  printf("starting writeback: %p %d\n", addr, n);

  struct file *f = v->file;
  // File offset of addr: the region's offset plus the distance of addr from
  // the region start (addr >= v->start, so this cannot underflow).
  uint64 base = v->off + (addr - v->start);
  int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE;
  int i = 0;
  while(i < n){
    int n1 = n - i;
    if(n1 > max)
      n1 = max;
    begin_op();
    ilock(f->ip);
    printf("%p %d %d\n", addr + i, base + i, n1);
    int r = writei(f->ip, 1, addr + i, base + i, n1);
    iunlock(f->ip);
    end_op();
    // A failed or empty write would otherwise never advance i.
    if(r <= 0)
      break;
    i += r;
  }
}
最后就是在`fork`当中复制`vma`到子进程,在`exit`中将当前进程的`vma`链表释放;在`exit`时还要对页面进行写回:
// fork (excerpt): the part shown gives the child its own copy of the
// parent's mmap region list. Code elided with "..." is not shown here.
int
fork(void)
{
...
np->state = RUNNABLE;
// Start the child with an empty list and append one copy per parent region.
np->vma = 0;
struct vma *pv = p->vma;
struct vma *pre = 0;
while(pv){
// vma_alloc() returns the slot with its lock held; it is released below
// once the copy is fully initialised and linked.
struct vma *vma = vma_alloc();
vma->start = pv->start;
vma->end = pv->end;
vma->off = pv->off;
vma->length = pv->length;
vma->permission = pv->permission;
vma->flags = pv->flags;
vma->file = pv->file;
// The child's region holds its own reference to the backing file.
filedup(vma->file);
vma->next = 0;
// Append to the child's list, preserving the parent's order.
if(pre == 0){
np->vma = vma;
}else{
pre->next = vma;
}
pre = vma;
release(&vma->lock);
pv = pv->next;
}
// NOTE(review): only the region descriptors are copied in the code shown;
// presumably the child's pages are faulted in later by mmap_handler — confirm.
...
}
// exit (excerpt): before the rest of exit runs, tear down every mmap region
// of the exiting process. Code elided with "..." is not shown here.
void
exit(int status)
{
struct proc *p = myproc();
if(p == initproc)
panic("init exiting");
// Unmap every mmap region of this process.
struct vma* v = p->vma;
struct vma* pv;
while(v){
// Flush the region to its file (writeback() skips regions that do not need
// it), then unmap and free its pages; the page count is rounded up so a
// trailing partial page is included.
writeback(v, v->start, v->length);
uvmunmap(p->pagetable, v->start, PGROUNDUP(v->length) / PGSIZE, 1);
// Drop the file reference this region was holding.
fileclose(v->file);
// Remember the successor before the slot is cleared.
pv = v->next;
// length == 0 marks the slot as free for vma_alloc(); update it under the lock.
acquire(&v->lock);
v->next = 0;
v->length = 0;
release(&v->lock);
v = pv;
}
// NOTE(review): p->vma is not reset in the code shown; fork sets np->vma = 0
// for new processes — confirm nothing reads it in between.
...
}