MIT 6.S081 2021: Lab mmap

mmap

mmap就是把指定的文件fd映射到进程内存空间的某一个部分，映射建立之后，进程读写这块内存就像是在读写文件一样。按照提示来做实验：

Implement mmap: find an unused region in the process's address space in which to map the file, and add a VMA to the process's table of mapped regions. The VMA should contain a pointer to a struct file for the file being mapped; mmap should increase the file's reference count so that the structure doesn't disappear when the file is closed (hint: see filedup). Run mmaptest: the first mmap should succeed, but the first access to the mmap-ed memory will cause a page fault and kill mmaptest.

这就是mmap的设计思路了。首先是在进程的地址空间里“找到一个未使用的区域”。我们能想到的，肯定可以合法访问的“未使用的区域”只有堆的顶部p->sz之上的地址了。lab lazy里的sbrk()函数就是通过对p->sz作修改来对进程的堆空间做伸缩操作。因此mmap的区域一定是基于p->sz进行操作的。

然后要求给进程加入一个VMA表。VMA表里面存储了每次调用mmap()得到的映射空间的信息。每个进程都有一个VMA表，理所当然的我们应该把它放在struct proc中。VMA的结构如下：

// One mmap()-ed region (a "virtual memory area") belonging to a process.
typedef struct vma {
  struct file *mmapfile;   // file object that backs this mapping
  struct inode *ip;        // inode of that file
  uint64 mmapaddr;         // start address handed out by mmap()
  uint64 mmapend;          // end address of the mapping
  uint64 mmlength;         // length still mapped; note that this value changes
  int mmprot;              // prot argument that was passed to mmap()
  int mmflag;              // flags argument that was passed to mmap()
  int valid;               // 1 while this slot is in use, 0 while it is free
} vma;

把下面这个数组加到struct proc里面：

// Per-process table of mmap()-ed regions; a slot is free while its valid field is 0.
vma map_region[16];

然后开始设计sys_mmap系统调用。按照提示，mmap()不能分配页面，不能读入文件，这些操作必须依靠page fault执行。也就是这些操作应该写在usertrap()里面。mmap()的主要工作就是分配地址，思路如下：

通过p->trapframe传入mmap()的参数。注意这个lab中addr和offset就是0，不用传参。

在map_region里面找一个空位置，先把已经确定的参数写入这个位置。这里使用一个简单的mapalloc()函数，如果找不到空位置就返回-1:

// Reserve a free slot in the calling process's map_region table.
// Marks the slot as in use (valid = 1) and returns its index, or
// returns -1 when every slot is already taken.
static int
mapalloc()
{
  struct proc *p = myproc();
  // Bound the scan by the table itself instead of by NOFILE, an unrelated
  // constant: the loop can then never run past map_region if the two
  // sizes ever differ.
  int nslots = sizeof(p->map_region) / sizeof(p->map_region[0]);

  for(int i = 0; i < nslots; i++){
    if(p->map_region[i].valid == 0){
      p->map_region[i].valid = 1;
      return i;
    }
  }
  return -1;
}

使用filedup()增加映射文件的引用数。
为映射区域找一个起始地址addr，写入mmapaddr和mmapend，直接返回addr。

sys_mmap()代码如下：

uint64 sys_mmap(void)
{
  struct proc *p = myproc();
  //传入参数
  uint64 fail=(uint64)((char*)-1);
  uint64 addr;
  uint64 length=p->trapframe->a1;
  int prot=p->trapframe->a2;
  int flags=p->trapframe->a3;
  int fd=p->trapframe->a4;

  //检查打开的文件。如果是read-only文件开启了MAP_SHARED，则必须返回错误
  if((p->ofile[fd]->writable)==0 && (flags&MAP_SHARED)&&(prot&PROT_WRITE)){
    return fail;
  }

  //在map_region里面找到一个空位
  int idx=mapalloc();
  //printf("%d idx\n",idx);
  //初始化
  p->map_region[idx].mmlength=length;
  p->map_region[idx].mmprot=prot;
  p->map_region[idx].mmflag=flags;
  p->map_region[idx].mmapfile=p->ofile[fd];
  p->map_region[idx].ip=p->ofile[fd]->ip;
  //file ref++
  filedup(p->ofile[fd]);

  //寻找一个地址
  addr=PGROUNDUP(p->sz);
  p->sz+=PGROUNDUP(length);
  //p确定mmap的范围
  p->map_region[idx].mmapaddr=addr;
  p->map_region[idx].mmapend=addr+PGROUNDUP(length);
  //printf("mmap range %p---%p\n",p->map_region[idx].mmapaddr,p->map_region[idx].mmapend);
  return addr;
}

这里说明一下：

mmaptest会检查对只读文件的映射这种情况。如果一个文件以O_RDONLY打开，那么如果同时开启MAP_SHARED和PROT_WRITE就意味着：映射区域可写，修改过的映射区域需要写回文件，这和O_RDONLY是冲突的，必须返回错误值0xffffffffffffffff。
如何寻找映射地址：这里使用PGROUNDUP(p->sz)作为映射的起始地址addr。为映射分配PGROUNDUP(length)个字节的空间，这样的话映射空间的起始地址和长度都是按页对齐的。然后立刻把p->sz加上PGROUNDUP(length)，不能拖到usertrap()里面再加。否则，当进程连续多次调用mmap()时，mmap()每次分配的会是相同的起始地址，所有映射空间会互相覆盖。

现在设计page fault的机制。这里仿照lab lazy的解决方案即可：

void
usertrap(void)
{
  int which_dev = 0;

  if((r_sstatus() & SSTATUS_SPP) != 0)
    panic("usertrap: not from user mode");

  // send interrupts and exceptions to kerneltrap(),
  // since we're now in the kernel.
  w_stvec((uint64)kernelvec);

  struct proc *p = myproc();
  
  // save user program counter.
  p->trapframe->epc = r_sepc();
  
  if(r_scause() == 8){
    // system call

    if(p->killed)
      exit(-1);

    // sepc points to the ecall instruction,
    // but we want to return to the next instruction.
    p->trapframe->epc += 4;

    // an interrupt will change sstatus &c registers,
    // so don't enable until done with those registers.
    intr_on();

    syscall();
  } else if((which_dev = devintr()) != 0){
    // ok
  } 
  else if(r_scause()==13||r_scause()==15)
  {
    uint64 stval=r_stval();
    
    //找到stval在哪个map区域里面    
    int idx=findmap(stval);
   
    if(idx>=0)
    {
      int PTEword=PTE_U;
      int prot=(p->map_region[idx]).mmprot;
      uint64 length=(p->map_region[idx]).mmlength;
      //设置关键字
      if(prot&PROT_READ)
        PTEword|=PTE_R;
      if(prot&PROT_WRITE)
        PTEword|=PTE_W;      
      if(prot&PROT_EXEC)
        PTEword|=PTE_X;      
      
      uint64 sz=(p->map_region[idx]).mmapaddr;
      uint64 newsz=(p->map_region[idx]).mmapend;
      if((newsz=mmapalloc(p->pagetable, sz, newsz,PTEword))==0){
        printf("allocate error");
      }
      
      struct inode* ip=p->map_region[idx].ip;
      ilock(ip);
      readi(ip,1,(p->map_region[idx]).mmapaddr,0,length);
      iunlock(ip);
      
    } 
    else{
      p->killed = 1;	//没找到vma务必记得kill进程
    }  
  }
  else {
    printf("usertrap(): unexpected scause %p pid=%d\n", r_scause(), p->pid);
    printf("            sepc=%p stval=%p\n", r_sepc(), r_stval());
    p->killed = 1;
  }

  if(p->killed)
    exit(-1);

  // give up the CPU if this is a timer interrupt.
  if(which_dev == 2)
    yield();

  usertrapret();
}

首先是检测page fault。r_scause()等于13或15的时候说明发生了page fault，触发page fault的虚拟地址可以通过r_stval()获取。出现之后先使用findmap()在vma表里搜索该地址对应的映射。findmap()的原理很简单：传入一个地址addr，遍历vma表，看addr在哪个项的映射区间里即可，如果没找到返回-1：
```
// Find the mmap region of the current process that contains addr.
// Returns the index of the first map_region entry whose
// [mmapaddr, mmapend) range covers addr, or -1 if no entry does.
int findmap(uint64 addr)
{
  struct proc *cur = myproc();
  int slot = 0;

  while(slot < 16){
    uint64 lo = cur->map_region[slot].mmapaddr;
    uint64 hi = cur->map_region[slot].mmapend;
    if(lo <= addr && addr < hi)
      return slot;
    slot++;
  }
  return -1;
}
```
这里务必要注意一个细节：在本lab中，只有当进程访问了mmap()的未调入文件的映射空间时，page fault才是正常的。其他任何导致page fault的情况都是异常的。因此，如果findmap返回-1，说明进程访问了页表里不存在的、不该访问的地址空间，应该将其终止。（usertests里面的kernmem会检查这种情况，我费了好大力气才找到原因）

然后分配内存即可，这里如果产生了page fault，就直接把整个文件调入内存。稍微修改一下uvmalloc()得到mmapalloc()，它可以指定page的PTE项。mmapalloc()还有一个好处就是可以确保分配连续的虚拟内存空间。

// Allocate zero-filled physical pages and map one at every PGSIZE step
// from oldsz up to (but not including) newsz, using prot as the PTE
// permission bits.
//
// Returns newsz on success and oldsz when newsz < oldsz. Returns 0 when a
// page cannot be allocated or mapped; in that case the range mapped so far
// is handed to uvmdealloc().
uint64
mmapalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz, int prot)
{
  if(newsz < oldsz)
    return oldsz;

  // Start exactly at oldsz; it is not rounded up to a page boundary here.
  uint64 va = oldsz;
  while(va < newsz){
    char *page = kalloc();
    if(page == 0)
      goto fail;
    memset(page, 0, PGSIZE);

    if(mappages(pagetable, va, PGSIZE, (uint64)page, prot) != 0){
      // The page never made it into the page table, so free it directly.
      kfree(page);
      goto fail;
    }
    va += PGSIZE;
  }
  return newsz;

fail:
  // Undo the pages mapped before the failure.
  uvmdealloc(pagetable, va, oldsz);
  return 0;
}

使用readi把inode的数据读入到映射起始地址即可。注意调用readi之前要用ilock()锁住inode，读完之后再用iunlock()释放（inode的锁是睡眠锁，并不是读写锁）。

munmap

munmap()需要解除mmap()的映射。继续按照提示实现：

Implement munmap: find the VMA for the address range and unmap the specified pages (hint: use uvmunmap). If munmap removes all pages of a previous mmap, it should decrement the reference count of the corresponding struct file. If an unmapped page has been modified and the file is mapped MAP_SHARED, write the page back to the file. Look at filewrite for inspiration.

这里需要注意，munmap不一定会释放整个映射。按照实验说明，munmap()可能从映射的起始处释放、从末尾处释放，或者释放整个区域，但不会在区域中间“打洞”；本文的实现进一步假设每次都是按照从低到高的顺序，从剩余映射的起始处释放：

An munmap call might cover only a portion of an mmap-ed region, but you can assume that it will either unmap at the start, or at the end, or the whole region (but not punch a hole in the middle of a region).

munmap的思路还是很容易的：先使用findmap()找到对应的vma项。如果mmap()指定了MAP_SHARED，需要用filewrite()把映射里的修改写回文件。使用PGROUNDUP(length)/PGSIZE计算从addr开始需要释放的页数。

使用uvmunmap()释放相应的页，并将p->map_region[idx].mmlength减去length项。如果mmlength为0，说明映射已经彻底释放了，使用fileclose()关闭对应文件，并使用memset()把vma项清零。

uint64 sys_munmap(void)
{
  struct proc *p = myproc();
  uint64 addr=p->trapframe->a0;
  uint64 length=p->trapframe->a1;
  //printf("unmap %p:addr %p:length\n",addr,length);
  int idx=findmap(addr);
  if(idx<0)
  {
    return -1;
  }
  int npages=PGROUNDUP(length)/PGSIZE;
  //如果设置了MAP_SHARED
  if(p->map_region[idx].mmflag & MAP_SHARED)
  {
    //printf("reach here1\n");
    filewrite(p->map_region[idx].mmapfile, addr, length);
  }
  //printf("reach here2\n");
  uvmunmap(p->pagetable,addr,npages,1);

  p->map_region[idx].mmlength-=length;
  if(p->map_region[idx].mmlength==0)
  {
    fileclose(p->map_region[idx].mmapfile);
  //清除表项
    memset((void*)&p->map_region[idx],0,sizeof(vma));
  }

  return 0;
}

fork

修改fork()和exit()，让子进程拥有父进程的映射空间，实现效果是这样的:

不过本实验允许子进程不和父进程共享物理页面，只需要让两个进程的映射空间映射到同一个文件就可以了。

所以很简单，直接在fork()的时候把vma表复制到子进程就可以了。注意：Don't forget to increment the reference count for a VMA's struct file. 把如下代码插入fork()：

memmove(&np->map_region, &p->map_region,sizeof(vma)*16); 
  for(int idx=0;idx<16;idx++)
  {
    if(p->map_region[idx].valid!=0)//如果存在映射
    {
      filedup(p->map_region[idx].mmapfile);
    }
  }

还有一个问题：fork()复制页表的时候会调用uvmcopy()来复制父进程的页。如果只用上面的代码，会出现panic：uvmcopy: page not present。这是因为：调用mmap之后我们已经扩大了sz，但如果没有访问映射地址的话，映射地址内是没有合法的页的，这时0到sz的范围里会包含PTE_V==0的页。uvmcopy会复制0到sz范围内的所有页，因此会访问这些尚未通过page fault载入的页面，从而panic。

解决方法很简单：uvmcopy检查PTE_V的时候直接跳过，不执行复制即可。稍微修改一下uvmcopy()得到mmapcopy()，把fork()里面的uvmcopy换掉即可：

// Copy every page of the parent that is actually present into the child.
for(i = 0; i < sz; i += PGSIZE){
  // A mmap region that has never been touched has no valid PTEs, and may
  // not even have a leaf page-table page, in which case walk() returns 0.
  // Both cases mean "nothing to copy": the child faults the page in by
  // itself later.
  pte = walk(old, i, 0);
  if(pte == 0 || (*pte & PTE_V) == 0)
    continue;
  pa = PTE2PA(*pte);
  flags = PTE_FLAGS(*pte);
  if((mem = kalloc()) == 0)
    goto err;
  memmove(mem, (char*)pa, PGSIZE);
  if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){
    // mem was never installed in the child's page table; free it here.
    kfree(mem);
    goto err;
  }
}

exit()需要释放所有的映射。

遍历vma表。因为映射长度不一定是初始时的长度，所以要计算出剩余映射空间的起始位置offset.因为本实验中所有munmap的区域都是从低到高紧密连接的，所以offset到mmapend的距离一定是剩余的length。

// Release every mapping the exiting process still holds.
for(int idx = 0; idx < 16; idx++){
  if(p->map_region[idx].valid == 0)
    continue;

  // mmlength is a byte count that need not be a multiple of the page size.
  // Round it up before working out where the remaining part of the mapping
  // starts, otherwise uvmunmap() is handed an unaligned address.
  // This relies on regions being unmapped from the low end upwards, so the
  // remaining part always ends at mmapend.
  uint64 span = PGROUNDUP(p->map_region[idx].mmlength);
  uint64 start = p->map_region[idx].mmapend - span;

  // NOTE(review): pages of MAP_SHARED mappings are not written back to the
  // file here - confirm whether exit should do that like munmap does.
  fileclose(p->map_region[idx].mmapfile);
  uvmunmap(p->pagetable, start, span / PGSIZE, 1);
  // Clear the slot so it reads as free.
  memset((void*)&p->map_region[idx], 0, sizeof(vma));
}

这里uvmunmap也会出现panic，修改一下uvmunmap跳过PTE_V的检测：（其实这里也应该像上面一样新定义一个函数）

// Remove npages of mappings starting at va, optionally freeing the memory.
for(a = va; a < va + npages*PGSIZE; a += PGSIZE){
  // Pages of a lazily populated mmap region may not be mapped yet, and the
  // leaf page-table page may not exist either (walk() returns 0). Skip just
  // that page and keep going: returning here would leave every later page
  // in the range mapped and its memory leaked.
  pte = walk(pagetable, a, 0);
  if(pte == 0 || (*pte & PTE_V) == 0)
    continue;
  if(PTE_FLAGS(*pte) == PTE_V)
    panic("uvmunmap: not a leaf");
  if(do_free){
    uint64 pa = PTE2PA(*pte);
    kfree((void*)pa);
  }
  *pte = 0;
}

posted @ 2021-11-22 23:15 LunaCancer 阅读(1137) 评论(2) 编辑收藏举报

刷新页面返回顶部

LunaCancer

MIT 6.S081 2021: Lab mmap

mmap

munmap

fork

公告