MIT xv6 2020系列实验:Lab3 page tables
最重量级的一个,做完这个后面感觉浑身轻松。学校魔改版本的比这个还难一些。
1 Print a page table (easy)
easy难度,打印页表函数,在后面的debug部分很有用。
先找到vmprint应该被执行的地方,在xv6的exec函数里对进程进行判断,如果pid为1说明是初始进程应该打印页表:
exec.c:
proc_freepagetable(oldpagetable, oldsz);
if(p->pid == 1)
vmprint(p->pagetable);
uvmunmap(p->k_pagetable, 0, PGROUNDUP(oldsz)/PGSIZE, 0);
if(sync_pagetable(p, 0, p->sz) < 0)
这样即可。
接下来是vmprint函数,和递归部分分开,因为要打印一个表头:
vm.c:
// Print the contents of a page table for debugging.
// The header line shows the address of the root page-table page that was
// passed in; vm_depth_output() then prints one line per valid PTE.
void vmprint(pagetable_t pagetable){
  // Print the argument itself. Dereferencing it (PTE2PA(*pagetable)) would
  // read entry 0 of the root table and report the address that entry holds
  // instead of the address of the table being dumped.
  printf("page table %p\n", pagetable);
  vm_depth_output(pagetable, 1, 0);
}
接下来再写完vm_depth_output函数。pagetable的实际含义是一个指向页表页起始位置的指针,一个页表页的大小是4KB,包含512个8字节的页表项(PTE),每个PTE可以指向下一级页表的物理地址,也可以指向实际页面的物理地址。虚拟地址的高27位就是遍历过程中三级索引(每级9位)从高到低依次连接而成,再加上12位页内偏移。每层递归对512个PTE进行遍历:先看PTE_V判断该项是否有效,再看PTE_R/PTE_W/PTE_X是否全为0来判断它是否指向下一级页表;若R/W/X任一位被置位则是叶子节点,只打印该页表项,否则打印页表项后还需继续递归。
vm.c:
// Recursively print every valid PTE reachable from `pagetable`.
//   pagetable - page-table page to scan (512 entries)
//   depth     - 1 for the root page, incremented by one on each descent
//   your_addr - virtual-address bits accumulated from the indices walked so
//               far; it is carried down the recursion but not printed
// A valid PTE with none of R/W/X set is treated as a pointer to a
// lower-level table: it is printed with a ".." or ".. .." prefix and then
// descended into. Any other valid PTE is printed with the ".. .. .." prefix.
void vm_depth_output(pagetable_t pagetable,int depth,uint64 your_addr){
  for (int i = 0; i < 512; i++) {
    pte_t pte = pagetable[i];
    if ((pte & PTE_V) == 0)
      continue;

    if ((pte & (PTE_R | PTE_W | PTE_X)) == 0) {
      // this PTE points to a lower-level page table.
      uint64 child = PTE2PA(pte);
      if (depth == 1)
        printf("..%d: pte %p pa %p\n", i, pte, child);
      else
        printf(".. ..%d: pte %p pa %p\n", i, pte, child);

      // Index i at this depth supplies 9 virtual-address bits that sit above
      // the 12-bit page offset. Widen i before shifting so the result is not
      // truncated to int, and skip the shift when depth is out of range so
      // the shift count can never go negative.
      uint64 va = your_addr;
      if (depth >= 1 && depth <= 3)
        va |= (uint64)i << (12 + 9 * (3 - depth));
      vm_depth_output((pagetable_t)child, depth + 1, va);
    } else {
      printf(".. .. ..%d: pte %p pa %p\n", i, pte, PTE2PA(pte));
    }
  }
}
2 A kernel page table per process (hard)
最难且重量级的一项放在lab3,后面的实验就都是水怪实验。创建一个内核页表,我们为proc额外初始化一个k_pagetable与kstack_pa:
proc.h:
// Per-process state.
// Compared with a single shared kernel page table, this layout adds
// k_pagetable and kstack_pa so that every process carries its own
// kernel page table.
struct proc {
struct spinlock lock;
// p->lock must be held when using these:
enum procstate state; // Process state
struct proc *parent; // Parent process
void *chan; // If non-zero, sleeping on chan
int killed; // If non-zero, have been killed
int xstate; // Exit status to be returned to parent's wait
int pid; // Process ID
// these are private to the process, so p->lock need not be held.
uint64 kstack; // Virtual address of kernel stack
uint64 sz; // Size of process memory (bytes)
pagetable_t pagetable; // User page table
// Per-process kernel page table: built by prockvminit() in allocproc and
// released by freekpage() in freeproc.
pagetable_t k_pagetable; // Kernel page table
// Recorded in procinit when the stack page is allocated, so that allocproc
// can map the same page at kstack inside k_pagetable.
uint64 kstack_pa; // Physical address of kernel stack
struct trapframe *trapframe; // data page for trampoline.S
struct context context; // swtch() here to run process
struct file *ofile[NOFILE]; // Open files
struct inode *cwd; // Current directory
char name[16]; // Process name (debugging)
};
进程独立内核页表的意义:每个进程有一个自己的内核页表,主要是为了把用户页表的映射合并进内核页表,这样内核态访问用户指针时可以直接由硬件完成地址翻译,而不必用软件遍历用户页表来查找物理地址、浪费时间。由于每个进程的用户页表都是独立的,合并了用户映射的内核页表也需要每个进程各有一份。
对于内核栈,在procinit时就给每个进程初始化好内核栈:
void procinit(void) {
struct proc *p;
initlock(&pid_lock, "nextpid");
for (p = proc; p < &proc[NPROC]; p++) {
initlock(&p->lock, "proc");
if(!p->kstack)
{
char *pa = kalloc();
if (pa == 0) panic("kalloc");
uint64 va = KSTACK((int)(p - proc));
kvmmap(va, (uint64)pa, PGSIZE, PTE_R | PTE_W);
p->kstack = va;
p->kstack_pa = (uint64)pa;
}
}
kvminithart();
}
仿照vm.c中的kvminit(由main.c调用)初始化每个进程的内核页表
proc.c:
// Add a mapping of sz bytes from virtual address va to physical address pa,
// with permission bits perm, to the given page table.
// Panics with "prockvmmap" if mappages_with_realloc() reports failure.
void
prockvmmap(pagetable_t pagetable,uint64 va, uint64 pa, uint64 sz, int perm)
{
  int rc = mappages_with_realloc(pagetable, va, sz, pa, perm);

  if(rc == 0)
    return;
  panic("prockvmmap");
}
pagetable_t
prockvminit(){
pagetable_t pagetable = (pagetable_t) kalloc();
if(pagetable == 0)
panic("kalloc");
// each page size is 4096 byte = 4KB
// kernel page table
memset(pagetable, 0, PGSIZE);
// uart registers
// uart mmmap
prockvmmap(pagetable,UART0, UART0, PGSIZE, PTE_R | PTE_W);
// virtio mmio disk interface
prockvmmap(pagetable,VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W);
// CLINT
//prockvmmap(pagetable,CLINT, CLINT, 0x10000, PTE_R | PTE_W);
// PLIC
prockvmmap(pagetable,PLIC, PLIC, 0x400000, PTE_R | PTE_W);
// map kernel text executable and read-only.
prockvmmap(pagetable,KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X);
// map ernel data and the physical RAM we'll make use of.
prockvmmap(pagetable,(uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W);
// map the trampoline for trap entry/exit to
// the highest virtual address in the kernel.
prockvmmap(pagetable,TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X);
return pagetable;
}
在allocproc中初始化页表:
p->k_pagetable = prockvminit();
prockvmmap(p->k_pagetable,p->kstack,p->kstack_pa,PGSIZE, PTE_R | PTE_W);
并添加页表释放函数:
// Free the page-table pages of a page table without touching the pages the
// leaf entries map.
//   pagetable - page-table page to release
//   level     - 2 for the root; at that level each valid entry is descended
//               into with level-1, at any other level the page each valid
//               entry points to is handed straight to kfree()
// Every valid entry is cleared, and finally the page-table page itself is
// freed. No check is made that an entry really refers to a page-table page.
void freewalk_withoutchk(pagetable_t pagetable,int level) {
  // there are 2^9 = 512 PTEs in a page table.
  for (int idx = 0; idx < 512; idx++) {
    pte_t entry = pagetable[idx];
    if ((entry & PTE_V) == 0)
      continue;

    pagetable_t next = (pagetable_t)PTE2PA(entry);
    if (level == 2)
      freewalk_withoutchk(next, level - 1);
    else
      kfree((void *)next);

    pagetable[idx] = 0;
  }
  kfree((void *)pagetable);
}
// Release a per-process kernel page table: frees its page-table pages,
// starting from the root (level 2), via freewalk_withoutchk().
void freekpage(pagetable_t pagetable){
  const int root_level = 2;

  freewalk_withoutchk(pagetable, root_level);
}
在freeproc内添加有关内核页表的消除:
static void freeproc(struct proc *p) {
if (p->trapframe) kfree((void *)p->trapframe);
p->trapframe = 0;
///添加部分
if (p->k_pagetable) freekpage(p->k_pagetable);
p->k_pagetable = 0;
///添加结束
if (p->pagetable) proc_freepagetable(p->pagetable, p->sz);
p->pagetable = 0;
以及在scheduler中,切换到某个进程之前,要先把该进程的内核页表装入satp使其生效,然后再进行上下文的切换:
// Scan the process table once and run every RUNNABLE process found.
for (p = proc; p < &proc[NPROC]; p++) {
  acquire(&p->lock);
  if (p->state == RUNNABLE) {
    // Switch to chosen process. It is the process's job
    // to release its lock and then reacquire it
    // before jumping back to us.
    p->state = RUNNING;
    c->proc = p;

    // Make the process's own kernel page table the active one before
    // switching to it.
    procinithart(p->k_pagetable);
    swtch(&c->context, &p->context);

    // Back in the scheduler: return to the global kernel page table.
    // Otherwise this CPU keeps translating through p->k_pagetable while no
    // process is running, even after freeproc() has released that table.
    kvminithart();

    // Process is done running for now.
    // It should have changed its p->state before coming back.
    c->proc = 0;
    found = 1;
  }
  release(&p->lock);
}
完成,内核页表添加完毕。
3 Simplify copyin/copyinstr (hard)
copyin的作用是将用户态数据拷贝到内核态。现有的copyin与copyinstr是基于软件实现的,要切换成直接使用内核页表完成地址映射,不需要再查用户页表。
用一个sync_pagetable来统一内核与用户页表:
vm.c
// Mirror the user mappings for [PGROUNDUP(start), end) from p->pagetable
// into p->k_pagetable, with PTE_U cleared on the kernel-side copies.
//   p     - process whose two page tables are to be kept in sync
//   start - first user virtual address of the range (rounded up to a page)
//   end   - one past the last user virtual address of the range
// Returns 0 on success. Returns -1 if the range would reach PLIC or if a
// mapping cannot be installed; in the latter case the pages mirrored by this
// call are unmapped again. Panics if a page in the range is not mapped in
// the user page table.
int
sync_pagetable(struct proc *p,uint64 start, uint64 end){
  uint64 first = PGROUNDUP(start);
  uint64 va;

  // The per-process kernel page table maps PLIC and everything above it for
  // the kernel's own use, so user addresses must stay below it.
  if(first < end && end > PLIC)
    return -1;

  for(va = first; va < end; va += PGSIZE){
    pte_t *pte = walk(p->pagetable, va, 0);
    if(pte == 0)
      panic("sync_pagetable: pte should exists");
    if((*pte & PTE_V) == 0)
      panic("sync_pagetable: pte should exists");

    uint64 pa = PTE2PA(*pte);
    // Same permissions as the user mapping, minus the user-access bit.
    uint flags = PTE_FLAGS(*pte) & (~PTE_U);
    if(mappages_with_realloc(p->k_pagetable, va, PGSIZE, pa, flags) != 0)
      goto err;
  }
  return 0;

err:
  // Undo exactly the pages mapped above. The loop began at the rounded-up
  // address, so the cleanup must begin there too: passing the raw `start`
  // would give uvmunmap an unaligned address and a wrong page count.
  uvmunmap(p->k_pagetable, first, (va - first)/PGSIZE, 0);
  return -1;
}
需要维护内核用户页表统一性的地方有四处:
1)exec.c,维护进程一开始创建页表后的页表映射
if(p->pid == 1)
vmprint(p->pagetable);
uvmunmap(p->k_pagetable, 0, PGROUNDUP(oldsz)/PGSIZE, 0);
///添加
if(sync_pagetable(p, 0, p->sz) < 0)
goto bad;
///结束
return argc; // this ends up in a0, the first argument to
2)userinit中创建第一个用户进程的部分:
proc.c
// allocate one user page and copy init's instructions
// and data into it.
uvminit(p->pagetable, initcode, sizeof(initcode));
p->sz = PGSIZE;
///添加
sync_pagetable(p, 0, p->sz);
///结束
// prepare for the very first "return" from kernel to user.
p->trapframe->epc = 0; // user program counter
p->trapframe->sp = PGSIZE; // user stack pointer
safestrcpy(p->name, "initcode", sizeof(p->name));
p->cwd = namei("/");
3)sbrk扩大进程内存(堆空间)的部分,会改变用户页表的映射,因此需要同步到内核页表,具体在growproc中维护:
if ((sz = uvmalloc(p->pagetable, sz, sz + n)) == 0) {
return -1;
}
////添加部分
if (sync_pagetable(p, p->sz, sz) < 0)
return -1;
////结束
} else if (n < 0) {
4)fork部分,fork出的新进程也需要复制一份内核页表:
np->sz = p->sz;
np->parent = p;
///添加部分
if(sync_pagetable(np,0,np->sz) < 0){
freeproc(np);
release(&np->lock);
return -1;
}
///结束
// copy saved user registers.
*(np->trapframe) = *(p->trapframe);
完成用户-内核页表维护,可以快乐地修改copyin与copyinstr了,分别在函数下面修改:
vm.c
// Copy from user to kernel.
// Copy len bytes to dst from virtual address srcva by delegating to
// copyin_new(), and return whatever copyin_new() returns.
// The SSTATUS_SUM bit of sstatus is set for the duration of the copy and
// cleared afterwards (in the RISC-V privileged spec, SUM permits
// supervisor-mode access to user-accessible pages).
int copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len) {
  int result;

  w_sstatus(r_sstatus() | SSTATUS_SUM);
  result = copyin_new(pagetable, dst, srcva, len);
  w_sstatus(r_sstatus() & ~SSTATUS_SUM);

  return result;
}
// Copy a null-terminated string from user to kernel.
// Copies bytes to dst from virtual address srcva until a '\0' or until max
// bytes, by delegating to copyinstr_new().
// Return 0 on success, -1 on error.
// The SSTATUS_SUM bit of sstatus is set for the duration of the copy and
// cleared afterwards (in the RISC-V privileged spec, SUM permits
// supervisor-mode access to user-accessible pages).
int copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max) {
  int result;

  w_sstatus(r_sstatus() | SSTATUS_SUM);
  result = copyinstr_new(pagetable, dst, srcva, max);
  w_sstatus(r_sstatus() & ~SSTATUS_SUM);

  return result;
}
直接使用新函数,并在调用前后分别设置、清除sstatus寄存器的SUM位即可。
lab3到这里就完成了。