MIT 6.S081 Lab lazy page allocation Part1 Eliminate allocation from sbrk()
前言
Lab的链接 :https://pdos.csail.mit.edu/6.828/2019/labs/lazy.html
uint64 sys_sbrk(void) { int addr; int n; if(argint(0, &n) < 0) return -1; // addr = myproc()->sz; // if(growproc(n) < 0) // return -1; // return addr; addr = myproc()->sz; myproc()->sz += n; return addr; }
即分配堆空间时,不为addr添加虚实映射关系。启动shell后执行命令echo hi,就会panic并打印以下信息:
init: starting sh $ echo hiusertrap(): unexpected scause 0x000000000000000f pid=3 sepc=0x0000000000001258 stval=0x0000000000004008 va=0x0000000000004000 pte=0x0000000000000000 panic: uvmunmap: not mapped
需要我们分析usertrap和panic的原因。
本篇博客全文都在追查这一panic的原因,并对userinit的代码做了简单的细节回顾。
到底是哪个进程在犯罪?
0x0000000000001000 in ?? () => 0x0000000000001000: 97 02 00 00 auipc t0,0x0 (gdb) b panic Breakpoint 1 at 0x80000554: file kernel/printf.c, line 119. (gdb) c Continuing. Breakpoint 1, panic (s=s@entry=0x80008340 "uvmunmap: not mapped") at kernel/printf.c:119 119 { (gdb)
先抓到那个犯罪的进程:
Breakpoint 1, panic (s=s@entry=0x80008340 "uvmunmap: not mapped") at kernel/printf.c:119 119 { (gdb) p cpus[0].proc $1 = (struct proc *) 0x80325600 (gdb) p *cpus[0].proc $2 = {lock = {locked = 1, name = 0x80008478 "process lock", cpu = 0x80012928 <cpus>, n = 267, nts = 0}, state = RUNNING, parent = 0x80325400, chan = 0x0, killed = 0, xstate = 0, pid = 2, kstack = 274877886464, sz = 16384, pagetable = 0x87fa9000, tf = 0x80356000, context = {ra = 2147492184, sp = 274877890272, s0 = 274877890320, s1 = 2150782464, s2 = 2147559640, s3 = 1, s4 = 2150782464, s5 = 4, s6 = 1, s7 = 0, s8 = 0, s9 = 0, s10 = 0, s11 = 0}, ofile = {0x803253c0, 0x803253c0, 0x803253c0, 0x0 <repeats 13 times>}, cwd = 0x8001b548 <icache+32>, name = "sh", '\000' <repeats 13 times>} (gdb)
我本以为是echo程序发生了boom,没想到啊没想到,sh你这个浓眉大眼的家伙居然叛变了......
直接continue,panic触发了:
$ echo hi usertrap(): unexpected scause 0x000000000000000f (store/AMO page fault) pid=3 sepc=0x00000000000012ae stval=0x0000000000004008 va=0x0000000000004000 pte=0x0000000000000000 PANIC: uvmunmap: not mapped HINT: restart xv6 using 'make qemu-gdb', type 'b panic' (to set breakpoint in panic) in the gdb window, followed by 'c' (continue), and when the kernel hits the breakpoint, type 'bt' to get a backtrace
在gdb端ctrl + c暂时中断,按bt查看堆栈:
(gdb) c Continuing. ^C Program received signal SIGINT, Interrupt. consputc (c=c@entry=37) at kernel/console.c:38 38 ; (gdb) b *0x00000000000012ae Breakpoint 2 at 0x12ae (gdb) bt #0 consputc (c=c@entry=37) at kernel/console.c:38 #1 0x000000008000076c in printf (fmt=fmt@entry=0x800081f8 "%ld") at kernel/printf.c:107 #2 0x00000000800005ca in panic (s=s@entry=0x80008340 "uvmunmap: not mapped") at kernel/printf.c:127 #3 0x000000008000121a in uvmunmap (pagetable=pagetable@entry=0x80358000, va=va@entry=0, size=size@entry=81920, do_free=do_free@entry=1) at kernel/vm.c:201 #4 0x000000008000145e in uvmfree (pagetable=pagetable@entry=0x80358000, sz=sz@entry=81920) at kernel/vm.c:316 #5 0x0000000080001cf8 in proc_freepagetable (pagetable=0x80358000, sz=81920) at kernel/proc.c:222 #6 0x0000000080001d2e in freeproc (p=p@entry=0x80325a00) at kernel/proc.c:177 #7 0x0000000080002458 in wait (addr=0) at kernel/proc.c:460 #8 0x0000000080002e5c in sys_wait () at kernel/sysproc.c:38 #9 0x0000000080002d9e in syscall () at kernel/syscall.c:142 #10 0x0000000080002a6a in usertrap () at kernel/trap.c:70 #11 0x0000000000000ace in ?? ()
问题和释放进程的代码有关,查看一下当前的所有进程(procs是我在前面的Lab里写的数据结构,如果没有魔改就直接查proc):
(gdb) p *(struct proc*)(procs.vec_[0]) $10 = {lock = {locked = 0, name = 0x80008478 "process lock", cpu = 0x0, n = 383, nts = 0}, state = SLEEPING, parent = 0x0, chan = 0x80325400, killed = 0, xstate = 0, pid = 1, kstack = 274877894656, sz = 12288, pagetable = 0x8035d000, tf = 0x80355000, context = {ra = 2147492184, sp = 274877898464, s0 = 274877898512, s1 = 2150781952, s2 = 2147559640, s3 = 1, s4 = 2150781952, s5 = 4, s6 = 1, s7 = 0, s8 = 0, s9 = 0, s10 = 0, s11 = 0}, ofile = {0x803253c0, 0x803253c0, 0x803253c0, 0x0 <repeats 13 times>}, cwd = 0x8001b548 <icache+32>, name = "init\000\000de\000\000\000\000\000\000\000"} (gdb) p *(struct proc*)(procs.vec_[1]) $11 = {lock = {locked = 1, name = 0x80008478 "process lock", cpu = 0x80012928 <cpus>, n = 267, nts = 0}, state = RUNNING, parent = 0x80325400, chan = 0x0, killed = 0, xstate = 0, pid = 2, kstack = 274877886464, sz = 16384, pagetable = 0x87fa9000, tf = 0x80356000, context = {ra = 2147492184, sp = 274877890272, s0 = 274877890320, s1 = 2150782464, s2 = 2147559640, s3 = 1, s4 = 2150782464, s5 = 4, s6 = 1, s7 = 0, s8 = 0, s9 = 0, s10 = 0, s11 = 0}, ofile = {0x803253c0, 0x803253c0, 0x803253c0, 0x0 <repeats 13 times>}, cwd = 0x8001b548 <icache+32>, name = "sh", '\000' <repeats 13 times>} (gdb) p *(struct proc*)(procs.vec_[2]) $12 = {lock = {locked = 1, name = 0x80008478 "process lock", cpu = 0x80012928 <cpus>, n = 7, nts = 0}, state = ZOMBIE, parent = 0x80325600, chan = 0x0, killed = 1, xstate = -1, pid = 3, kstack = 274877878272, sz = 81920, pagetable = 0x80358000, tf = 0x0, context = {ra = 2147492184, sp = 274877882224, s0 = 274877882272, s1 = 2150783488, s2 = 2147559640, s3 = 0, s4 = 18446744073709551615, s5 = 18446744073709551615, s6 = 4096, s7 = 0, s8 = 0, s9 = 0, s10 = 0, s11 = 0}, ofile = { 0x0 <repeats 16 times>}, cwd = 0x0, name = "sh", '\000' <repeats 13 times>}
确认这是回收进程释放进程页表的时候造成的惨剧;
第三个进程名字仍然是sh,说明第二个进程发生了fork,查阅user/sh.c,可以发现fork相关的代码:
int main(void) { .... // Read and run input commands. while(getcmd(buf, sizeof(buf)) >= 0){ if(buf[0] == 'c' && buf[1] == 'd' && buf[2] == ' '){ // Chdir must be called by the parent, not the child. buf[strlen(buf)-1] = 0; // chop \n if(chdir(buf+3) < 0) fprintf(2, "cannot cd %s\n", buf+3); continue; } if(fork1() == 0) runcmd(parsecmd(buf)); wait(0); } exit(0); } ... }
可以推断,sh首先fork出了新的进程,但还没有来得及在新进程中“装入”echo,就发生了崩溃,而这个崩溃必定和sbrk有关。
OK,犯罪的进程被我们找到了,下面我们进一步深入查找哪行代码触发了pagefault。
继续追查
查看user/umalloc.c可以看到跟sbrk相关的函数morecore,而morecore又被malloc调用,malloc是用户请求堆内存的函数(sh在解析命令时会用到它)。
OK,我们现在来重现一下这个panic。这回我们用vscode让界面好看点。
重启内核,不要打任何断点,先跑到shell界面出现。然后在kernel/proc.c下的scheduler函数上打下断点。这个断点没啥意义,仅仅是让程序停下,好让我们切换符号表而已。
删掉刚才那个断点,在调试控制台输入-exec file user/_sh,切换成sh的符号表,然后在user/umalloc.c的malloc函数下打满断点(下图中打的少了,因为很多断点都会因为代码优化而无法触发)
在终端中键入echo hi,触发断点:
进入morecore(),就可以发现sbrk()即将被调用。我们把断点都删去,键入-exec file kernel/kernel,重新切换回内核的符号表,然后在kernel/syscall.c的syscall函数中打断点,截获这个sys_sbrk系统调用:
continue执行,再前进几步,就可以进入sys_sbrk:
罪魁祸首出现了。现在我们删光断点,切换回sh的符号表(-exec file user/_sh),然后在user/umalloc.c的morecore函数里打满断点,按c运行到断点处:
注意p是sys_sbrk返回的虚地址,且是一个悬空的虚地址(它在页表中没有对应的实地址)。对这个地址进行访问必定会触发trap。我们接下来切换到kernel的符号表准备捕获这个trap。由于我无法把握准到底这个trap会是usertrap还是kerneltrap(进程虚地址到实地址的映射我记得应该是硬件自动完成的,所以无法确认到底该把断点打到哪儿),因此我把kernel/trap.c中的usertrap和kerneltrap均打满断点,然后continue,发现进入的是usertrap,再执行几步,发现这个进程因为page fault挂掉了,且pagefault的错误日志也被打印到了控制台上:
进程挂掉了,父进程(sh进程,pid=2)要调用wait回收这个子进程,在wait中调用了freeproc。freeproc流程中要释放掉子进程的用户地址空间,负责这一步的函数是uvmfree(),它会释放虚地址0到p->sz之间的全部页面。我们在panic上打上断点,然后continue,就在调用堆栈中可以发现它了:
最后由uvmunmap函数检查页表的存在位PTE_V,触发panic。
这样我们就找到了panic的原因。总结一下:
1) shell进程从终端接收到新的命令字符
2) shell发生fork,父进程调用wait等待子进程执行完,子进程首先要解释命令字符
3) 子进程为了解释命令字符,调用了sbrk请求堆空间,并返回虚地址p,但地址p是一个悬空的虚地址,其虚实映射关系不存在,在user/umalloc.c下的morecore函数中:
// Ask the kernel for more heap through sbrk() and pass the new memory to
// free(). nu is the request size in Header-sized units; requests smaller
// than 4096 units are raised to 4096. Returns freep after the new block has
// been handed to free(), or 0 when sbrk() reports failure.
static Header*
morecore(uint nu)
{
  char *p;
  Header *hp;

  if(nu < 4096)
    nu = 4096;
  p = sbrk(nu * sizeof(Header));
  if(p == (char*)-1)  // compares the pointer value only; p is not dereferenced here
    return 0;
  hp = (Header*)p;
  hp->s.size = nu;    // first store into the memory returned by sbrk()
  free((void*)(hp + 1));  // passes the address just past the header
  return freep;
}
4) 赋值语句hp->s.size = nu向虚地址p所在的页写入数据(if语句只是比较指针的值,并不访问内存;日志中的stval=0x4008正是p=0x4000之后的第8个字节),触发store page fault,进入kernel/trap.c的usertrap()函数中,
5) usertrap()在终端打印pagefault的警告日志,设定本进程的killed为1,即本进程suicide。
6) 子进程suicide,父进程回收子进程。父进程首先要释放掉子进程的用户地址空间,调用uvmfree。uvmfree要释放虚地址0到p->sz之间的所有页,而sbrk新增的那段虚地址(从0x4000开始)没有有效的PTE,于是uvmunmap触发panic。
再谈谈userinit
如果你仔细阅读过kernel/proc.c的代码,应该还会记得用户的第一个进程是没有分配用户栈空间的。回顾一下userinit的代码:
// Set up first user process. void userinit(void) { struct proc *p; p = allocproc(); initproc = p; // allocate one user page and copy init's instructions // and data into it. uvminit(p->pagetable, initcode, sizeof(initcode)); p->sz = PGSIZE; vmprint(p->pagetable); // prepare for the very first "return" from kernel to user. p->tf->epc = 0; // user program counter p->tf->sp = PGSIZE; // user stack pointer safestrcpy(p->name, "initcode", sizeof(p->name)); p->cwd = namei("/"); p->state = RUNNABLE; release(&p->lock); }
p->tf->sp直接指向了PGSIZE。此时页表中是没有PGSIZE这个虚地址对应的实地址的。这也就意味着,当这个用户进程被调用时,坚决不能访问栈空间,否则就是前面的echo hi的下场,还没echo出来就凉透了。那么用户的第一个进程为什么能做到不访问栈空间?即initcode为什么没有使用栈空间?
可以看一下我之前的blog,对这个initcode进行了调试:https://www.cnblogs.com/KatyuMarisaBlog/p/13727565.html
The target architecture is assumed to be riscv:rv64 0x0000000000001000 in ?? () (gdb) b *0x0 Breakpoint 1 at 0x0 (gdb) c Continuing. Breakpoint 1, 0x0000000000000000 in ?? () => 0x0000000000000000: 17 05 00 00 auipc a0,0x0 (gdb) si 0x0000000000000004 in ?? () => 0x0000000000000004: 13 05 05 02 addi a0,a0,32 (gdb) 0x0000000000000008 in ?? () => 0x0000000000000008: 97 05 00 00 auipc a1,0x0 (gdb) 0x000000000000000c in ?? () => 0x000000000000000c: 93 85 05 02 addi a1,a1,32 (gdb) 0x0000000000000010 in ?? () => 0x0000000000000010: 9d 48 li a7,7 (gdb) 0x0000000000000012 in ?? () => 0x0000000000000012: 73 00 00 00 ecall
可以看到,initcode根本没有访问栈空间,全程都在使用寄存器。initcode其实就是exec("init"),调用exec将用户程序"init"装入到第一个进程中。
我们查看kernel/exec.c,可以发现,exec在加载ELF文件时,为这个进程分配了用户栈空间,并将映射关系添加到了页表中:
int exec(char *path, char **argv) { ....... // Allocate two pages at the next page boundary. // Use the second as the user stack. sz = PGROUNDUP(sz); if((sz = uvmalloc(pagetable, sz, sz + 2*PGSIZE)) == 0) goto bad; uvmclear(pagetable, sz-2*PGSIZE); sp = sz; stackbase = sp - PGSIZE; // Push argument strings, prepare rest of stack in ustack. for(argc = 0; argv[argc]; argc++) { if(argc >= MAXARG) goto bad; sp -= strlen(argv[argc]) + 1; sp -= sp % 16; // riscv sp must be 16-byte aligned if(sp < stackbase) goto bad; if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0) goto bad; ustack[argc] = sp; } ustack[argc] = 0; // push the array of argv[] pointers. sp -= (argc+1) * sizeof(uint64); sp -= sp % 16; if(sp < stackbase) goto bad; if(copyout(pagetable, sp, (char *)ustack, (argc+1)*sizeof(uint64)) < 0) goto bad; // arguments to user main(argc, argv) // argc is returned via the system call return // value, which goes in a0. p->tf->a1 = sp; // Save program name for debugging. for(last=s=path; *s; s++) if(*s == '/') last = s+1; safestrcpy(p->name, last, sizeof(p->name)); // Commit to the user image. oldpagetable = p->pagetable; p->pagetable = pagetable; p->sz = sz; p->tf->epc = elf.entry; // initial program counter = main p->tf->sp = sp; // initial stack pointer proc_freepagetable(oldpagetable, oldsz); return argc; // this ends up in a0, the first argument to main(argc, argv) ......... }
由于第二个进程shell是fork自第一个进程的,因此第二个进程执行exec时是有栈空间的,没有触发pagefault。
为什么xv6下用户程序main函数要用exit(0)?
如果你曾经做过Lab1,你可能曾在main函数中使用return 0来替代exit(0)。这样的用户程序在执行时可能会打印各种奇怪的东西,还可能会触发pagefault:
# 我曾经写的user/pingpong.c,最后return 0会打印以下奇怪的信息。[line 16]是不应该被打印出来的。
$ pingpong 5: received ping [line 16] usertrap(): unexpected scause 0x000000000000000d (load page fault) pid=5 sepc=0x00000000000006f0 stval=0xfffffffffffff86a 4: received pong [line 16] usertrap(): unexpected scause 0x000000000000000d (load page fault) pid=4 sepc=0x00000000000006f0 stval=0xfffffffffffff867
使用exit(0)的原因很容易理解:main结束时进程就应当结束。进程自己调用exit(0),释放掉自己可以释放的资源(在xv6中它仅仅可以自己关闭自己所有的文件描述符),唤醒自己的父进程。如果有孩子进程,则将孩子进程丢给initproc;本进程剩余的资源由父进程回收。
但使用return 0会发生什么呢?如果你曾调试过函数的调用和返回过程,你就会发现其中的盲点:return语句最终会被编译为ret指令(在此之前,函数的尾声代码会先释放本函数的栈帧),ret指令将返回地址寄存器ra中的值写入到pc中。但此时ra中的值是什么呢?如果ra值是个未定义的值,那么会发生什么呢?
我们随便调试一个程序来检查一下吧,这是我的user/sleep.c代码:
#include "kernel/types.h"
#include "user.h"

// Parse arg as a non-negative decimal integer.
// Returns -1 when arg is empty or contains any character other than
// '0'..'9'; otherwise returns atoi(arg).
int parse_int(const char *arg) {
  const char *p = arg;

  // An empty string has no digits at all; reject it instead of
  // letting atoi() turn it into 0.
  if (*p == '\0') {
    return -1;
  }
  for ( ; *p; p++) {
    if (*p < '0' || *p > '9') {
      return -1;
    }
  }
  return atoi(arg);
}

// Expects exactly one argument, a non-negative decimal number, and passes
// it to sleep(). Usage and argument errors are reported on file descriptor 2
// and end the process with exit status 1; success ends it with status 0.
int main(int argc, char **argv) {
  int time;

  if (argc != 2) {
    fprintf(2, "you must input one argument only\n");
    exit(1);
  }

  time = parse_int(argv[1]);
  if (time < 0) {
    fprintf(2, "error argument : %s\n", argv[1]);
    exit(1);
  }

  sleep(time);
  // Leave main() through exit() rather than return: after exec the ra
  // register still holds a stale address from the previous program image,
  // so a plain return would jump to it.
  exit(0);
}
如果你对xv6的debug方法不太熟悉,推荐阅读一下这篇blog:https://www.cnblogs.com/KatyuMarisaBlog/p/13727565.html
我们回顾一下我们在shell输入sleep 10时经历的过程:
1) shell 对sleep 10进行解析。
2)shell进程发生fork,子进程调用exec并执行sleep 10。
3)子进程结束,父进程从wait中恢复。
开局我们用user/_sh的符号表,把断点打在runcmd上:
For help, type "help". Type "apropos word" to search for commands related to "word"... Reading symbols from kernel/kernel... The target architecture is assumed to be riscv:rv64 0x0000000000001000 in ?? () (gdb) file user/_sh Reading symbols from user/_sh... (gdb) b runcmd Breakpoint 1 at 0xa8: file user/sh.c, line 59. (gdb) c Continuing. Breakpoint 1, runcmd (cmd=0x13f50) at user/sh.c:59 59 { (gdb) n 67 if(cmd == 0) (gdb) 70 switch(cmd->type){ (gdb) 76 if(ecmd->argv[0] == 0) (gdb) 78 exec(ecmd->argv[0], ecmd->argv); (gdb) info reg ra ra 0xb4a 0xb4a <main+234> (gdb) si 0x00000000000000f6 78 exec(ecmd->argv[0], ecmd->argv); (gdb) 0x00000000000000fa 78 exec(ecmd->argv[0], ecmd->argv); (gdb) exec () at user/usys.S:45 45 li a7, SYS_exec (gdb) info reg ra ra 0xfe 0xfe <runcmd+86>
可以看到,执行到exec时ra的值已经被更新为了0xfe,我们查看一下sh.asm确认一下这个地址的代码段:
// user/sh.asm
if(ecmd->argv[0] == 0) ee: 6508 ld a0,8(a0) f0: c515 beqz a0,11c <runcmd+0x74> exec(ecmd->argv[0], ecmd->argv); f2: 00848593 addi a1,s1,8 f6: 00001097 auipc ra,0x1 fa: d04080e7 jalr -764(ra) # dfa <exec> fprintf(2, "exec %s failed\n", ecmd->argv[0]); fe: 6490 ld a2,8(s1) 100: 00001597 auipc a1,0x1 104: 20858593 addi a1,a1,520 # 1308 <malloc+0x108> 108: 4509 li a0,2 10a: 00001097 auipc ra,0x1 10e: 00a080e7 jalr 10(ra) # 1114 <fprintf>
这个地址正好是执行完exec后应该返回的地址,但我们知道,exec函数在成功时是永远不会返回的。代码运行到这里时处于用户态,执行完exec系统调用后本进程被装入“sleep”,而开始执行sleep的main函数时,寄存器ra的值是不变的!这也就意味着,当main函数结束后,pc的值将会被更新为0xfe。我们重启一下xv6,将断点打在user/_sleep上验证一下我们的猜想:
For help, type "help".
Type "apropos word" to search for commands related to "word"...
Reading symbols from kernel/kernel...
The target architecture is assumed to be riscv:rv64
0x0000000000001000 in ?? ()
(gdb) file user/_sleep
Reading symbols from user/_sleep...
(gdb) b ma
Function "ma" not defined.
(gdb) b main
Breakpoint 1 at 0x3a: file user/sleep.c, line 14.
(gdb) c
Continuing.
Breakpoint 1, main (argc=2, argv=0x2 <parse_int+2>) at user/sleep.c:14
14 int main(int argc,char** argv) {
(gdb)
Continuing.
Breakpoint 1, main (argc=2, argv=0x2fc0) at user/sleep.c:14
14 int main(int argc,char** argv) {
(gdb) p argv[1]
$1 = 0x2fe0 "10"
(gdb) info reg ra0
Invalid register `ra0'
(gdb) info reg ra
ra 0xfe 0xfe <strlen+20>
(gdb)
执行下去,对照着sleep.asm来看:
(gdb)
22 if (time < 0) {
(gdb)
26 sleep(time);
(gdb)
27 return 0;
(gdb) si
0x0000000000000064 27 return 0;
(gdb)
0x0000000000000066 27 return 0;
(gdb)
0x0000000000000068 27 return 0;
(gdb)
0x000000000000006a 27 return 0;
(gdb)
0x000000000000006c 27 return 0;
(gdb)
0x00000000000000fe in strlen (
s=0xffffffffffffffff <error: Cannot access memory at address 0xffffffffffffffff>)
at user/ulib.c:30
30 for(n = 0; s[n]; n++)
(gdb) info reg pc
pc 0xfe 0xfe <strlen+18>
(gdb) info reg ra
ra 0xfe 0xfe <strlen+18>
// user/sleep.asm uint strlen(const char *s) { ec: 1141 addi sp,sp,-16 ee: e422 sd s0,8(sp) f0: 0800 addi s0,sp,16 int n; for(n = 0; s[n]; n++) f2: 00054783 lbu a5,0(a0) f6: cf91 beqz a5,112 <strlen+0x26> f8: 0505 addi a0,a0,1 fa: 87aa mv a5,a0 fc: 4685 li a3,1 fe: 9e89 subw a3,a3,a0 100: 00f6853b addw a0,a3,a5 104: 0785 addi a5,a5,1 106: fff7c703 lbu a4,-1(a5) 10a: fb7d bnez a4,100 <strlen+0x14> ; return n; }
综上所述,我们在xv6下编写用户程序时,最后一定要调用exit函数结束整个进程,不应该使用return语句。
至于为什么普通的C程序可以在main函数后使用return语句,可以参考这篇知乎回答:https://www.zhihu.com/answer/783725339
OK,PART1到此结束。