3.代码分析
一、初始化
kernel\main.c\main()
// Excerpt of the kernel entry point showing only the memory-related setup
// calls, in the order they run. The "..." lines stand for code that these
// notes leave out.
void main(){
...
kinit(); // physical page allocator
kvminit(); // create kernel page table
kvminithart(); // turn on paging
...
}
1.1 kinit
- 物理页初始化
kernel\kalloc.c
// Set up the physical page allocator: initialize the lock inside kmem, then
// pass the range from `end` up to PHYSTOP to freerange().
void kinit() {
initlock(&kmem.lock, "kmem");
// `end` is the end of the kernel's addresses.
// #define PHYSTOP (0x80000000 + 128*1024*1024)
freerange(end, (void*)PHYSTOP);
}
// Release every whole page inside [pa_start, pa_end) by calling kfree() on it.
// The start address is rounded up to a page boundary first, and a trailing
// partial page (if any) is left untouched.
//
// For reference:
//   #define PGSIZE 4096
//   #define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1))
void freerange(void *pa_start, void *pa_end) {
  char *limit = (char*)pa_end;
  char *page = (char*)PGROUNDUP((uint64)pa_start);

  // Stop as soon as a full page would no longer fit below the limit.
  while(page + PGSIZE <= limit){
    kfree(page);
    page += PGSIZE;
  }
}
kfree
void kfree(void *pa) {
struct run *r;
if(((uint64)pa % PGSIZE) != 0 || (char*)pa < end || (uint64)pa >= PHYSTOP)
panic("kfree");
// Fill with junk to catch dangling refs.
memset(pa, 1, PGSIZE);
r = (struct run*)pa;
acquire(&kmem.lock);
r->next = kmem.freelist;
kmem.freelist = r;
release(&kmem.lock);
}
freerange 处理从内核末尾(end)到物理内存末尾 PHYSTOP 之间的空间:
起始地址按 4KB 页大小向上取整,然后逐页调用 kfree,把每一页挂到 kmem.freelist
中供 kalloc 使用。
1.2 kvminit
- 创建内核页表
- pte:page table entry,页表项
kernel\vm.c
// Build the kernel page table with kvmmake() and store the result in
// kernel_pagetable.
void kvminit(void) {
kernel_pagetable = kvmmake();
}
// A page table is handled through a pointer to its uint64 entries.
typedef uint64 *pagetable_t;
// Create the kernel page table: take one page from kalloc(), zero it, add
// the mappings with kvmmap(), map the per-process kernel stacks with
// proc_mapstacks(), and return the table.
// The line of full-width dots in the body presumably marks mappings omitted
// from these notes.
pagetable_t kvmmake(void) {
pagetable_t kpgtbl;
// kalloc returns one 4KB block of memory.
// NOTE(review): the result is not checked for 0 before the memset below.
kpgtbl = (pagetable_t) kalloc();
memset(kpgtbl, 0, PGSIZE);
// uart registers
kvmmap(kpgtbl, UART0, UART0, PGSIZE, PTE_R | PTE_W);
。。。。。
// allocate and map a kernel stack for each process.
proc_mapstacks(kpgtbl);
return kpgtbl;
}
kvmmap
// Map the sz bytes of physical memory starting at pa to virtual address va
// in kpgtbl, with permissions perm. Panics with "kvmmap" if mappages()
// reports failure.
void kvmmap(pagetable_t kpgtbl, uint64 va, uint64 pa, uint64 sz, int perm) {
  int rc = mappages(kpgtbl, va, sz, pa, perm);

  if(rc != 0)
    panic("kvmmap");
}
mappages
// Install PTEs in pagetable so that the virtual range [va, va + size) refers
// to physical memory starting at pa, with permission bits perm.
// va and size must be page-aligned and size must be non-zero, otherwise the
// function panics. Panics as well if a page in the range is already mapped.
// Returns 0 on success, -1 if walk() could not supply a PTE.
int mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) {
  if((va % PGSIZE) != 0)
    panic("mappages: va not aligned");
  if((size % PGSIZE) != 0)
    panic("mappages: size not aligned");
  if(size == 0)
    panic("mappages: size");

  // Virtual address of the final page to map.
  uint64 final = va + size - PGSIZE;

  // Step the virtual and physical addresses forward one page at a time.
  for(uint64 cur = va; ; cur += PGSIZE, pa += PGSIZE){
    // typedef uint64 pte_t;
    pte_t *entry = walk(pagetable, cur, 1);
    if(entry == 0)
      return -1;
    if(*entry & PTE_V)
      panic("mappages: remap");
    *entry = PA2PTE(pa) | perm | PTE_V;
    if(cur == final)
      break;
  }
  return 0;
}
-
// Turn a physical address into PTE form: shift right by 12 to drop the low
// 12 bits, then shift left by 10 so the low 10 bits of the result are zero.
#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10)
页表项的格式如下图:PPN + Flags,
PA2PTE
将物理地址转换为页表项:先右移12位去掉页内偏移得到PPN,然后左移10位,把低10位空出来留给Flags(Flags本身由调用者通过按位或填入)
walk
// Bit position where the page-table indices of a virtual address begin.
#define PGSHIFT 12
// Shift needed to bring the index for the given level down to bit 0.
#define PXSHIFT(level) (PGSHIFT+(9*(level)))
#define PXMASK 0x1FF // 9 bits
// Extract the 9-bit page-table index of va for the given level.
#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK)
// Return a pointer to the level-0 PTE for va in pagetable.
// Levels 2 and 1 are followed in turn. When an entry on the way is not valid,
// a new zeroed table is allocated and linked in if alloc is non-zero.
// Returns 0 if such an entry is missing and alloc is 0, or if kalloc() fails.
// Panics with "walk" when va >= MAXVA.
pte_t * walk(pagetable_t pagetable, uint64 va, int alloc) {
  if(va >= MAXVA)
    panic("walk");

  int level = 2;
  while(level > 0){
    pte_t *slot = &pagetable[PX(level, va)];
    if((*slot & PTE_V) == 0){
      // The entry is absent: when alloc is set, create a page for the next
      // level and record it in this entry.
      if(!alloc)
        return 0;
      pagetable = (pde_t*)kalloc();
      if(pagetable == 0)
        return 0;
      memset(pagetable, 0, PGSIZE);
      *slot = PA2PTE(pagetable) | PTE_V;
    } else {
      // #define PTE2PA(pte) (((pte) >> 10) << 12)
      pagetable = (pagetable_t)PTE2PA(*slot);
    }
    level--;
  }
  return &pagetable[PX(0, va)];
}
![image-20240130163802883](http://pic-save-fury.oss-cn-shanghai.aliyuncs.com/uPic/image-20240130163802883.png)
三级页表格式如图,每一级9位
假设level等于2
PXSHIFT(2)=12+2*9=12+18=30
PX(2,va)=((((uint64)(va))>>30)&0X1FF)
PX的作用:传入页表等级,返回对应页表索引
proc_mapstacks
#define MAXVA (1L << (9 + 9 + 9 + 12 - 1))
#define PGSIZE 4096 // bytes per page
#define TRAMPOLINE (MAXVA - PGSIZE)
// map kernel stacks beneath the trampoline,
// each surrounded by invalid guard pages.
#define KSTACK(p) (TRAMPOLINE - (p)*2*PGSIZE - 3*PGSIZE)
// For each of the NPROC process slots, allocate one page with kalloc() and
// map it read/write in kpgtbl at that slot's KSTACK address.
// Panics with "kalloc" if a page cannot be allocated.
void proc_mapstacks(pagetable_t kpgtbl) {
  for(int slot = 0; slot < NPROC; slot++) {
    char *stack_pa = kalloc();
    if(stack_pa == 0)
      panic("kalloc");
    // Consecutive stacks are 2*PGSIZE apart but only PGSIZE is mapped, so
    // the page between two stacks stays unmapped.
    uint64 stack_va = KSTACK(slot);
    kvmmap(kpgtbl, stack_va, (uint64)stack_pa, PGSIZE, PTE_R | PTE_W);
  }
}
![image-20240130223811947](http://pic-save-fury.oss-cn-shanghai.aliyuncs.com/uPic/image-20240130223811947.png)
- 为每个进程创建一个KstackX,并在每个Kstack上方放置一个Guard page(未映射的无效页),用来防止栈溢出
1.3 kvminithart - 启动页表功能
// SATP_SV39 and MAKE_SATP are explained in the appendix of this chapter's
// study notes.
#define SATP_SV39 (8L << 60)
// Combine SATP_SV39 with the page-table address shifted right by 12.
#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)pagetable) >> 12))
// Install kernel_pagetable: write MAKE_SATP(kernel_pagetable) with w_satp(),
// with an sfence_vma() before and after the write.
void kvminithart() {
// wait for any previous writes to the page table memory to finish.
sfence_vma();
w_satp(MAKE_SATP(kernel_pagetable));
// flush stale entries from the TLB.
sfence_vma();
}
将kernel_pagetable
写入satp
// flush the TLB.
// Wraps the single instruction `sfence.vma zero, zero` in inline assembly.
static inline void sfence_vma() {
// the zero, zero means flush all TLB entries.
asm volatile("sfence.vma zero, zero");
}
- CPU 会在TLB中缓存页表项,更改页表时,必须告诉CPU让TLB缓存失效。如果不做,之后可能会出错。
- 指令`sfence.vma`用于刷新当前CPU的TLB。Xv6在重新加载`satp`寄存器后会执行`sfence.vma`,并在返回用户空间之前切换到用户页表的trampoline代码中执行`sfence.vma`(kernel/trampoline.S:89)。
- 在更改`satp`之前也有必要发出`sfence.vma`,以确保前面对页表的更新已经完成,并确保前面的加载和存储使用的是旧的页表,而不是新的。
二、物理内存分配
主要和kernel\kalloc.c
有关
- xv6启动时,在`kinit`这一步初始化所有物理地址,之前讲过kinit相关代码
- 物理内存只有128MB
- `kfree`返还内存给内核
- `kalloc`向内核申请内存
- 通过一个自旋锁保护空闲内存链表
- 调用
kfree
时,会将待释放内存的所有字节置为1。这将导致释放内存后,内存读取到的是垃圾信息而不是旧的有效内容;从而让错误代码更快地崩溃。
三、sbrk
用于增加或减少进程内存
四、exec
五、其他代码讲解
argint
copyin
// Copy from user to kernel.
// Transfers len bytes into dst, reading from virtual address srcva as
// translated through pagetable. The copy proceeds page by page because each
// page is translated separately by walkaddr().
// Return 0 on success, -1 if walkaddr() yields 0 for any page.
int copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len) {
  while(len > 0){
    uint64 page_va = PGROUNDDOWN(srcva);
    uint64 page_pa = walkaddr(pagetable, page_va);
    if(page_pa == 0)
      return -1;

    // Bytes available from srcva to the end of this page, capped at len.
    uint64 offset = srcva - page_va;
    uint64 chunk = PGSIZE - offset;
    if(chunk > len)
      chunk = len;

    memmove(dst, (void *)(page_pa + offset), chunk);

    dst += chunk;
    len -= chunk;
    // Continue at the start of the following page.
    srcva = page_va + PGSIZE;
  }
  return 0;
}
copyinstr
// Copy a null-terminated string from user to kernel.
// Bytes are read from virtual address srcva (translated through pagetable)
// and stored into dst until a '\0' has been copied or max bytes were taken.
// Return 0 once the terminating '\0' has been stored, -1 if a page cannot be
// translated or no '\0' shows up within max bytes.
int
copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max)
{
  while(max > 0){
    uint64 page_va = PGROUNDDOWN(srcva);
    uint64 page_pa = walkaddr(pagetable, page_va);
    if(page_pa == 0)
      return -1;

    // Bytes to examine in this page: up to the page end, capped at max.
    uint64 offset = srcva - page_va;
    uint64 chunk = PGSIZE - offset;
    if(chunk > max)
      chunk = max;

    char *src = (char *) (page_pa + offset);
    for(; chunk > 0; chunk--, max--, src++, dst++){
      char c = *src;
      *dst = c;
      // The terminator has just been stored, so the string is complete.
      if(c == '\0')
        return 0;
    }

    srcva = page_va + PGSIZE;
  }
  // Ran out of budget without meeting a '\0'.
  return -1;
}
copyout
// Copy len bytes from src to virtual address dstva as translated through
// pagetable, one page at a time.
// Return 0 on success. Return -1 if a destination page is at or above MAXVA,
// has no PTE, or its PTE lacks any of PTE_V, PTE_U or PTE_W.
int copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len)
{
  while(len > 0){
    uint64 page_va = PGROUNDDOWN(dstva);
    // Checked here so that walk() is never called with va >= MAXVA, which
    // would make it panic.
    if(page_va >= MAXVA)
      return -1;

    pte_t *pte = walk(pagetable, page_va, 0);
    if(pte == 0)
      return -1;
    if((*pte & PTE_V) == 0)
      return -1;
    if((*pte & PTE_U) == 0)
      return -1;
    if((*pte & PTE_W) == 0)
      return -1;

    uint64 page_pa = PTE2PA(*pte);

    // Room left in this page from dstva onwards, capped at len.
    uint64 offset = dstva - page_va;
    uint64 chunk = PGSIZE - offset;
    if(chunk > len)
      chunk = len;

    memmove((void *)(page_pa + offset), src, chunk);

    src += chunk;
    len -= chunk;
    dstva = page_va + PGSIZE;
  }
  return 0;
}