初始化EPT
/*
 * Bit-field views of the EPT pointer and of the EPT paging-structure
 * entries. Every *_bits struct is exactly 64 bits wide and is overlaid
 * with a raw __u64 through the matching union, so an entry can be
 * cleared or tested through .value and edited field by field through .bits.
 */

/* EPT pointer (EPTP). */
struct eptp_bits {
	unsigned memory_type	:3;	/* 0: UC uncacheable, 6: WB writeback */
	unsigned pagewalk_len	:3;	/* value 1 less than EPT page-walk length */
	unsigned dirty		:1;	/* dirty flag (bit 6) */
	unsigned reserved1	:5;
	unsigned long pgd	:40;	/* bit N-1:12 of the physical address of
					 * the 4-KByte aligned EPT PML4 table, N=40 */
	unsigned reserved2	:12;
} __attribute__((__packed__));

union eptp {
	struct eptp_bits bits;
	__u64 value;
};

/* PGD (PML4E)
 *
 * (N-1):12	Physical address of 4-KByte aligned EPT page-directory-pointer
 *		table referenced by this entry
 *
 * Since no processors supporting the Intel 64 architecture support more than
 * 48 physical-address bits, the size of field "addr" is 36 bits. Ditto for
 * other structures.
 */
struct ept_pgd_bits {
	unsigned read		:1;
	unsigned write		:1;
	unsigned exec		:1;
	unsigned reserved1	:5;
	unsigned accessed	:1;
	unsigned ignored1	:3;
	unsigned long addr	:36;
	unsigned reserved2	:4;
	unsigned ignored2	:12;
} __attribute__((packed));

union ept_pgd {
	struct ept_pgd_bits bits;
	__u64 value;
};

/* PUD (PDPTE) */
struct ept_pud_bits {
	unsigned read		:1;
	unsigned write		:1;
	unsigned exec		:1;
	unsigned reserved1	:5;
	unsigned accessed	:1;
	unsigned ignored1	:3;
	unsigned long addr	:36;
	unsigned reserved2	:4;
	unsigned ignored2	:12;
} __attribute__((packed));

union ept_pud {
	struct ept_pud_bits bits;
	__u64 value;
};

/* PMD (PDE) that references a lower-level table: bit 7 ("zero") is clear. */
struct ept_pmd_bits {
	unsigned read		:1;
	unsigned write		:1;
	unsigned exec		:1;
	unsigned reserved1	:4;
	unsigned zero		:1;
	unsigned accessed	:1;
	unsigned ignored1	:3;
	unsigned long addr	:36;
	unsigned reserved2	:4;
	unsigned ignored2	:12;
} __attribute__((packed));

union ept_pmd {
	struct ept_pmd_bits bits;
	__u64 value;
};

/*
 * PDE that maps a page directly: "ispage" occupies bit 7 and "addr" starts
 * at bit 21, so "addr" counts frames in 2-MByte units.
 */
struct ept_pde_bits {
	unsigned read		:1;
	unsigned write		:1;
	unsigned exec		:1;
	unsigned memtype	:3;	/* bits 3-5 */
	unsigned ignorpat	:1;
	unsigned ispage		:1;
	unsigned accessed	:1;
	unsigned writed		:1;
	unsigned ignored	:2;
	unsigned reserved	:9;
	unsigned long addr	:30;
	unsigned reserved2	:1;
	unsigned ignored2	:12;
} __attribute__((packed));

union ept_pde {
	struct ept_pde_bits bits;
	__u64 value;
};

/* PTE */
struct ept_pte_bits {
	unsigned read		:1;
	unsigned write		:1;
	unsigned exec		:1;
	unsigned mem_type	:3;
	unsigned ignore_pat	:1;
	unsigned ignored1	:1;
	unsigned accessed	:1;
	unsigned dirty		:1;
	unsigned ignored2	:2;
	unsigned long addr	:36;
	unsigned reserved1	:4;
	unsigned ignored3	:11;
	unsigned suppress_ve	:1;
} __attribute__((packed));

union ept_pte {
	struct ept_pte_bits bits;
	__u64 value;
};

/*
 * Table index of an address at each paging level (9 bits per level).
 * The argument is parenthesized so that expressions built from operators
 * binding looser than ">>" (e.g. "a | b", "a & mask") are shifted as a whole.
 */
/* bits 47:39 */
#define ept_pgd_index(x)	(((x) >> 39) & 0x1ff)
/* bits 38:30 */
#define ept_pud_index(x)	(((x) >> 30) & 0x1ff)
/* bits 29:21 */
#define ept_pmd_index(x)	(((x) >> 21) & 0x1ff)
/* bits 20:12 */
#define ept_pte_index(x)	(((x) >> 12) & 0x1ff)
上面是一些页表的结构体,下面是初始化ept页表。每个页大小为2MB.
static err_t initEpt(void) { union ept_pud* pud; union ept_pde* pde; int i,j,k, memnum = 0; pgd = (union ept_pgd *)get_zeroed_page(GFP_ATOMIC); if (NULL == pgd) { printk("%s: alloc pgd error!\n", __func__); goto allocErr; } eptph.value = 0; eptph.bits.memory_type = 6; eptph.bits.pagewalk_len = 3; eptph.bits.pgd = __pa(pgd) >>PAGE_SHIFT; printk("eptp.pgd physical address= 0x%016lx, virtual address = 0x%016lx, pgd = 0x%016lx\n", (unsigned long)(eptph.bits.pgd << PAGE_SHIFT), (unsigned long)__va(eptph.bits.pgd << PAGE_SHIFT), (unsigned long)pgd); for(i = 0; i < 512; i++) { pud = (union ept_pud*)get_zeroed_page(GFP_ATOMIC); if (NULL == pud) { printk("%s: alloc pud error!\n", __func__); goto allocErr; } pgd[i].value = 0; pgd[i].bits.read = 1; pgd[i].bits.write = 1; pgd[i].bits.exec = 1; pgd[i].bits.addr = __pa(pud) >>PAGE_SHIFT; printk("pgde: va=0x%016lx, pa=0x%016lx\n", (unsigned long)pud, (unsigned long)pgd[i].bits.addr); for (j = 0; j < 512; j++) { pde = (union ept_pde*)get_zeroed_page(GFP_ATOMIC); if (NULL == pde) { printk("%s: alloc pde error!\n", __func__); goto allocErr; } pud[j].value = 0; pud[j].bits.read = 1; pud[j].bits.write = 1; pud[j].bits.exec = 1; pud[j].bits.addr = __pa(pde) >>PAGE_SHIFT; printk("pude: va=0x%016lx, pa=0x%016lx\n", (unsigned long)pde, (unsigned long)pud[j].bits.addr); for(k = 0; k < 512; k++) { pde[k].value = 0; pde[k].bits.read = 1; pde[k].bits.write = 1; pde[k].bits.exec = 1; pde[k].bits.memtype = 6; pde[k].bits.ispage = 1; pde[k].bits.addr = memnum++; if (memnum >= (1<<18)) { return SUCCESS; } } } } return SUCCESS; allocErr: printk("initEpt: alloc mem error!\n"); return ERROR_ALLOC_FAILED; }
对创建的EPT页表做测试。
static err_t doMemTest(void) { __u64 *page,*page2; unsigned char buff[100]; memset(buff, 0, sizeof(buff)); page = (__u64*)__get_free_pages(GFP_KERNEL, 9); page2 = (__u64*)__get_free_pages(GFP_KERNEL, 9); if (page == NULL || page2 == NULL) { printk("alloc test mem failed!\n"); return ERROR_ALLOC_FAILED; } printk("page = 0x%016lx, page2 = 0x%016lx\n", (unsigned long int)page, (unsigned long int)page2); cpu_vmcs_vmcall_memtest(VMCALL_MEM_TEST, __pa(page), __pa(page2)); printk("page = 0x%016lx, page2 = 0x%016lx, __pa(page2) = 0x%016lx\n\n\n\n", (unsigned long int)page, (unsigned long int)page2, (unsigned long int)__pa(page2)); //cpu_vmcs_vmcall_memtest(VMCALL_MEM_TEST, __pa(page), __pa(page2)); memcpy(page, "in our do mem test!\n", 30); memcpy(buff, page2, 30); //memcpy(page2, "in our do mem test!\n", 25); // memcpy(buff, page2, 25); printk("buff = %s\n", buff); memcpy(page2, "test for test!\n", 30); //memcpy(buff, page2, 30); printk("page = %s\n", (char *)page); return SUCCESS; }
/*
 * Issue a VMCALL carrying a reason code and two arguments.
 *
 * The values are handed over in rdi (reason), rsi (arg0) and rdx (arg1).
 * After the instruction the carry and zero flags are sampled; if either is
 * set the call is reported through vmcall_error() together with the
 * individual CF (vmfailinvalid) and ZF (vmfailvalid) values.
 */
static __always_inline void cpu_vmcs_vmcall_memtest(unsigned long reason,
						    unsigned long arg0,
						    unsigned long arg1)
{
	__u8 failed, fail_invalid, fail_valid;

	asm volatile (
		/* clear ZF and CF, otherwise guest may think that vmcall
		 * failed. encapsulated process may trick this by setting rsp to
		 * zero, but what's the point? */
		"test %%rsp, %%rsp\n"
		"vmcall\n"
		"setbe %[any]\n"	/* CF or ZF set */
		"setb %[cf]\n"		/* CF set */
		"sete %[zf]\n"		/* ZF set */
		: [any] "=qm"(failed),
		  [cf] "=qm"(fail_invalid),
		  [zf] "=qm"(fail_valid)
		: "D"(reason), "S"(arg0), "d"(arg1)
		: "cc", "memory"
	);

	if (!failed)
		return;

	vmcall_error(reason, fail_invalid, fail_valid);
}
/*
 * Handle a VMCALL exit for the given vcpu.
 *
 * The reason code is read from the saved rdi and the first argument from
 * the saved rsi, matching the registers cpu_vmcs_vmcall_memtest() loads
 * before executing vmcall.
 *
 * Only the VMCALL_MEM_TEST case is visible in this excerpt; the rest of the
 * switch and the end of the function are not shown here.
 */
static void exit_vmcall_trusted(struct vcpu *vcpu)
{
	unsigned long arg0, arg1, arg2;
	struct shadow_process *shadowp;
	struct capsule_params *params;
	enum vmcall_reason reason;
	struct task_struct *task;
	struct capsule *capsule;
	bool woken_up;

	reason = vcpu->regs.rdi;
	arg0 = vcpu->regs.rsi;

	switch (reason) {
	case VMCALL_MEM_TEST:
		/* Second argument arrives in rdx. */
		arg1 = vcpu->regs.rdx;
		printk("arg0 = 0x%016lx, arg1 = 0x%016lx\n", arg0, arg1);
		/* Remap the EPT entry for arg1 onto the frame backing arg0. */
		modifyEpt(arg0, arg1);
		break;
/*
 * Point the EPT entry covering vaddr2 at the frame that currently backs
 * vaddr1, logging both frame numbers before and after the change.
 *
 * NOTE(review): despite the parameter names, the visible caller passes
 * values produced by __pa(); confirm what address space is intended.
 * NOTE(review): getPaddr() returns 0 on lookup failure, and that value is
 * still forwarded to setPaddr() here.
 * NOTE(review): no EPT invalidation is performed after the update; the
 * original carried a disabled call "invept(INVEPT_ALL_CONTEXT, eptph.value)".
 */
static void modifyEpt(__u64 vaddr1, __u64 vaddr2)
{
	unsigned long frame1 = getPaddr(vaddr1);
	unsigned long frame2 = getPaddr(vaddr2);

	printk("pfn1 = 0x%016lx, pfn2 = 0x%016lx\n", frame1, frame2);

	setPaddr(frame1, vaddr2);

	/* Look both entries up again to show the effect of the update. */
	frame1 = getPaddr(vaddr1);
	frame2 = getPaddr(vaddr2);
	printk("pfn1 = 0x%016lx, pfn2 = 0x%016lx\n", frame1, frame2);
}
/*
 * getPaddr() - look up the frame number an address maps to in the EPT.
 * @vaddr: address to translate; it is split into PGD/PUD/PMD indices.
 *
 * Walks the global "pgd" down to the PDE level, logging each step. Only
 * PDEs with bit 7 set (page entries) are supported; the 4 KiB page-table
 * level is not walked.
 *
 * Return: the "addr" field of the matching PDE, or 0 if any level is not
 * present, the PUD entry has bit 7 set, or the PDE is not a page entry.
 *
 * NOTE(review): 0 is also a possible "addr" value, so callers cannot tell
 * a failed lookup from a mapping to frame 0.
 */
static unsigned long getPaddr(__u64 vaddr)
{
	union ept_pgd *pgde;
	union ept_pud *pude;
	union ept_pde *pdee;

	/* Page Global Dir */
	pgde = pgd + ept_pgd_index(vaddr);
	printk("pgde = 0x%016lx, physical address = 0x%016lx, pgde->value = 0x%016lx\n",
	       (unsigned long)pgde, (unsigned long)__pa(pgde),
	       (unsigned long)pgde->value);

	/* no entry in EPT PGD? */
	if (!ept_pgd_present(pgde)) {
		printk("getPaddr : pgd not found!\n");
		return 0;
	}
	printk("pgde get success......\n");
	printk("pgde->bits.addr = 0x%016lx, va = 0x%016lx, pud_index = 0x%016lx\n",
	       (unsigned long)pgde->bits.addr,
	       (unsigned long)__va((unsigned long)pgde->bits.addr << PAGE_SHIFT),
	       (unsigned long)ept_pud_index(vaddr));

	/* Page Upper Dir */
	pude = (union ept_pud *)ept_pgd_addr(pgde) + ept_pud_index(vaddr);
	if (!ept_pud_present(pude)) {
		printk("getPaddr : pud not found!\n");
		return 0;
	}
	printk("pude get success......\n");

	if (pude->value & (1 << 7)) {
		hv_err("BUG: huge pud in gpa_to_hva");
		return 0;
	}

	/* Page Middle Dir */
	/*
	 * Widen the bit-field before shifting: GCC evaluates a shift of a
	 * wide bit-field at the bit-field's own width, which would drop the
	 * upper address bits.
	 */
	printk("pude->bits.addr = 0x%016lx, va = 0x%016lx, pmd_index = 0x%016lx\n",
	       (unsigned long)pude->bits.addr,
	       (unsigned long)__va((unsigned long)pude->bits.addr << PAGE_SHIFT),
	       (unsigned long)ept_pmd_index(vaddr));

	pdee = (union ept_pde *)ept_pud_addr(pude) + ept_pmd_index(vaddr);
	if (!ept_pmd_present((union ept_pmd *)pdee)) {
		printk("getPaddr : pde not found!\n");
		return 0;
	}

	/* Bit 7 set: this PDE maps a page directly. */
	if (pdee->value & (1 << 7)) {
		printk("find pmd! pdee->value = 0x%016lx, pdee->bits.addr = 0x%016lx---------------\n",
		       (unsigned long)pdee->value,
		       (unsigned long)pdee->bits.addr);
		printk("pdee->bits.addr = 0x%016lx\n",
		       (unsigned long)pdee->bits.addr);
		return pdee->bits.addr;
	}

	printk("find pmd failed!\n");
	return 0;
}

/*
 * setPaddr() - rewrite the frame number of the PDE covering an address.
 * @paddr: new value for the PDE's "addr" field; it is truncated to the
 *         width of that bit-field on assignment.
 * @vaddr: address whose PDE is to be modified.
 *
 * Performs the same walk as getPaddr() and, if the PDE has bit 7 set,
 * stores @paddr into its "addr" field. No other field of the entry is
 * changed and no EPT invalidation is issued here.
 *
 * Return: the "addr" field as read back after the store, or 0 if any level
 * is not present, the PUD entry has bit 7 set, or the PDE is not a page
 * entry.
 */
static unsigned long setPaddr(unsigned long paddr, __u64 vaddr)
{
	union ept_pgd *pgde;
	union ept_pud *pude;
	union ept_pde *pdee;

	/* Page Global Dir */
	pgde = pgd + ept_pgd_index(vaddr);

	/* no entry in EPT PGD? */
	if (!ept_pgd_present(pgde)) {
		printk("setPaddr : pgd not found!\n");
		return 0;
	}

	/* Page Upper Dir */
	pude = (union ept_pud *)ept_pgd_addr(pgde) + ept_pud_index(vaddr);
	if (!ept_pud_present(pude)) {
		printk("setPaddr : pud not found!\n");
		return 0;
	}

	if (pude->value & (1 << 7)) {
		hv_err("BUG: huge pud in gpa_to_hva");
		return 0;
	}

	/* Page Middle Dir */
	pdee = (union ept_pde *)ept_pud_addr(pude) + ept_pmd_index(vaddr);
	if (!ept_pmd_present((union ept_pmd *)pdee)) {
		printk("setPaddr : pmd not found!\n");
		return 0;
	}

	/* Bit 7 set: this PDE maps a page directly, so retarget it. */
	if (pdee->value & (1 << 7)) {
		printk("find pmd!,now set it!!!!!!!!111\n");
		pdee->bits.addr = paddr;
		return pdee->bits.addr;
	}

	printk("find pmd failed!\n");
	return 0;
}
以下是与线性区的页相关的那些标志,它们存放在vm_area_struct描述符的vm_flags字段中。一些标志给内核提供有关这个线性区全部页的信息,例如它们包含有什么内容,进程访问每个页的权限是什么。 另外的标志描述线性区自身,例如它应该如何增长(这些标志位于include/linux/mm.h):
VM_READ:页是可读的
VM_WRITE:页是可写的
VM_EXEC:页是可执行的
VM_SHARED:页可以由几个进程共享
VM_MAYREAD:可以设置VM_READ标志
VM_MAYWRITE:可以设置VM_WRITE标志
VM_MAYEXEC:可以设置VM_EXEC标志
VM_MAYSHARE:可以设置VM_SHARED标志
VM_GROWSDOWN:线性区可以向低地址扩展
VM_GROWSUP:线性区可以向高地址扩展
VM_SHM:线性区用于IPC的共享内存
VM_DENYWRITE:线性区映射一个不能打开用于写的文件
VM_EXECUTABLE:线性区映射一个可执行文件
VM_LOCKED:线性区中的页被锁住,且不能换出
VM_IO:线性区映射设备的I/O地址空间
VM_SEQ_READ:应用程序顺序地访问页
VM_RAND_READ:应用程序以真正的随机顺序访问页
VM_DONTCOPY:当创建一个新进程时不拷贝线性区
VM_DONTEXPAND:通过mremap()系统调用禁止线性区扩展
VM_RESERVED:线性区是特殊的(如:它映射某个设备的I/O地址空间),因此它的页不能被交换出去
VM_ACCOUNT:创建IPC共享线性区时检查是否有足够的空闲内存用于映射
VM_HUGETLB:通过扩展分页机制处理线性区中的页
VM_NONLINEAR:线性区实现非线性文件映射
因此,要根据以下规则精简由读、写、执行和共享访问权限的16种可能组合: - 如果页具有写和共享两种访问权限,那么,Read/Write位被设置为1。 - 如果页具有读或执行访问权限,但是既没有写也没有共享访问权限,那么,Read/Write位被清0。 - 如果支持NX位,而且页没有执行访问权限,那么,把NX位设置为1。 - 如果页没有任何访问权限,那么,Present位被清0,以便每次访问都产生一个缺页异常。 然而,为了把这种情况与真正的页框不存在的情况相区分,Linux还把Page size位置为1(你可能认为Page size位的这种用法并不正当,因为这个位本来是表示实际页的大小。 但是,Linux可以侥幸逃脱这种骗局,因为80x86芯片在页目录项中检查Page size位,而不是在页表的表项中检查该位。)
do_page_fault()函数接收以下输入参数: - pt_regs结构的地址regs,该结构包含当异常发生时的微处理器寄存器的值。 - 3位的error_code,当异常发生时由控制单元压入栈中(参见第四章中的“中断和异常的硬件处理”一节)。这些位有以下含义:
—— 如果第0位被清0,则异常由访问一个不存在的页所引起(页表项中的Present标志被清0);否则,如果第0位被设置,则异常由无效的访问权限所引起。
—— 如果第1位被清0,则异常由读访问或者执行访问所引起;如果该位被设置,则异常由写访问所引起。
—— 如果第2位被清0,则异常发生在处理器处于内核态时;否则,异常发生在处理器处于用户态时。
* bit 3 == 1 means use of reserved bit detected * bit 4 == 1 means fault was an instruction fetch