linux page table
14.172 Virtual Memory Management, Page Table, Prefix Addressing
https://www.youtube.com/watch?v=WxYiXDSyiZ0
Linux分页机制之分页机制的实现详解--Linux内存管理(八)
https://blog.csdn.net/gatieme/article/details/52403013
page table PTE/PMD/PGD bitwidth
Take VA_BITS = 39 with 3-level page tables (4K pages, PAGE_SHIFT = 12) as an example:
/*
 * Number of entries in one PTE table: a page holds 2^PAGE_SHIFT bytes and
 * each descriptor takes 2^3 = 8 bytes, so with PAGE_SHIFT = 12 this is 512.
 */
#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3))
4.19/arch/arm64/include/asm/pgtable-hwdef.h
/*
 * Address shift handled by a lookup at hardware level n: each level resolves
 * (PAGE_SHIFT - 3) bits on top of the PAGE_SHIFT-bit page offset.
 * With PAGE_SHIFT = 12:
 *   n = 1 -> (12 - 3) * (4 - 1) + 3 = 27 + 3 = 30
 *   n = 2 -> 21
 *   n = 3 -> 12
 * PGDIR_SHIFT uses n = 1 (i.e. 30) when CONFIG_PGTABLE_LEVELS is 3.
 */
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3)
/*
 * PMD geometry. Only defined here when a real PMD level exists, i.e. when
 * more than two page-table levels are configured.
 */
#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT	ARM64_HW_PGTABLE_LEVEL_SHIFT(2)	/* 21 when PAGE_SHIFT is 12 */
#define PMD_SIZE	(_AC(1, UL) << PMD_SHIFT)	/* 1 << 21 = 2M */
#define PMD_MASK	(~(PMD_SIZE-1))			/* clears the offset inside one PMD entry */
#define PTRS_PER_PMD	PTRS_PER_PTE			/* same entry count as a PTE table */
#endif
/*
 * PUD geometry. Only defined here when a real PUD level exists, i.e. when
 * more than three page-table levels are configured.
 */
#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT	ARM64_HW_PGTABLE_LEVEL_SHIFT(1)	/* 30 when PAGE_SHIFT is 12 */
#define PUD_SIZE	(_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK	(~(PUD_SIZE-1))			/* clears the offset inside one PUD entry */
#define PTRS_PER_PUD	PTRS_PER_PTE			/* same entry count as a PTE table */
#endif
when no pud, PTRS_PER_PUD is:
include/asm-generic/pgtable-nop4d-hack.h
#define PTRS_PER_PUD 1
when CONFIG_PGTABLE_LEVELS is 3, there is no PUD
/*
 * Top-level (PGD) shift: hardware level (4 - CONFIG_PGTABLE_LEVELS).
 * Evaluates to 30 when CONFIG_PGTABLE_LEVELS is 3 and PAGE_SHIFT is 12.
 */
#define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS)
/* Bytes mapped by one PGD entry: 1 << 30 = 1G in that configuration. */
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
/* Mask that clears the offset bits below PGDIR_SHIFT. */
#define PGDIR_MASK (~(PGDIR_SIZE-1))
/* Number of PGD entries: 1 << (39 - 30) = 1 << 9 = 512 for VA_BITS = 39. */
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT))
PGDIR_SHIFT: 30
PMD_SHIFT: 21
page offset:[11:0] = 12bit
PTE:[20:12] = 9bit
PMD:[29:21] = 9bit
PGD:[38:30] = 9bit
virtual addr total bits: 39
4K*512 = 2M(per PMD)
4K*512*512 = 1G(per PGD)
512个PGD,total:
512*1G =512G
page table header include relation
arch/arm64/include/asm/pgtable.h includes:
#include <asm/pgtable-prot.h>
which includes:
arch/arm64/include/asm/pgtable-types.h
which contains:
#if CONFIG_PGTABLE_LEVELS == 2
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
#elif CONFIG_PGTABLE_LEVELS == 3
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
#elif CONFIG_PGTABLE_LEVELS == 4
#include <asm-generic/5level-fixup.h>
#endif
Take the 3-level page table as an example: it will be pgtable-nopud.h, which contains:
#ifdef __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nop4d-hack.h>
#else
#include <asm-generic/pgtable-nop4d.h>
#define __PAGETABLE_PUD_FOLDED 1
in pgtable-nop4d-hack.h, it defines macros and functions like below:
/*
 * pgd-level predicates for the configuration where the level below the pgd
 * is folded into it: the answers are constants, and clearing is a no-op.
 */

/* Always reports the pgd entry as not empty. */
static inline int pgd_none(pgd_t pgd)
{
	return 0;
}

/* Always reports the pgd entry as not bad. */
static inline int pgd_bad(pgd_t pgd)
{
	return 0;
}

/* Always reports the pgd entry as present. */
static inline int pgd_present(pgd_t pgd)
{
	return 1;
}

/* Nothing to clear at this level. */
static inline void pgd_clear(pgd_t *pgd)
{
}
/*
 * Return the pud entry for @address. The address is not used: the pgd
 * pointer itself is returned, reinterpreted as a pud pointer.
 */
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
{
	pud_t *folded_pud = (pud_t *)pgd;

	return folded_pud;
}
page table descriptor
page table descriptor以ARM64为例,它是8byte,descriptor就是各级page table里的entry,其低bit(bit0)表示这个entry是否是valid的,如果为1,表示valid;如果为0,表示invalid。
pud_sect()用于判断一个pud descriptor是否指向一个block(section);如果指向block,descriptor的bit1为0(指向下一级table时bit1为1):
4.19/arch/arm64/include/asm/pgtable.h
#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3 static inline bool pud_sect(pud_t pud) { return false; } static inline bool pud_table(pud_t pud) { return true; } #else #define pud_sect(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_SECT) #define pud_table(pud) ((pud_val(pud) & PUD_TYPE_MASK) == \ PUD_TYPE_TABLE) #endif
dump ARM64 page table
含义MEM/NORMAL-NC、MEM/NORMAL
The following line from the kernel page tables illustrates non-cached normal memory.
0xffffff800a107000-0xffffff800a10f000 32K PTE RW NX SHD AF UXN MEM/NORMAL-NC
The following line from the kernel page tables illustrates cached normal memory.
0xffffffc000000000-0xffffffc000080000 512K PTE RW NX SHD AF UXN MEM/NORMAL
from: https://xilinx-wiki.atlassian.net/wiki/spaces/A/pages/18842098/Zynq+UltraScale+MPSoC+Cache+Coherency
cat kernel page table
相关dump code在arch/arm64/mm下,需要打开CONFIG_ARM64_PTDUMP_DEBUGFS(4.19;较新的内核为CONFIG_PTDUMP_DEBUGFS)才会有下面这个debugfs结点:
cat /sys/kernel/debug/kernel_page_tables
打印某个地址的PGD/PUD/PMD/PTE page table descriptor
参见show_pte()
void show_pte(unsigned long addr) { struct mm_struct *mm; pgd_t *pgdp; pgd_t pgd; if (addr < TASK_SIZE) { /* TTBR0 */ mm = current->active_mm; if (mm == &init_mm) { pr_alert("[%016lx] user address but active_mm is swapper\n", addr); return; } } else if (addr >= VA_START) { /* TTBR1 */ mm = &init_mm; } else { pr_alert("[%016lx] address between user and kernel address ranges\n", addr); return; } pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, VA_BITS, mm->pgd); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd)); do { pud_t *pudp, pud; pmd_t *pmdp, pmd; pte_t *ptep, pte; if (pgd_none(pgd) || pgd_bad(pgd)) break; pudp = pud_offset(pgdp, addr); pud = READ_ONCE(*pudp); pr_cont(", pud=%016llx", pud_val(pud)); if (pud_none(pud) || pud_bad(pud)) break; pmdp = pmd_offset(pudp, addr); pmd = READ_ONCE(*pmdp); pr_cont(", pmd=%016llx", pmd_val(pmd)); if (pmd_none(pmd) || pmd_bad(pmd)) break; ptep = pte_offset_map(pmdp, addr); pte = READ_ONCE(*ptep); pr_cont(", pte=%016llx", pte_val(pte)); pte_unmap(ptep); } while(0); pr_cont("\n"); }
pgd_addr_end macro
此macro返回本次遍历这一步的结束地址:如果end落在addr所在的pgd范围之内,则返回end;否则end在当前pgd范围之外(大于当前pgd的end),则返回下一个pgd的start address(下一个pgd start = curr pgd start + PGDIR_SIZE,即 (addr + PGDIR_SIZE) & PGDIR_MASK)。
一般情况下它返回的是end,因为一个pgd entry覆盖1G(在3级页表、4K页的配置下),要遍历的区间通常不会跨越pgd边界。
/*
 * Return where the current step of a page-table walk over [addr, end) stops:
 * the next PGDIR_SIZE-aligned boundary above addr, or end if that comes
 * first.
 *
 * Both sides of the comparison have 1 subtracted so that a boundary which
 * wrapped around to 0 compares as the largest address; in that case end is
 * returned instead of the wrapped boundary.
 */
#define pgd_addr_end(addr, end)						\
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;	\
	(__boundary - 1 < (end) - 1)? __boundary: (end);		\
})
获取struct page的物理地址
struct page数组是存放在vmemmap区,可以使用下面的代码获取这个struct page对应的PA(假设该地址是用2M的PMD block映射的):
page = alloc_page(GFP_KERNEL); if(page == NULL) { pr_err("alloc_page failed.\n"); return; } pgd = pgd_offset_k((unsigned long)page); if (pgd_none(*pgd)) { pr_err("pgd is none.\n"); return; } pmd = pmd_offset((pud_t*)pgd, (unsigned long)page); if (pmd_none(*pmd)) { pr_err("pmd is none.\n"); return; } pr_emerg("page struct's va addr: %#px, pa addr: %#lx.\n", page, (PTE_ADDR_LOW&pmd->pmd)+0x1fffff&pmd->pmd);
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· 没有源码,如何修改代码逻辑?
· 一个奇形怪状的面试题:Bean中的CHM要不要加volatile?
· [.NET]调用本地 Deepseek 模型
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· 在鹅厂做java开发是什么体验
· 百万级群聊的设计实践
· WPF到Web的无缝过渡:英雄联盟客户端的OpenSilver迁移实战
· 永远不要相信用户的输入:从 SQL 注入攻防看输入验证的重要性
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析