Linux page table

 

14.172 Virtual Memory Management, Page Table, Prefix Addressing

https://www.youtube.com/watch?v=WxYiXDSyiZ0

 

Linux paging mechanism: implementation details of paging -- Linux memory management (8)

https://blog.csdn.net/gatieme/article/details/52403013

 

page table PTE/PMD/PGD bitwidth

Take VA_BITS = 39 with 3-level page tables as an example:

 

#define PTRS_PER_PTE        (1 << (PAGE_SHIFT - 3)) /*512*/

 

 

4.19: arch/arm64/include/asm/pgtable-hwdef.h

#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n)    ((PAGE_SHIFT - 3) * (4 - (n)) + 3) //((12 - 3) * (4 - 1) + 3) = 27 +3 = 30, for PGDIR_SHIFT when CONFIG_PGTABLE_LEVELS is 3.
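As a quick sanity check of this macro, here is a small user-space sketch (PAGE_SHIFT = 12 assumed, i.e. 4K pages) evaluating it for the levels used by a 3-level configuration:

#include <stdio.h>

#define PAGE_SHIFT 12
#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n)    ((PAGE_SHIFT - 3) * (4 - (n)) + 3)

int main(void)
{
    /* level 1 is the PGD when CONFIG_PGTABLE_LEVELS is 3 */
    printf("PGDIR_SHIFT = %d\n", ARM64_HW_PGTABLE_LEVEL_SHIFT(1)); /* 30 */
    printf("PMD_SHIFT   = %d\n", ARM64_HW_PGTABLE_LEVEL_SHIFT(2)); /* 21 */
    printf("PTE level   = %d\n", ARM64_HW_PGTABLE_LEVEL_SHIFT(3)); /* 12 = PAGE_SHIFT */
    return 0;
}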

 

#if CONFIG_PGTABLE_LEVELS > 2
#define PMD_SHIFT        ARM64_HW_PGTABLE_LEVEL_SHIFT(2) //=21
#define PMD_SIZE        (_AC(1, UL) << PMD_SHIFT) //1<<21
#define PMD_MASK        (~(PMD_SIZE-1))
#define PTRS_PER_PMD        PTRS_PER_PTE
#endif

 

#if CONFIG_PGTABLE_LEVELS > 3
#define PUD_SHIFT        ARM64_HW_PGTABLE_LEVEL_SHIFT(1)
#define PUD_SIZE        (_AC(1, UL) << PUD_SHIFT)
#define PUD_MASK        (~(PUD_SIZE-1))
#define PTRS_PER_PUD        PTRS_PER_PTE
#endif
When there is no PUD level (it is folded), PTRS_PER_PUD is defined in include/asm-generic/pgtable-nop4d-hack.h:
#define PTRS_PER_PUD    1

 


When CONFIG_PGTABLE_LEVELS is 3, there is no PUD level (it is folded into the PGD):

#define PGDIR_SHIFT        ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) //=30 when CONFIG_PGTABLE_LEVELS is 3
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) //1<< 30 
#define PGDIR_MASK (~(PGDIR_SIZE-1))
#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) //1<< (39-30) = 1 << 9 = 512

PGDIR_SHIFT: 30
PMD_SHIFT:   21

page offset: [11:0]  = 12 bits
PTE index:   [20:12] =  9 bits
PMD index:   [29:21] =  9 bits
PGD index:   [38:30] =  9 bits

Total virtual address bits: 39

4K * 512       = 2M  (address range covered by one PMD entry)
4K * 512 * 512 = 1G  (address range covered by one PGD entry)

512 PGD entries in total: 512 * 1G = 512G
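A minimal user-space sketch (constants assumed for the VA_BITS = 39 / 4K / 3-level layout above) that splits a virtual address into these indices, mirroring what the MMU and the kernel's pgd_index()/pmd_index()/pte_index() helpers do:

#include <stdio.h>

#define PAGE_SHIFT   12
#define PMD_SHIFT    21
#define PGDIR_SHIFT  30
#define PTRS_PER_PTE 512
#define PTRS_PER_PMD 512
#define PTRS_PER_PGD 512

int main(void)
{
    unsigned long va = 0x0000004012345678UL;    /* arbitrary VA below 1 << 39 */

    unsigned long pgd_idx = (va >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
    unsigned long pmd_idx = (va >> PMD_SHIFT)   & (PTRS_PER_PMD - 1);
    unsigned long pte_idx = (va >> PAGE_SHIFT)  & (PTRS_PER_PTE - 1);
    unsigned long offset  = va & ((1UL << PAGE_SHIFT) - 1);

    printf("pgd index = %lu, pmd index = %lu, pte index = %lu, page offset = %#lx\n",
           pgd_idx, pmd_idx, pte_idx, offset);
    return 0;
}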

 

page table header include relationships

arch/arm64/include/asm/pgtable.h includes:
#include <asm/pgtable-prot.h>

arch/arm64/include/asm/pgtable-types.h in turn includes:
#if CONFIG_PGTABLE_LEVELS == 2
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
#elif CONFIG_PGTABLE_LEVELS == 3
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopud.h>
#elif CONFIG_PGTABLE_LEVELS == 4
#include <asm-generic/5level-fixup.h>
#endif

Taking the 3-level page table as an example, pgtable-nopud.h gets included; it contains:
#ifdef __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nop4d-hack.h>
#else
#include <asm-generic/pgtable-nop4d.h>
#endif

#define __PAGETABLE_PUD_FOLDED 1

 

In pgtable-nop4d-hack.h, the folded PUD level is implemented with trivial stubs: the pgd is never "none" or "bad", and pud_offset() simply reinterprets the pgd entry as a pud, since the pud is folded into the pgd:

static inline int pgd_none(pgd_t pgd)        { return 0; }
static inline int pgd_bad(pgd_t pgd)        { return 0; }
static inline int pgd_present(pgd_t pgd)    { return 1; }
static inline void pgd_clear(pgd_t *pgd)    { }
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
{
    return (pud_t *)pgd;
}

 

page table descriptor

Taking ARM64 as an example, a page table descriptor is 8 bytes. A descriptor is simply an entry in one of the levels of the page table. Its lowest bit (bit 0) indicates whether the entry is valid: 1 means valid, 0 means invalid.

pud_sect() checks whether a PUD descriptor points to a block (section) mapping; if it does, bit 1 of the descriptor is 0:

4.19: arch/arm64/include/asm/pgtable.h

#if defined(CONFIG_ARM64_64K_PAGES) || CONFIG_PGTABLE_LEVELS < 3
static inline bool pud_sect(pud_t pud) { return false; }
static inline bool pud_table(pud_t pud) { return true; }
#else
#define pud_sect(pud)        ((pud_val(pud) & PUD_TYPE_MASK) == \
                 PUD_TYPE_SECT)
#define pud_table(pud)        ((pud_val(pud) & PUD_TYPE_MASK) == \
                 PUD_TYPE_TABLE)
#endif
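As an illustration of the encoding described above (bit 0 = valid, bits [1:0] = 01 for a block, 11 for a table), here is a small standalone sketch, not kernel code, that classifies a raw descriptor value; the constants are written out with the same values as PUD_TYPE_MASK/PUD_TYPE_SECT/PUD_TYPE_TABLE, and the descriptor values are arbitrary examples:

#include <stdio.h>

/* bits [1:0] of an ARM64 stage-1 descriptor */
#define DESC_TYPE_MASK  0x3ULL
#define DESC_VALID      0x1ULL  /* bit 0 set: descriptor is valid */
#define DESC_SECT       0x1ULL  /* bits [1:0] == 01: block (section) mapping */
#define DESC_TABLE      0x3ULL  /* bits [1:0] == 11: next-level table (or page at the last level) */

static const char *desc_type(unsigned long long desc)
{
    if (!(desc & DESC_VALID))
        return "invalid";
    return (desc & DESC_TYPE_MASK) == DESC_SECT ? "block" : "table/page";
}

int main(void)
{
    printf("%s\n", desc_type(0x0000000080000705ULL)); /* block      */
    printf("%s\n", desc_type(0x00000000800f0003ULL)); /* table/page */
    printf("%s\n", desc_type(0x0ULL));                 /* invalid    */
    return 0;
}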

 

dump ARM64 page table

Meaning of MEM/NORMAL-NC and MEM/NORMAL

The following line from the kernel page tables illustrates non-cached normal memory.

0xffffff800a107000-0xffffff800a10f000 32K PTE RW NX SHD AF UXN MEM/NORMAL-NC

 

The following line from the kernel page tables illustrates cached normal memory.

0xffffffc000000000-0xffffffc000080000 512K PTE RW NX SHD AF UXN MEM/NORMAL

 from: https://xilinx-wiki.atlassian.net/wiki/spaces/A/pages/18842098/Zynq+UltraScale+MPSoC+Cache+Coherency

 

cat kernel page table

The related dump code is under arch/arm64/mm; the corresponding config option (CONFIG_ARM64_PTDUMP_DEBUGFS in 4.19) must be enabled for the following debugfs node to exist:

cat /sys/kernel/debug/kernel_page_tables

 

Print the PGD/PUD/PMD/PTE page table descriptors for a given address

See show_pte():

void show_pte(unsigned long addr)
{
    struct mm_struct *mm;
    pgd_t *pgdp;
    pgd_t pgd;

    if (addr < TASK_SIZE) {
        /* TTBR0 */
        mm = current->active_mm;
        if (mm == &init_mm) {
            pr_alert("[%016lx] user address but active_mm is swapper\n",
                 addr);
            return;
        }
    } else if (addr >= VA_START) {
        /* TTBR1 */
        mm = &init_mm;
    } else {
        pr_alert("[%016lx] address between user and kernel address ranges\n",
             addr);
        return;
    }

    pr_alert("%s pgtable: %luk pages, %u-bit VAs, pgdp = %p\n",
         mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K,
         VA_BITS, mm->pgd);
    pgdp = pgd_offset(mm, addr);
    pgd = READ_ONCE(*pgdp);
    pr_alert("[%016lx] pgd=%016llx", addr, pgd_val(pgd));

    do {
        pud_t *pudp, pud;
        pmd_t *pmdp, pmd;
        pte_t *ptep, pte;

        if (pgd_none(pgd) || pgd_bad(pgd))
            break;

        pudp = pud_offset(pgdp, addr);
        pud = READ_ONCE(*pudp);
        pr_cont(", pud=%016llx", pud_val(pud));
        if (pud_none(pud) || pud_bad(pud))
            break;

        pmdp = pmd_offset(pudp, addr);
        pmd = READ_ONCE(*pmdp);
        pr_cont(", pmd=%016llx", pmd_val(pmd));
        if (pmd_none(pmd) || pmd_bad(pmd))
            break;

        ptep = pte_offset_map(pmdp, addr);
        pte = READ_ONCE(*ptep);
        pr_cont(", pte=%016llx", pte_val(pte));
        pte_unmap(ptep);
    } while(0);

    pr_cont("\n");
}

 

pgd_addr_end macro

This macro returns the boundary of the current PGD entry: if end lies within the range covered by the current PGD entry, it returns end; otherwise (end is beyond the current PGD entry's range) it returns the start address of the next PGD entry (next PGD start = current PGD start + PGDIR_SIZE).

In the common case it simply returns end, since one PGD entry covers 1G (with a 3-level page table configuration).

#define pgd_addr_end(addr, end)                        \
({    unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;    \
    (__boundary - 1 < (end) - 1)? __boundary: (end);        \
})
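A small user-space sketch (PGDIR_SHIFT = 30 assumed, as in the 3-level configuration above) exercising both branches of the macro:

#include <stdio.h>

#define PGDIR_SHIFT 30
#define PGDIR_SIZE  (1UL << PGDIR_SHIFT)
#define PGDIR_MASK  (~(PGDIR_SIZE - 1))

#define pgd_addr_end(addr, end)                        \
({    unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK;    \
    (__boundary - 1 < (end) - 1) ? __boundary : (end);        \
})

int main(void)
{
    unsigned long addr = 0xffffffc000000000UL;

    /* end (addr + 4M) is inside the current 1G PGD entry: returns end */
    printf("%#lx\n", pgd_addr_end(addr, addr + 0x400000UL));

    /* end is past the current PGD entry: returns the next 1G boundary */
    printf("%#lx\n", pgd_addr_end(addr + 0x1000UL, addr + PGDIR_SIZE + 0x1000UL));

    return 0;
}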

Get the physical address of a struct page

The struct page array is stored in the vmemmap region; a page-table walk like the one below can be used to get the PA corresponding to a struct page (assuming the vmemmap is mapped with 2M PMD block mappings, as is the case with 4K pages):

    /* sketch of a function body in a kernel module context */
    struct page *page;
    pgd_t *pgd;
    pmd_t *pmd;
    unsigned long va;
    unsigned long long pa;

    page = alloc_page(GFP_KERNEL);
    if (page == NULL) {
        pr_err("alloc_page failed.\n");
        return;
    }

    /* VA of the struct page itself, inside the vmemmap region */
    va = (unsigned long)page;

    pgd = pgd_offset_k(va);
    if (pgd_none(*pgd)) {
        pr_err("pgd is none.\n");
        return;
    }

    /* 3-level tables: the PUD is folded, so the pgd entry is used as the pud */
    pmd = pmd_offset((pud_t *)pgd, va);
    if (pmd_none(*pmd)) {
        pr_err("pmd is none.\n");
        return;
    }

    /*
     * vmemmap is mapped with 2M PMD block mappings, so the walk stops at
     * the PMD: PA = block output address + offset of va within the 2M block.
     */
    pa = (pmd_val(*pmd) & PTE_ADDR_MASK & PMD_MASK) + (va & (PMD_SIZE - 1));
    pr_emerg("page struct's va: %#lx, pa: %#llx.\n", va, pa);

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 

 
