linux内存管理(六)- 内核新struct - folio

folio大概是5.16引入的,看起来像是page的封装,这里有一篇讲解folio很好的博客,论好名字的重要性: Linux内核page到folio的变迁-CSDN博客

/*
 * struct folio - as quoted here, three consecutive unions, each of which
 * overlays a group of named fields with a struct page (page, __page_1 and
 * __page_2 respectively).
 *
 * First union:  flags, lru / mlock_count, mapping, index, private / swap,
 *               _mapcount, _refcount, plus the config-dependent members
 *               memcg_data, virtual and _last_cpupid.
 * Second union: _flags_1, _head_1, _folio_avail, _entire_mapcount,
 *               _nr_pages_mapped, _pincount and, under CONFIG_64BIT,
 *               _folio_nr_pages.
 * Third union:  either the _hugetlb_* pointers or _deferred_list, each
 *               preceded by its own flags/head words.
 *
 * NOTE(review): the "private:" / "public:" comments below are kept from the
 * quoted source; they look like kernel-doc visibility markers rather than
 * field descriptions - confirm against the kernel-doc documentation.
 */
struct folio {
    /* private: don't document the anon union */
    union {
        struct {
    /* public: */
            unsigned long flags;
            union {
                struct list_head lru;
    /* private: avoid cluttering the output */
                struct {
                    void *__filler;
    /* public: */
                    unsigned int mlock_count;
    /* private: */
                };
    /* public: */
            };
            struct address_space *mapping;
            pgoff_t index;
            union {
                void *private;
                swp_entry_t swap;
            };
            atomic_t _mapcount;
            atomic_t _refcount;
#ifdef CONFIG_MEMCG
            unsigned long memcg_data;
#endif
#if defined(WANT_PAGE_VIRTUAL)
            void *virtual;
#endif
#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
            int _last_cpupid;
#endif
    /* private: the union with struct page is transitional */
        };
        /* Same storage viewed as a plain struct page. */
        struct page page;
    };
    union {
        struct {
            unsigned long _flags_1;
            unsigned long _head_1;
            unsigned long _folio_avail;
    /* public: */
            atomic_t _entire_mapcount;
            atomic_t _nr_pages_mapped;
            atomic_t _pincount;
#ifdef CONFIG_64BIT
            unsigned int _folio_nr_pages;
#endif
    /* private: the union with struct page is transitional */
        };
        /* Same storage viewed as a plain struct page. */
        struct page __page_1;
    };
    union {
        struct {
            unsigned long _flags_2;
            unsigned long _head_2;
    /* public: */
            void *_hugetlb_subpool;
            void *_hugetlb_cgroup;
            void *_hugetlb_cgroup_rsvd;
            void *_hugetlb_hwpoison;
    /* private: the union with struct page is transitional */
        };
        struct {
            unsigned long _flags_2a;
            unsigned long _head_2a;
    /* public: */
            struct list_head _deferred_list;
    /* private: the union with struct page is transitional */
        };
        /* Same storage viewed as a plain struct page. */
        struct page __page_2;
    };
};

简单来看,它似乎是三个page结构的组合,其中第一个union的布局与page结构几乎一致。引入folio是为了解决长久以来page语义混乱的问题:page除了可以代表单页,也可以代表连续的多个页面,甚至大页。page在内核中应用广泛,这种混乱增加了编写和理解代码的难度,属于人为引入的复杂性。folio代表一个或多个page,本身就可以涵盖page的所有语义。在新的内核代码中,folio在很多场合已经替代了page,但是page依然存在。

比如compound_order的实现。在folio之前是这样的。

/*
 * compound_order - pre-folio variant: report the order of a compound page.
 *
 * Returns 0 when PageHead(@page) is false; otherwise returns the
 * compound_order member of the page immediately following @page.
 */
static inline unsigned int compound_order(struct page *page)
{
    if (PageHead(page))
        return (page + 1)->compound_order;

    return 0;
}

先检查page是不是复合页的head page(PageHead),如果不是(例如单页)则直接返回0;对于复合页,order保存在后一个page(page[1])的compound_order成员中。也就是说,单个page是表示不了多页的,但是folio可以。

/*
 * compound_order - folio-based variant: report the order of a compound page.
 *
 * @page is reinterpreted as a struct folio. If PG_head is set in the folio's
 * flags, the order is the low 8 bits of _flags_1; otherwise the result is 0.
 */
static inline unsigned int compound_order(struct page *page)
{
    struct folio *folio = (struct folio *)page;

    if (test_bit(PG_head, &folio->flags))
        return folio->_flags_1 & 0xff;

    return 0;
}

新的代码首先将page强转为folio,再判断其是否为复合页的head page;如果是,order保存在_flags_1的低8位中。看起来并没有简化,反而更复杂了一点,但是可以通过一个folio结构解决问题,代码上不再需要显式访问tail page(从上面folio的定义看,_flags_1所在的union与__page_1重叠,数据实际上仍位于第一个tail page对应的位置)。

看一下page结构

/*
 * struct page - the page structure discussed in this article.
 *
 * Layout visible below: a flags word; a five-word union whose members are
 * alternatives for different uses (page cache / anonymous pages, page_pool,
 * tail pages of a compound page, ZONE_DEVICE pages, or an rcu_head); a
 * 4-byte union of _mapcount and page_type; _refcount; and then members that
 * exist only under CONFIG_MEMCG, WANT_PAGE_VIRTUAL,
 * LAST_CPUPID_NOT_IN_PAGE_FLAGS and CONFIG_KMSAN.
 */
struct page {
    unsigned long flags;        /* Atomic flags, some possibly
                     * updated asynchronously */
    /*
     * Five words (20/40 bytes) are available in this union.
     * WARNING: bit 0 of the first word is used for PageTail(). That
     * means the other users of this union MUST NOT use the bit to
     * avoid collision and false-positive PageTail().
     */
    union {
        struct {    /* Page cache and anonymous pages */
            /**
             * @lru: Pageout list, eg. active_list protected by
             * lruvec->lru_lock.  Sometimes used as a generic list
             * by the page owner.
             */
            union {
                struct list_head lru;

                /* Or, for the Unevictable "LRU list" slot */
                struct {
                    /* Always even, to negate PageTail */
                    void *__filler;
                    /* Count page's or folio's mlocks */
                    unsigned int mlock_count;
                };

                /* Or, free page */
                struct list_head buddy_list;
                struct list_head pcp_list;
            };
            /* See page-flags.h for PAGE_MAPPING_FLAGS */
            struct address_space *mapping;
            union {
                pgoff_t index;        /* Our offset within mapping. */
                unsigned long share;    /* share count for fsdax */
            };
            /**
             * @private: Mapping-private opaque data.
             * Usually used for buffer_heads if PagePrivate.
             * Used for swp_entry_t if PageSwapCache.
             * Indicates order in the buddy system if PageBuddy.
             */
            unsigned long private;
        };
        struct {    /* page_pool used by netstack */
            /**
             * @pp_magic: magic value to avoid recycling non
             * page_pool allocated pages.
             */
            unsigned long pp_magic;
            struct page_pool *pp;
            unsigned long _pp_mapping_pad;
            unsigned long dma_addr;
            atomic_long_t pp_ref_count;
        };
        struct {    /* Tail pages of compound page */
            unsigned long compound_head;    /* Bit zero is set */
        };
        struct {    /* ZONE_DEVICE pages */
            /** @pgmap: Points to the hosting device page map. */
            struct dev_pagemap *pgmap;
            void *zone_device_data;
            /*
             * ZONE_DEVICE private pages are counted as being
             * mapped so the next 3 words hold the mapping, index,
             * and private fields from the source anonymous or
             * page cache page while the page is migrated to device
             * private memory.
             * ZONE_DEVICE MEMORY_DEVICE_FS_DAX pages also
             * use the mapping, index, and private fields when
             * pmem backed DAX files are mapped.
             */
        };

        /** @rcu_head: You can use this to free a page by RCU. */
        struct rcu_head rcu_head;
    };

    union {        /* This union is 4 bytes in size. */
        /*
         * If the page can be mapped to userspace, encodes the number
         * of times this page is referenced by a page table.
         */
        atomic_t _mapcount;

        /*
         * If the page is neither PageSlab nor mappable to userspace,
         * the value stored here may help determine what this page
         * is used for.  See page-flags.h for a list of page types
         * which are currently stored here.
         */
        unsigned int page_type;
    };

    /* Usage count. *DO NOT USE DIRECTLY*. See page_ref.h */
    atomic_t _refcount;

#ifdef CONFIG_MEMCG
    unsigned long memcg_data;
#endif

    /*
     * On machines where all RAM is mapped into kernel address space,
     * we can simply calculate the virtual address. On machines with
     * highmem some memory is mapped into kernel virtual memory
     * dynamically, so we need a place to store that address.
     * Note that this field could be 16 bits on x86 ... ;)
     *
     * Architectures with slow multiplication can define
     * WANT_PAGE_VIRTUAL in asm/page.h
     */
#if defined(WANT_PAGE_VIRTUAL)
    void *virtual;            /* Kernel virtual address (NULL if
                       not kmapped, ie. highmem) */
#endif /* WANT_PAGE_VIRTUAL */

#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
    int _last_cpupid;
#endif

#ifdef CONFIG_KMSAN
    /*
     * KMSAN metadata for this page:
     *  - shadow page: every bit indicates whether the corresponding
     *    bit of the original page is initialized (0) or not (1);
     *  - origin page: every 4 bytes contain an id of the stack trace
     *    where the uninitialized value was created.
     */
    struct page *kmsan_shadow;
    struct page *kmsan_origin;
#endif
} _struct_page_alignment;

1. flags

enum pageflags {
    PG_locked,        /* Page is locked. Don't touch. */
    PG_writeback,        /* Page is under writeback */
    PG_referenced,
    PG_uptodate,
    PG_dirty,
    PG_lru,
    PG_head,        /* Must be in bit 6 */
    PG_waiters,        /* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
    PG_active,
    PG_workingset,
    PG_error,
    PG_slab,
    PG_owner_priv_1,    /* Owner use. If pagecache, fs may use*/
    PG_arch_1,
    PG_reserved,
    PG_private,        /* If pagecache, has fs-private data */
    PG_private_2,        /* If pagecache, has fs aux data */
    PG_mappedtodisk,    /* Has blocks allocated on-disk */
    PG_reclaim,        /* To be reclaimed asap */
    PG_swapbacked,        /* Page is backed by RAM/swap */
    PG_unevictable,
...

flags由四部分构成:|node|zone|last_cpupid|flags|

2. mapping

mapping的最低两个bit可以用来判断是否为匿名映射或ksm映射。对于匿名映射,它指向anon_vma;对于file映射,它指向address_space结构。

/*
 * Tag bits tested against the mapping pointer (folio_mapping() masks the
 * pointer value with PAGE_MAPPING_FLAGS).
 */
/* Bit 0. */
#define PAGE_MAPPING_ANON    0x1
/* Bit 1. */
#define PAGE_MAPPING_MOVABLE    0x2
/* Both bits set together. */
#define PAGE_MAPPING_KSM    (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
/* Mask covering both tag bits (same value as PAGE_MAPPING_KSM). */
#define PAGE_MAPPING_FLAGS    (PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)

3. _refcount

表示页面在内核中的引用次数。大于0代表正在使用。

/*
 * get_page - take a reference via the folio that page_folio() returns
 * for @page.
 */
static inline void get_page(struct page *page)
{
    struct folio *folio = page_folio(page);

    folio_get(folio);
}

/*
 * folio_get - take a reference on @folio.
 *
 * First passes folio_ref_zero_or_close_to_overflow(@folio) to
 * VM_BUG_ON_FOLIO(), then increments the count through folio_ref_inc().
 */
static inline void folio_get(struct folio *folio)
{
    VM_BUG_ON_FOLIO(folio_ref_zero_or_close_to_overflow(folio), folio);
    folio_ref_inc(folio);
}

/*
 * folio_ref_inc - increment the reference count of @folio by delegating to
 * page_ref_inc() on the folio's embedded page member.
 */
static inline void folio_ref_inc(struct folio *folio)
{
    struct page *page = &folio->page;

    page_ref_inc(page);
}

/*
 * page_ref_inc - atomically increment @page's _refcount.
 *
 * If page_ref_tracepoint_active(page_ref_mod) is true, the change (+1) is
 * additionally passed to __page_ref_mod().
 */
static inline void page_ref_inc(struct page *page)
{
    atomic_inc(&page->_refcount);
    /* Only call the tracing hook when the tracepoint is active. */
    if (page_ref_tracepoint_active(page_ref_mod))
        __page_ref_mod(page, 1);
}

folio让page操作变得非常繁琐,这样真的好吗?

分配内存时_refcount + 1, 加入lru链表时+1等。

4. _mapcount

表示这个页面被进程映射的次数,用于反向映射。-1代表没有页表映射。

 page相关的API

/*
 * page_zone - return the zone that @page belongs to.
 *
 * Looks up the node data for page_to_nid(@page) and returns the address of
 * the node_zones entry selected by page_zonenum(@page).
 */
static inline struct zone *page_zone(const struct page *page)
{
        return NODE_DATA(page_to_nid(page))->node_zones + page_zonenum(page);
}
/*
 * page_zone_id - extract the zone id field from @page's flags word:
 * shift right by ZONEID_PGSHIFT, then mask with ZONEID_MASK.
 */
static inline int page_zone_id(struct page *page)
{
        unsigned long shifted = page->flags >> ZONEID_PGSHIFT;

        return shifted & ZONEID_MASK;
}

mapping相关

/*
 * page_mapping - return folio_mapping() of the folio that page_folio()
 * yields for @page.
 */
struct address_space *page_mapping(struct page *page)
{
    struct folio *folio = page_folio(page);

    return folio_mapping(folio);
}

/*
 * folio_mapping - return the address_space associated with @folio, or NULL.
 *
 * Returns NULL for slab folios, the swap address space for swap-cache
 * folios, NULL when any PAGE_MAPPING_FLAGS bit is set in the mapping
 * pointer, and the mapping pointer itself otherwise.
 */
struct address_space *folio_mapping(struct folio *folio)
{
    struct address_space *mapping;

    /* This happens if someone calls flush_dcache_page on slab page */
    if (unlikely(folio_test_slab(folio)))
        return NULL;

    if (unlikely(folio_test_swapcache(folio)))
        return swap_address_space(folio->swap);

    mapping = folio->mapping;

    /*
     * Tag bits set in the pointer: an anonymous or KSM page (any
     * PAGE_MAPPING_FLAGS bit), so there is no address_space to return.
     */
    return ((unsigned long)mapping & PAGE_MAPPING_FLAGS) ? NULL : mapping;
}

page_mapped

/*
 * page_mapped - report whether @page is currently mapped.
 *
 * For a non-compound page the answer is whether _mapcount is >= 0; for a
 * compound page the decision is delegated to folio_large_is_mapped().
 */
static inline bool page_mapped(struct page *page)
{
    if (likely(!PageCompound(page))) {
        int mapcount = atomic_read(&page->_mapcount);

        return mapcount >= 0;
    }

    return folio_large_is_mapped(page_folio(page));
}

对于普通页面只需判断_mapcount值。

 

posted on 2024-06-11 14:07  半山随笔  阅读(432)  评论(0)  编辑  收藏  举报

导航