Slab object allocation

Under NUMA, slab object allocation starts in __do_cache_alloc:

3192static __always_inline void *
3193__do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
3194{
3195    void *objp;

...
3202    objp = ____cache_alloc(cache, flags);
3203
3204    /*
3205     * We may just have run out of memory on the local node.
3206     * ____cache_alloc_node() knows how to locate memory on other nodes
3207     */
3208    if (!objp)
3209        objp = ____cache_alloc_node(cache, flags, numa_mem_id());
3210
3211out:
3212    return objp;
3213}

First, ____cache_alloc is called to try a fast-path allocation; it is implemented as follows:

2920static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
2921{
2922    void *objp;
2923    struct array_cache *ac;
2924    bool force_refill = false;
2925
2926    check_irq_off();
2927
2928    ac = cpu_cache_get(cachep);
2929    if (likely(ac->avail)) {
2930        ac->touched = 1;
2931        objp = ac_get_obj(cachep, ac, flags, false);
2932
2933        /*
2934         * Allow for the possibility all avail objects are not allowed
2935         * by the current flags
2936         */
2937        if (objp) {
2938            STATS_INC_ALLOCHIT(cachep);
2939            goto out;
2940        }
2941        force_refill = true;
2942    }
2943
2944    STATS_INC_ALLOCMISS(cachep);
2945    objp = cache_alloc_refill(cachep, flags, force_refill);
2946    /*
2947     * the 'ac' may be updated by cache_alloc_refill(),
2948     * and kmemleak_erase() requires its correct value.
2949     */
2950    ac = cpu_cache_get(cachep);
2951
2952out:
2953    /*
2954     * To avoid a false negative, if an object that is in one of the
2955     * per-CPU caches is leaked, we need to make sure kmemleak doesn't
2956     * treat the array pointers as a reference to the object.
2957     */
2958    if (objp)
2959        kmemleak_erase(&ac->entry[ac->avail]);
2960    return objp;
2961}

1. First look in the per-CPU array cache (ac). On a hit the object is returned right away; on a miss, fall through to step 2. (A simplified model of this fast path is sketched right after this list.)

2. Call cache_alloc_refill, which refills the CPU's array cache either from the node's shared array cache or from slabs on the node's partial/free lists, then hands out one object from the refilled cache.
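
A minimal userspace sketch of the step-1 fast path, for illustration only: the structure and field names mirror the kernel's struct array_cache, but the helpers (ac_alloc, ac_free) and the main() driver are made up here and are not kernel code.

#include <stdio.h>
#include <stdlib.h>

/*
 * Illustrative userspace model of the per-CPU array cache fast path.
 * Field names mirror the kernel's struct array_cache; the logic is a
 * sketch, not the kernel implementation.
 */
struct array_cache {
    unsigned int avail;      /* objects currently cached; also top-of-stack index */
    unsigned int limit;      /* capacity of entry[] */
    unsigned int batchcount; /* how many objects one refill would move */
    unsigned int touched;    /* marked on every hit */
    void *entry[];           /* LIFO stack of object pointers */
};

/* Fast path: pop the most recently freed (cache-hot) object. */
static void *ac_alloc(struct array_cache *ac)
{
    if (ac->avail) {
        ac->touched = 1;
        return ac->entry[--ac->avail];
    }
    return NULL;    /* miss: the kernel would call cache_alloc_refill() here */
}

/* Free path: push the object back onto the stack. */
static void ac_free(struct array_cache *ac, void *objp)
{
    if (ac->avail < ac->limit)
        ac->entry[ac->avail++] = objp;
}

int main(void)
{
    struct array_cache *ac = calloc(1, sizeof(*ac) + 8 * sizeof(void *));
    void *objp;

    ac->limit = 8;
    ac->batchcount = 4;

    ac_free(ac, &objp);                 /* pretend one object was freed earlier */
    printf("hit:  %p, avail now %u\n", ac_alloc(ac), ac->avail);
    printf("miss: %p (empty, refill needed)\n", ac_alloc(ac));
    free(ac);
    return 0;
}

Because frees push and allocations pop at the same end of entry[], a just-freed, still cache-hot object is the first one handed back out; only when avail reaches zero does the slow path run.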

cache_alloc_refill is implemented as follows:

2751static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
2752                            bool force_refill)
2753{
2754    int batchcount;
2755    struct kmem_cache_node *n;
2756    struct array_cache *ac;
2757    int node;
2758
2759    check_irq_off();
2760    node = numa_mem_id();
2761    if (unlikely(force_refill))
2762        goto force_grow;
2763retry:
2764    ac = cpu_cache_get(cachep);
2765    batchcount = ac->batchcount;
2766    if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2767        /*
2768         * If there was little recent activity on this cache, then
2769         * perform only a partial refill.  Otherwise we could generate
2770         * refill bouncing.
2771         */
2772        batchcount = BATCHREFILL_LIMIT;
2773    }
2774    n = get_node(cachep, node);
2775
2776    BUG_ON(ac->avail > 0 || !n);
2777    spin_lock(&n->list_lock);
2778
2779    /* See if we can refill from the shared array */
2780    if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
2781        n->shared->touched = 1;
2782        goto alloc_done;
2783    }
2784
2785    while (batchcount > 0) {
2786        struct list_head *entry;
2787        struct page *page;
2788        /* Get slab alloc is to come from. */
2789        entry = n->slabs_partial.next;
2790        if (entry == &n->slabs_partial) {
2791            n->free_touched = 1;
2792            entry = n->slabs_free.next;
2793            if (entry == &n->slabs_free)
2794                goto must_grow;
2795        }
2796
2797        page = list_entry(entry, struct page, lru);
2798        check_spinlock_acquired(cachep);
2799
2800        /*
2801         * The slab was either on partial or free list so
2802         * there must be at least one object available for
2803         * allocation.
2804         */
2805        BUG_ON(page->active >= cachep->num);
2806
2807        while (page->active < cachep->num && batchcount--) {
2808            STATS_INC_ALLOCED(cachep);
2809            STATS_INC_ACTIVE(cachep);
2810            STATS_SET_HIGH(cachep);
2811
2812            ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
2813                                    node));
2814        }
2815
2816        /* move slabp to correct slabp list: */
2817        list_del(&page->lru);
2818        if (page->active == cachep->num)
2819            list_add(&page->lru, &n->slabs_full);
2820        else
2821            list_add(&page->lru, &n->slabs_partial);
2822    }
2823
2824must_grow:
2825    n->free_objects -= ac->avail;
2826alloc_done:
2827    spin_unlock(&n->list_lock);
2828
2829    if (unlikely(!ac->avail)) {
2830        int x;
2831force_grow:
2832        x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
2833
2834        /* cache_grow can reenable interrupts, then ac could change. */
2835        ac = cpu_cache_get(cachep);
2836        node = numa_mem_id();
2837
2838        /* no objects in sight? abort */
2839        if (!x && (ac->avail == 0 || force_refill))
2840            return NULL;
2841
2842        if (!ac->avail)        /* objects refilled by interrupt? */
2843            goto retry;
2844    }
2845    ac->touched = 1;
2846
2847    return ac_get_obj(cachep, ac, flags, force_refill);
2848}

3. If nr (nr > 0) objects can be transferred from n->shared into ac, the refill is done and the allocation succeeds.

4. If n->shared has no usable objects either, objects are taken from the slabs on the node's partial/free lists and pushed into ac.

page->active is the number of objects already allocated from that slab; slab_get_obj() also uses it as the index of the next free entry in the slab's freelist.

ac->avail is the number of available objects in ac; it doubles as the stack index and is consumed by decrementing, so the most recently freed (cache-hot) object is handed out first.

Note line 2825, n->free_objects -= ac->avail;: objects that have been pulled off the slab lists into ac are subtracted from the node's free count, i.e. they are treated as already allocated even though no caller has received them yet.
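
Below is a small userspace sketch of what the step-4 loop does with one slab page. The structures and constants (struct slab_page, ac_entry[], NUM, OBJSZ) are made up for illustration; only the logic follows the kernel code above: page->active walks the slab's freelist of free-object indices, each object taken is pushed into ac, and the objects moved into ac are subtracted from the node's free count as at line 2825.

#include <stdio.h>

#define NUM    4          /* objects per slab (cachep->num)  -- made-up value */
#define OBJSZ 64          /* object size (cachep->size)      -- made-up value */

/* Simplified stand-in for the slab's struct page fields used here. */
struct slab_page {
    unsigned int active;         /* objects already handed out */
    unsigned char freelist[NUM]; /* indices of free objects */
    char s_mem[NUM * OBJSZ];     /* the objects themselves */
};

/* Roughly what slab_get_obj() does: take the next free index. */
static void *slab_get_obj(struct slab_page *page)
{
    unsigned int idx = page->freelist[page->active++];
    return page->s_mem + (size_t)idx * OBJSZ;
}

int main(void)
{
    struct slab_page page = { 0 };
    void *ac_entry[NUM];                    /* stands in for ac->entry[] */
    unsigned int ac_avail = 0;              /* stands in for ac->avail */
    unsigned int free_objects = NUM;        /* stands in for n->free_objects */
    unsigned int batchcount = 3;

    for (unsigned int i = 0; i < NUM; i++)  /* freshly grown slab: 0,1,2,... */
        page.freelist[i] = i;

    /* Pull up to batchcount objects out of this slab into ac. */
    while (page.active < NUM && batchcount--)
        ac_entry[ac_avail++] = slab_get_obj(&page);

    free_objects -= ac_avail;               /* like line 2825 */

    printf("page.active=%u ac_avail=%u free_objects=%u first_obj=%p -> %s list\n",
           page.active, ac_avail, free_objects, ac_entry[0],
           page.active == NUM ? "full" : "partial");
    return 0;
}

Running it with batchcount = 3 and 4 objects per slab leaves one free object in the page, so it would go back on the partial list rather than the full list.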

If neither step 3 nor step 4 managed to transfer any objects into ac, a new slab has to be grown for the cache. cache_grow is implemented as follows:

2588static int cache_grow(struct kmem_cache *cachep,
2589        gfp_t flags, int nodeid, struct page *page)
2590{
2591    void *freelist;
2592    size_t offset;
2593    gfp_t local_flags;
2594    struct kmem_cache_node *n;
2595
2596    /*
2597     * Be lazy and only check for valid flags here,  keeping it out of the
2598     * critical path in kmem_cache_alloc().
2599     */
2600    BUG_ON(flags & GFP_SLAB_BUG_MASK);
2601    local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2602
2603    /* Take the node list lock to change the colour_next on this node */
2604    check_irq_off();
2605    n = get_node(cachep, nodeid);
2606    spin_lock(&n->list_lock);
2607
2608    /* Get colour for the slab, and cal the next value. */
2609    offset = n->colour_next;
2610    n->colour_next++;
2611    if (n->colour_next >= cachep->colour)
2612        n->colour_next = 0;
2613    spin_unlock(&n->list_lock);
2614
2615    offset *= cachep->colour_off;
2616
2617    if (local_flags & __GFP_WAIT)
2618        local_irq_enable();
2619
2620    /*
2621     * The test for missing atomic flag is performed here, rather than
2622     * the more obvious place, simply to reduce the critical path length
2623     * in kmem_cache_alloc(). If a caller is seriously mis-behaving they
2624     * will eventually be caught here (where it matters).
2625     */
2626    kmem_flagcheck(cachep, flags);
2627
2628    /*
2629     * Get mem for the objs.  Attempt to allocate a physical page from
2630     * 'nodeid'.
2631     */
2632    if (!page)
2633        page = kmem_getpages(cachep, local_flags, nodeid);
2634    if (!page)
2635        goto failed;
2636
2637    /* Get slab management. */
2638    freelist = alloc_slabmgmt(cachep, page, offset,
2639            local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
2640    if (!freelist)
2641        goto opps1;
2642
2643    slab_map_pages(cachep, page, freelist);
2644
2645    cache_init_objs(cachep, page);
2646
2647    if (local_flags & __GFP_WAIT)
2648        local_irq_disable();
2649    check_irq_off();
2650    spin_lock(&n->list_lock);
2651
2652    /* Make slab active. */
2653    list_add_tail(&page->lru, &(n->slabs_free));
2654    STATS_INC_GROWN(cachep);
2655    n->free_objects += cachep->num;
2656    spin_unlock(&n->list_lock);
2657    return 1;
2658opps1:
2659    kmem_freepages(cachep, page);
2660failed:
2661    if (local_flags & __GFP_WAIT)
2662        local_irq_disable();
2663    return 0;
2664}
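
The colour handling at lines 2608-2615 is what staggers successive slabs: each new slab takes the node's next colour value, wraps it at cachep->colour, and multiplies it by cachep->colour_off (typically the cache line size) to get the byte offset at which its management data and objects start. A small standalone sketch of that rotation, using made-up values for colour and colour_off:

#include <stdio.h>

int main(void)
{
    /* Illustrative values: colour and colour_off are per-cache fields
     * computed at cache creation time; these numbers are made up. */
    unsigned int colour = 4;        /* cachep->colour: number of distinct offsets */
    unsigned int colour_off = 64;   /* cachep->colour_off: usually the cache line size */
    unsigned int colour_next = 0;   /* n->colour_next: per-node rotating counter */

    for (int slab = 0; slab < 6; slab++) {
        unsigned int offset = colour_next;      /* same steps as cache_grow() */
        colour_next++;
        if (colour_next >= colour)
            colour_next = 0;
        offset *= colour_off;
        printf("slab %d: management/objects start at page start + %u bytes\n",
               slab, offset);
    }
    return 0;
}

Because the offset rotates (0, 64, 128, 192, 0, ...), objects sitting at the same index in different slabs of the same cache do not all land on the same cache lines.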

Once the pages have been obtained, the slab management data (slabmgmt, i.e. the freelist) is allocated:

2445static void *alloc_slabmgmt(struct kmem_cache *cachep,
2446                   struct page *page, int colour_off,
2447                   gfp_t local_flags, int nodeid)
2448{
2449    void *freelist;
2450    void *addr = page_address(page);
2451
2452    if (OFF_SLAB(cachep)) {
2453        /* Slab management obj is off-slab. */
2454        freelist = kmem_cache_alloc_node(cachep->freelist_cache,
2455                          local_flags, nodeid);
2456        if (!freelist)
2457            return NULL;
2458    } else {
2459        freelist = addr + colour_off;
2460        colour_off += cachep->freelist_size;
2461    }
2462    page->active = 0;
2463    page->s_mem = addr + colour_off;
2464    return freelist;
2465}

The slab management data can be placed either inside the slab (on-slab) or outside it (off-slab). The condition for placing it off-slab is:

2194    /*
2195     * Determine if the slab management is 'on' or 'off' slab.
2196     * (bootstrapping cannot cope with offslab caches so don't do
2197     * it too early on. Always use on-slab management when
2198     * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
2199     */
2200    if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
2201        !(flags & SLAB_NOLEAKTRACE))
2202        /*
2203         * Size is large, assume best to place the slab management obj
2204         * off-slab (should allow better packing of objs).
2205         */
2206        flags |= CFLGS_OFF_SLAB;
2207
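As a rough worked example (assuming 4 KiB pages): PAGE_SIZE >> 5 is 128, so after early boot a cache whose object size is at least 128 bytes, and which is not flagged SLAB_NOLEAKTRACE, gets CFLGS_OFF_SLAB and keeps its freelist in a separate cache (cachep->freelist_cache, as seen in alloc_slabmgmt() above), leaving more of the slab's pages for the objects themselves.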

[Figure: on-slab slab layout — colour_off padding | freelist (freelist_size) | obj…]

When the management data is on-slab, the layout is as shown in the figure above: the colour offset comes first, then the freelist, and the objects start right after it at page->s_mem. If CONFIG_DEBUG_SLAB_LEAK is enabled, per-object state is stored after the freelist as well.
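
A tiny sketch of the on-slab address arithmetic performed by alloc_slabmgmt(), using made-up sizes; addr stands for page_address(page) and the printed values are illustrative only:

#include <stdio.h>

int main(void)
{
    /* Made-up example values; in the kernel these come from the cache
     * and from the colour rotation in cache_grow(). */
    unsigned long addr = 0x1000;        /* page_address(page) */
    unsigned long colour_off = 64;      /* this slab's colour offset */
    unsigned long freelist_size = 16;   /* cachep->freelist_size */

    unsigned long freelist = addr + colour_off;      /* on-slab freelist */
    unsigned long s_mem = freelist + freelist_size;  /* first object */

    printf("page:     0x%lx\n", addr);
    printf("freelist: 0x%lx (after %lu bytes of colour padding)\n",
           freelist, colour_off);
    printf("objects:  0x%lx (page->s_mem)\n", s_mem);
    return 0;
}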

Finally the page is tied to the cache and its freelist (slab_map_pages) and the objects are initialized (cache_init_objs).

 
