dpdk heap memory + fbarray + rte_memseg

rte_memseg

The memseg array is what tracks physical memory. As described above, the struct hugepage table records, for each hugepage, the virtual address at which it is mapped inside the process. The memseg array goes one step further: hugepages that are contiguous in both physical and virtual address space, live on the same socket, and share the same page size are grouped into a single memseg structure. The benefit is compact bookkeeping: one descriptor covers an entire contiguous run of pages instead of one entry per page.



The rte_memseg structure itself is simple:

1) phys_addr: the starting physical address of all hugepages covered by this memseg;

2) addr: the starting virtual address of those pages;

3) len: the total size of the memseg;

4) hugepage_sz: the page size of the underlying pages (2 MB or 1 GB).
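
For reference, the structure looks roughly like the sketch below, simplified from the DPDK 18.x headers (RTE_STD_C11 markers and packing attributes omitted; exact fields vary by version):

struct rte_memseg {
    union {
        phys_addr_t phys_addr;  /* start physical address (legacy name) */
        rte_iova_t iova;        /* start IO address */
    };
    union {
        void *addr;             /* start virtual address */
        uint64_t addr_64;       /* keeps addr 64-bit even on 32-bit builds */
    };
    size_t len;                 /* length of the segment */
    uint64_t hugepage_sz;       /* page size of the underlying memory */
    int32_t socket_id;          /* NUMA socket ID */
    uint32_t nchannel;          /* number of memory channels */
    uint32_t nrank;             /* number of memory ranks */
    uint32_t flags;             /* memseg-specific flags */
};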

rte_eal_malloc_heap_init
    /* rte_memseg_contig_walk iterates over each contiguous run of memsegs
     * in the memseg lists and calls malloc_add_seg on it, which hands that
     * memory over to the heap */
    rte_memseg_contig_walk(malloc_add_seg, NULL);
malloc_add_seg:
    malloc_heap_add_memory(heap, found_msl, ms->addr, len);
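
For context, malloc_add_seg() in this era of the codebase looks roughly like the sketch below (paraphrased from DPDK 18.x, so treat the details as approximate): it resolves a writable pointer to the memseg list, picks the heap matching the list's socket, and grows that heap by the walked length.

static int
malloc_add_seg(const struct rte_memseg_list *msl,
        const struct rte_memseg *ms, size_t len, void *arg __rte_unused)
{
    struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
    struct rte_memseg_list *found_msl;
    struct malloc_heap *heap;
    int msl_idx;

    /* one malloc heap per NUMA socket */
    heap = &mcfg->malloc_heaps[msl->socket_id];

    /* msl is const; look up the writable copy by index */
    msl_idx = msl - mcfg->memsegs;
    if (msl_idx < 0 || msl_idx >= RTE_MAX_MEMSEG_LISTS)
        return -1;
    found_msl = &mcfg->memsegs[msl_idx];

    malloc_heap_add_memory(heap, found_msl, ms->addr, len);
    heap->total_size += len;
    return 0;
}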

rte_malloc_socket() is the public entry point. It validates the arguments, allocates from the heap that matches the requested NUMA node, and, when the caller passed SOCKET_ID_ANY, retries on the remaining nodes if the first attempt fails:
void *rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg)
{
    struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; 
    int socket, i;
    void *ret;
   
    /* return NULL if the requested size is 0 or the alignment is not a power of two */
    if (size == 0 || (align && !rte_is_power_of_2(align))) 
        return NULL; 
    /* if the system is not using hugepages, force the target NUMA node to SOCKET_ID_ANY */
    if (!rte_eal_has_hugepages()) 
        socket_arg = SOCKET_ID_ANY; 
    /* if SOCKET_ID_ANY was requested, use the NUMA node the current thread runs on */
    if (socket_arg == SOCKET_ID_ANY) 
        socket = malloc_get_numa_socket();
    else 
        socket = socket_arg;

    /* Check socket parameter */
    if (socket >= RTE_MAX_NUMA_NODES) 
        return NULL; 

    /* allocate the requested size from the malloc_heap of the chosen NUMA node */
    ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type, size, 0, align == 0 ? 1 : align, 0);
    if (ret != NULL || socket_arg != SOCKET_ID_ANY) 
        return ret;
   
    /* allocation failed on the preferred NUMA node, so try all the others */ 
    for (i = 0; i < RTE_MAX_NUMA_NODES; i++) {
        /* we already tried this one */ 
        if (i == socket) 
            continue; 
        
        ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type, size, 0, align == 0 ? 1 : align, 0); 
        if (ret != NULL) 
            return ret; 
    }                                                                           

    return NULL;  
} 
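
A short usage sketch (the buffer name and sizes here are invented for illustration): allocate a cache-line-aligned buffer on socket 0 and release it with rte_free():

#include <stdint.h>
#include <rte_malloc.h>
#include <rte_debug.h>

static void malloc_socket_demo(void)
{
    /* 4 KB, cache-line aligned, pinned to NUMA socket 0 */
    uint8_t *buf = rte_malloc_socket("example_buf", 4096,
            RTE_CACHE_LINE_SIZE, 0);
    if (buf == NULL)
        rte_panic("rte_malloc_socket failed\n");

    /* ... use buf ... */

    rte_free(buf);
}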

The full call path, from rte_calloc() inside rte_eal_init() down to alloc_seg(), can be seen in this gdb backtrace captured while running SPDK's perf tool:
#0 0x00005555555a4211 in alloc_seg (ms=0x20000002e000, addr=0x200000200000, socket_id=0, hi=0x5555558831d8 <internal_config+248>, list_idx=0, seg_idx=0) at /spdk/dpdk/lib/librte_eal/linuxapp/eal/eal_memalloc.c:722
#1 0x00005555555a4a41 in alloc_seg_walk (msl=0x555555805f9c <early_mem_config+124>, arg=0x7fffffffbdc0) at /spdk/dpdk/lib/librte_eal/linuxapp/eal/eal_memalloc.c:926
#2 0x00005555555ae930 in rte_memseg_list_walk_thread_unsafe (func=0x5555555a47d1 <alloc_seg_walk>, arg=0x7fffffffbdc0) at /spdk/dpdk/lib/librte_eal/common/eal_common_memory.c:658
#3 0x00005555555a4fa3 in eal_memalloc_alloc_seg_bulk (ms=0x55555588ec40, n_segs=1, page_sz=2097152, socket=0, exact=true) at /spdk/dpdk/lib/librte_eal/linuxapp/eal/eal_memalloc.c:1086
#4 0x00005555555c28c6 in alloc_pages_on_heap (heap=0x55555580879c <early_mem_config+10364>, pg_sz=2097152, elt_size=16384, socket=0, flags=0, align=64, bound=0, contig=false, ms=0x55555588ec40, n_segs=1) at /spdk/dpdk/lib/librte_eal/common/malloc_heap.c:307
#5 0x00005555555c2b1a in try_expand_heap_primary (heap=0x55555580879c <early_mem_config+10364>, pg_sz=2097152, elt_size=16384, socket=0, flags=0, align=64, bound=0, contig=false) at /spdk/dpdk/lib/librte_eal/common/malloc_heap.c:403
#6 0x00005555555c2d7a in try_expand_heap (heap=0x55555580879c <early_mem_config+10364>, pg_sz=2097152, elt_size=16384, socket=0, flags=0, align=64, bound=0, contig=false) at /spdk/dpdk/lib/librte_eal/common/malloc_heap.c:494
#7 0x00005555555c32e7 in alloc_more_mem_on_socket (heap=0x55555580879c <early_mem_config+10364>, size=16384, socket=0, flags=0, align=64, bound=0, contig=false) at /spdk/dpdk/lib/librte_eal/common/malloc_heap.c:622
#8 0x00005555555c3474 in malloc_heap_alloc_on_heap_id (type=0x5555555e94e5 "rte_services", size=16384, heap_id=0, flags=0, align=64, bound=0, contig=false) at /spdk/dpdk/lib/librte_eal/common/malloc_heap.c:676
#9 0x00005555555c35a4 in malloc_heap_alloc (type=0x5555555e94e5 "rte_services", size=16384, socket_arg=-1, flags=0, align=64, bound=0, contig=false) at /spdk/dpdk/lib/librte_eal/common/malloc_heap.c:714
#10 0x00005555555be9a7 in rte_malloc_socket (type=0x5555555e94e5 "rte_services", size=16384, align=64, socket_arg=-1) at /spdk/dpdk/lib/librte_eal/common/rte_malloc.c:58
#11 0x00005555555bea06 in rte_zmalloc_socket (type=0x5555555e94e5 "rte_services", size=16384, align=64, socket=-1) at /spdk/dpdk/lib/librte_eal/common/rte_malloc.c:77
#12 0x00005555555bea33 in rte_zmalloc (type=0x5555555e94e5 "rte_services", size=16384, align=64) at /spdk/dpdk/lib/librte_eal/common/rte_malloc.c:86
#13 0x00005555555beaa5 in rte_calloc (type=0x5555555e94e5 "rte_services", num=64, size=256, align=64) at /spdk/dpdk/lib/librte_eal/common/rte_malloc.c:104
#14 0x00005555555c684a in rte_service_init () at /spdk/dpdk/lib/librte_eal/common/rte_service.c:82
#15 0x0000555555597677 in rte_eal_init (argc=5, argv=0x55555588ebb0) at /spdk/dpdk/lib/librte_eal/linuxapp/eal/eal.c:1070
#16 0x0000555555595226 in spdk_env_init (opts=0x7fffffffcbe0) at init.c:397
#17 0x000055555555f074 in main (argc=11, argv=0x7fffffffcd18) at perf.c:1743

alloc_seg_walk() is the callback invoked for each memseg list: it skips lists whose page size or socket does not match the request, finds a run of free slots in the list's fbarray, maps a hugepage into each slot via alloc_seg(), and marks those slots as used:
static int
alloc_seg_walk(const struct rte_memseg_list *msl, void *arg)
{
    struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
    struct alloc_walk_param *wa = arg;
    struct rte_memseg_list *cur_msl;
    size_t page_sz;
    int cur_idx, start_idx, j, dir_fd = -1;
    unsigned int msl_idx, need, i;

    if (msl->page_sz != wa->page_sz)
        return 0;
    if (msl->socket_id != wa->socket)
        return 0;

    page_sz = (size_t)msl->page_sz;

    msl_idx = msl - mcfg->memsegs;
    cur_msl = &mcfg->memsegs[msl_idx];

    need = wa->n_segs;

    /* try finding space in memseg list */
    if (wa->exact) {
        /* if we require exact number of pages in a list, find them */
        cur_idx = rte_fbarray_find_next_n_free(&cur_msl->memseg_arr, 0,
                need);
        if (cur_idx < 0)
            return 0;
        start_idx = cur_idx;
    } else {
        int cur_len;

        /* we don't require exact number of pages, so we're going to go
         * for best-effort allocation. that means finding the biggest
         * unused block, and going with that.
         */
        cur_idx = rte_fbarray_find_biggest_free(&cur_msl->memseg_arr,
                0);
        if (cur_idx < 0)
            return 0;
        start_idx = cur_idx;
        /* adjust the size to possibly be smaller than original
         * request, but do not allow it to be bigger.
         */
        cur_len = rte_fbarray_find_contig_free(&cur_msl->memseg_arr,
                cur_idx);
        need = RTE_MIN(need, (unsigned int)cur_len);
    }

    /* do not allow any page allocations during the time we're allocating,
     * because file creation and locking operations are not atomic,
     * and we might be the first or the last ones to use a particular page,
     * so we need to ensure atomicity of every operation.
     *
     * during init, we already hold a write lock, so don't try to take out
     * another one.
     */
    if (wa->hi->lock_descriptor == -1 && !internal_config.in_memory) {
        dir_fd = open(wa->hi->hugedir, O_RDONLY);
        if (dir_fd < 0) {
            RTE_LOG(ERR, EAL, "%s(): Cannot open '%s': %s\n",
                __func__, wa->hi->hugedir, strerror(errno));
            return -1;
        }
        /* blocking writelock */
        if (flock(dir_fd, LOCK_EX)) {
            RTE_LOG(ERR, EAL, "%s(): Cannot lock '%s': %s\n",
                __func__, wa->hi->hugedir, strerror(errno));
            close(dir_fd);
            return -1;
        }
    }

    for (i = 0; i < need; i++, cur_idx++) {
        struct rte_memseg *cur;
        void *map_addr;

        cur = rte_fbarray_get(&cur_msl->memseg_arr, cur_idx);
        /* RTE_PTR_ADD(ptr, x) expands to ((void *)((uintptr_t)(ptr) + (x))) */
        map_addr = RTE_PTR_ADD(cur_msl->base_va,
                cur_idx * page_sz);
        if (alloc_seg(cur, map_addr, wa->socket, wa->hi,
                msl_idx, cur_idx)) {
            RTE_LOG(DEBUG, EAL, "attempted to allocate %i segments, but only %i were allocated\n",
                need, i);

            /* if exact number wasn't requested, stop */
            if (!wa->exact)
                goto out;

            /* clean up */
            for (j = start_idx; j < cur_idx; j++) {
                struct rte_memseg *tmp;
                struct rte_fbarray *arr =
                        &cur_msl->memseg_arr;

                tmp = rte_fbarray_get(arr, j);
                rte_fbarray_set_free(arr, j);

                /* free_seg may attempt to create a file, which
                 * may fail.
                 */
                if (free_seg(tmp, wa->hi, msl_idx, j))
                    RTE_LOG(DEBUG, EAL, "Cannot free page\n");
            }
            /* clear the list */
            if (wa->ms)
                memset(wa->ms, 0, sizeof(*wa->ms) * wa->n_segs);

            if (dir_fd >= 0)
                close(dir_fd);
            return -1;
        }
        if (wa->ms)
            wa->ms[i] = cur;

        rte_fbarray_set_used(&cur_msl->memseg_arr, cur_idx);
    }
out:
    wa->segs_allocated = i;
    if (i > 0)
        cur_msl->version++;
    if (dir_fd >= 0)
        close(dir_fd);
    /* if we didn't allocate any segments, move on to the next list */
    return i > 0;
}
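
The memseg_arr manipulated above is an rte_fbarray: DPDK's file-backed array with a built-in used/free bitmap, which is what makes the find-next-free and find-contig queries cheap. A minimal standalone sketch of the API (the array name and sizes are made up for illustration):

#include <rte_fbarray.h>
#include <rte_debug.h>

static void fbarray_demo(void)
{
    struct rte_fbarray arr;
    int idx;

    /* create a named, file-backed array of 64 int-sized slots */
    if (rte_fbarray_init(&arr, "fbarray_demo", 64, sizeof(int)) < 0)
        rte_panic("fbarray init failed\n");

    idx = rte_fbarray_find_next_free(&arr, 0);  /* first free slot */
    if (idx >= 0) {
        int *slot = rte_fbarray_get(&arr, idx); /* pointer into the array */
        *slot = 42;
        rte_fbarray_set_used(&arr, idx);        /* flip its bitmap bit to used */

        rte_fbarray_set_free(&arr, idx);        /* and back to free */
    }
    rte_fbarray_destroy(&arr);
}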

One level up the backtrace sits alloc_more_mem_on_socket(), called when the heap has no suitable free element. It collects the memseg lists on the target socket, splits them into page sizes the caller explicitly requested versus everything else, sorts each group smallest page size first, and feeds them to try_expand_heap() in that order:
static int
alloc_more_mem_on_socket(struct malloc_heap *heap, size_t size, int socket,
        unsigned int flags, size_t align, size_t bound, bool contig)
{
    struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config;
    struct rte_memseg_list *requested_msls[RTE_MAX_MEMSEG_LISTS];
    struct rte_memseg_list *other_msls[RTE_MAX_MEMSEG_LISTS];
    uint64_t requested_pg_sz[RTE_MAX_MEMSEG_LISTS];
    uint64_t other_pg_sz[RTE_MAX_MEMSEG_LISTS];
    uint64_t prev_pg_sz;
    int i, n_other_msls, n_other_pg_sz, n_requested_msls, n_requested_pg_sz;
    bool size_hint = (flags & RTE_MEMZONE_SIZE_HINT_ONLY) > 0;
    unsigned int size_flags = flags & ~RTE_MEMZONE_SIZE_HINT_ONLY;
    void *ret;

    memset(requested_msls, 0, sizeof(requested_msls));
    memset(other_msls, 0, sizeof(other_msls));
    memset(requested_pg_sz, 0, sizeof(requested_pg_sz));
    memset(other_pg_sz, 0, sizeof(other_pg_sz));

    /*
     * go through memseg list and take note of all the page sizes available,
     * and if any of them were specifically requested by the user.
     */
    n_requested_msls = 0;
    n_other_msls = 0;
    for (i = 0; i < RTE_MAX_MEMSEG_LISTS; i++) {
        struct rte_memseg_list *msl = &mcfg->memsegs[i];

        if (msl->socket_id != socket)
            continue;

        if (msl->base_va == NULL)
            continue;

        /* if pages of specific size were requested */
        if (size_flags != 0 && check_hugepage_sz(size_flags,
                msl->page_sz))
            requested_msls[n_requested_msls++] = msl;
        else if (size_flags == 0 || size_hint)
            other_msls[n_other_msls++] = msl;
    }

    /* sort the lists, smallest first */
    qsort(requested_msls, n_requested_msls, sizeof(requested_msls[0]),
            compare_pagesz);
    qsort(other_msls, n_other_msls, sizeof(other_msls[0]),
            compare_pagesz);

    /* now, extract page sizes we are supposed to try */
    prev_pg_sz = 0;
    n_requested_pg_sz = 0;
    for (i = 0; i < n_requested_msls; i++) {
        uint64_t pg_sz = requested_msls[i]->page_sz;

        if (prev_pg_sz != pg_sz) {
            requested_pg_sz[n_requested_pg_sz++] = pg_sz;
            prev_pg_sz = pg_sz;
        }
    }
    prev_pg_sz = 0;
    n_other_pg_sz = 0;
    for (i = 0; i < n_other_msls; i++) {
        uint64_t pg_sz = other_msls[i]->page_sz;

        if (prev_pg_sz != pg_sz) {
            other_pg_sz[n_other_pg_sz++] = pg_sz;
            prev_pg_sz = pg_sz;
        }
    }

    /* finally, try allocating memory of specified page sizes, starting from
     * the smallest sizes
     */
    for (i = 0; i < n_requested_pg_sz; i++) {
        uint64_t pg_sz = requested_pg_sz[i];

        /*
         * do not pass the size hint here, as user expects other page
         * sizes first, before resorting to best effort allocation.
         */
        if (!try_expand_heap(heap, pg_sz, size, socket, size_flags,
                align, bound, contig))
            return 0;
    }
    if (n_other_pg_sz == 0)
        return -1;

    /* now, check if we can reserve anything with size hint */
    ret = find_suitable_element(heap, size, flags, align, bound, contig);
    if (ret != NULL)
        return 0;

    /*
     * we still couldn't reserve memory, so try expanding heap with other
     * page sizes, if there are any
     */
    for (i = 0; i < n_other_pg_sz; i++) {
        uint64_t pg_sz = other_pg_sz[i];

        if (!try_expand_heap(heap, pg_sz, size, socket, flags,
                align, bound, contig))
            return 0;
    }
    return -1;
}
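
The size_flags / size_hint handling above corresponds to the rte_memzone flags. A hedged usage sketch (the zone name is invented): request 1 GB pages, but let RTE_MEMZONE_SIZE_HINT_ONLY allow fallback to whatever page sizes actually exist:

#include <rte_memzone.h>
#include <rte_lcore.h>
#include <rte_debug.h>

static void memzone_demo(void)
{
    /* prefer 1G pages; without SIZE_HINT_ONLY this would fail outright
     * on a system that only has 2M hugepages */
    const struct rte_memzone *mz = rte_memzone_reserve("demo_mz", 1 << 20,
            rte_socket_id(), RTE_MEMZONE_1GB | RTE_MEMZONE_SIZE_HINT_ONLY);
    if (mz == NULL)
        rte_panic("memzone reserve failed\n");
}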
