STL内存配置器

本系列文章更多是笔记形式，希望能在总结过程中将一些东西理顺。难免出错，欢迎指正。

STL六大功能组件：

1.容器（containers）；2.算法（algorithms）；3.迭代器（iterators）；4.仿函数（functors）；5.配接器（adapters）；6.配置器（allocators）。

各个功能组件间存在交互关系，这里不涉及这些内容，本篇文章讨论容器的内存配置。

首先，容器用来存放数据，那么存放数据之前必须向系统申请内存资源。我们知道c++中通常用 new 表达式来创建对象：它先调用 ::operator new / ::operator new[] 为对象分配内存，再调用对应的构造函数构造对象。

例如： class Foo { ... }; Foo * f = new Foo; delete f;

这个过程分两步: 1. ::operator new 配置内存; 2.调用Foo::Foo() 在申请的内存上构建对象.

STL的配置器也分两个过程进行:

1.定义std::alloc::allocate()负责申请空间, std::alloc::deallocate() 负责释放空间

2.对象构造和析构分别调用 ::construct()和::destroy() --这两个函数可查阅<<c++ primer>>

实现的代码文件结构如下：

<memory>:

　　　　　　1.<stl_construct.h>:定义了全局的construct()和destroy()，完成对象的构造和析构，符合STL标准规范

　　　　　 2.<stl_alloc.h>:定义了一，二级配置器彼此合作，名称为alloc

　　　　　　3.<stl_uninitialized.h>:定义一些全局函数用来填充或复制大块内存数据,这里不想谈。

但是STL的容器所使用的heap内存是由SGI特殊的空间配置器 std::alloc来完成的，说它特殊是因为它不符合STL标准规范，不过SGI本身也有符合标准的空间配置器 std::allocator，

但因为其效率相对前者较低，所以容器的空间配置器为 std::alloc

例如 vector的声明： template<class T, class Alloc = alloc>

　　　　　　　　　　class vector{ ... }

其中alloc便是std::alloc,默认使用这个。

刚才说到SGI的标准配置器效率不高，那么这个std::alloc效率又高在哪里呢？

答案其实就在<stl_alloc.h>中定义的一、二级配置器的配合使用上.

SGI的标准的配置器其实就是对 ::operator new()和 ::operator delete()的简单的封装,而这两个函数相当于c 中的malloc()和free()函数。

而std::alloc的分配策略如下：

1.当需要配置的区块大于 128 bytes时，直接调用一级配置器，也就是封装 malloc()和free()

  #include <stddef.h>
  #include <stdlib.h>

  // Out-of-memory policy used by the first-level allocator. A definition of
  // __THROW_BAD_ALLOC supplied before this point takes precedence. The exception
  // branch is disabled (#if 0), so the default is to print a message and exit(1).
  #if 0
  #    include <new>
  #    define __THROW_BAD_ALLOC throw std::bad_alloc()
  #elif !defined(__THROW_BAD_ALLOC)
  #    include <iostream>
  // Wrapped in do/while(0) so the macro expands to exactly one statement.
  #    define __THROW_BAD_ALLOC do { std::cerr << "out of memory" << std::endl; exit(1); } while (0)
  #endif

  // First-level allocator: a thin wrapper around malloc()/realloc()/free().
  // When malloc()/realloc() fail, the request is retried through a user-settable
  // out-of-memory handler. The template parameter `inst` is not used by the body.
  template<int inst>
  class __malloc_alloc_template
  {
      private:
          // oom = out of memory. Retry loops entered after malloc()/realloc() fail.
          static void * oom_malloc(size_t);
          static void * oom_realloc(void *, size_t);
          // Handler invoked before each retry. Because memory is not obtained via
          // ::operator new, the C++ new-handler mechanism does not apply here, so
          // the class keeps its own handler pointer.
          static void (* __malloc_alloc_oom_handler)();

      public:
          // Returns n bytes from malloc(); falls back to oom_malloc() on failure.
          static void * allocate(size_t n)
          {
              void * result = malloc(n);
              if (0 == result)
              {
                  result = oom_malloc(n);
              }

              return result;
          }

          // Releases p with free(). The size argument is accepted but not used.
          // Declared void: the function has nothing to return.
          static void deallocate(void *p, size_t /* n */)
          {
              free(p);
          }

          // Resizes p to new_sz bytes via realloc(); falls back to oom_realloc()
          // on failure.
          static void * reallocate(void *p, size_t new_sz)
          {
              void *result = realloc(p, new_sz);
              if (0 == result)
              {
                  result = oom_realloc(p, new_sz);
              }

              return result;
          }

          // Installs f as the out-of-memory handler and returns the previous one.
          static void (* set_malloc_handler(void  (*f)())) ()
          {
              void (* old)() = __malloc_alloc_oom_handler;
              __malloc_alloc_oom_handler = f;

              return old;
          }
  };

  // No handler is installed by default.
  template <int inst>
  void (* __malloc_alloc_template<inst>::__malloc_alloc_oom_handler)() = 0;

  // Repeats "call handler, retry malloc(n)" until malloc() succeeds.
  // With no handler installed, __THROW_BAD_ALLOC is executed instead.
  template <int inst>
  void * __malloc_alloc_template<inst>::oom_malloc(size_t n)
  {
      void (* my_malloc_handler)();
      void * result;

      for (;;)
      {
          // Re-read on every pass: the handler may install a different handler.
          my_malloc_handler = __malloc_alloc_oom_handler;
          if (0 == my_malloc_handler)
          {
              __THROW_BAD_ALLOC;
          }
          (*my_malloc_handler)();
          result = malloc(n);

          if (result)
          {
              return result;
          }
      }
  }

  // Same retry loop as oom_malloc(), but for realloc(p, new_sz).
  template <int inst>
  void * __malloc_alloc_template<inst>::oom_realloc(void *p, size_t new_sz)
  {
      void (* my_realloc_handler)();
      void * result;

      for (;;)
      {
          my_realloc_handler = __malloc_alloc_oom_handler;
          if (0 == my_realloc_handler)
          {
              __THROW_BAD_ALLOC;
          }

          (*my_realloc_handler)();

          result = realloc(p, new_sz);
          if (result)
          {
              return result;
          }
      }
  }

  typedef __malloc_alloc_template<0> malloc_alloc;

View Code

  #include <stddef.h>
  #include <stdlib.h>

  // Out-of-memory policy for the first-level allocator. __THROW_BAD_ALLOC may be
  // predefined by the user; the exception branch is disabled (#if 0), so the
  // default reports the failure and calls exit(1).
  #if 0
  #    include <new>
  #    define __THROW_BAD_ALLOC throw std::bad_alloc()
  #elif !defined(__THROW_BAD_ALLOC)
  #    include <iostream>
  // do/while(0) keeps the two actions together as a single statement.
  #    define __THROW_BAD_ALLOC do { std::cerr << "out of memory" << std::endl; exit(1); } while (0)
  #endif

  // First-level allocator: forwards to malloc()/realloc()/free() and retries a
  // failed request through a user-settable out-of-memory handler.
  template<int inst>
  class __malloc_alloc_template
  {
      private:
          // Retry loops used after malloc()/realloc() return null.
          static void * oom_malloc(size_t);
          static void * oom_realloc(void *, size_t);
          // Current out-of-memory handler; null means "none installed".
          static void (* __malloc_alloc_oom_handler)();

      public:
          // malloc(n), with oom_malloc(n) as the fallback.
          static void * allocate(size_t n)
          {
              void * result = malloc(n);
              if (0 == result)
              {
                  result = oom_malloc(n);
              }

              return result;
          }

          // free(p); the size argument is unused. Returns nothing.
          static void deallocate(void *p, size_t /* n */)
          {
              free(p);
          }

          // realloc(p, new_sz), with oom_realloc(p, new_sz) as the fallback.
          static void * reallocate(void *p, size_t new_sz)
          {
              void *result = realloc(p, new_sz);
              if (0 == result)
              {
                  result = oom_realloc(p, new_sz);
              }

              return result;
          }

          // Sets the handler to f and hands back the one it replaced.
          static void (* set_malloc_handler(void  (*f)())) ()
          {
              void (* old)() = __malloc_alloc_oom_handler;
              __malloc_alloc_oom_handler = f;

              return old;
          }
  };

  // The handler starts out null.
  template <int inst>
  void (* __malloc_alloc_template<inst>::__malloc_alloc_oom_handler)() = 0;

  // Calls the handler and retries malloc(n) until it succeeds; executes
  // __THROW_BAD_ALLOC when no handler is installed.
  template <int inst>
  void * __malloc_alloc_template<inst>::oom_malloc(size_t n)
  {
      void (* my_malloc_handler)();
      void * result;

      for (;;)
      {
          my_malloc_handler = __malloc_alloc_oom_handler;
          if (0 == my_malloc_handler)
          {
              __THROW_BAD_ALLOC;
          }
          (*my_malloc_handler)();
          result = malloc(n);

          if (result)
          {
              return result;
          }
      }
  }

  // Calls the handler and retries realloc(p, new_sz) until it succeeds; executes
  // __THROW_BAD_ALLOC when no handler is installed.
  template <int inst>
  void * __malloc_alloc_template<inst>::oom_realloc(void *p, size_t new_sz)
  {
      void (* my_realloc_handler)();
      void * result;

      for (;;)
      {
          my_realloc_handler = __malloc_alloc_oom_handler;
          if (0 == my_realloc_handler)
          {
              __THROW_BAD_ALLOC;
          }

          (*my_realloc_handler)();

          result = realloc(p, new_sz);
          if (result)
          {
              return result;
          }
      }
  }

  typedef __malloc_alloc_template<0> malloc_alloc;

2.当需要配置的区块不大于 128 bytes（即 ≤128）时，调用第二级配置器

那么第二级配置器由哪些组成呢?

1:一个有16个单元的指针数组，每个单元中的指针指向一个链表，链表元素如下。

// Node of a free list. The two members share the same storage:
// free_list_link is the "next" pointer used while the block sits on a free list,
// client_data gives byte-wise access to the start of the same block.
union obj
{
    union obj * free_list_link;
    char client_data[1];
};

这16个单元从0-15管理大小分别为8,16,24,...128bytes的小额区块，也就是每个单元指向的链表的元素大小分别为这些。

假如申请一个大小在[1,8]或[17,24]范围内的空间，该配置器需分别从元素大小为8、24的链表中取一个元素给客户端, 那么如何根据申请的大小来判断分配哪种链表中的元素呢？如下

enum {__ALIGN = 8};

// Maps a request size in bytes to the index of the free list that serves it:
// 1..8 -> 0, 9..16 -> 1, 17..24 -> 2, and so on.
// Precondition: bytes >= 1. For bytes == 0 the unsigned subtraction wraps around.
static size_t FREELIST_INDEX(size_t bytes)
{
    return ( ( (bytes) + __ALIGN - 1) / __ALIGN - 1);
}

可自行测试，例如申请7bytes的空间，代入后得到数组的index为0，即需要从该元素指针指向的链表申请空间，以此类推.

2:内存池。有了这样的维护不同大小的链表的数组，但是链表的各个元素的空间又由哪来的呢，std::alloc 还维护了一个内存池,也就是用两个指针，一个指向内存池开头，另一个指向结尾，每当一个链表的元素用光时，当再次有请求该大小的链表元素时，

就会先向该内存池要空间，默认从该内存池中取出20个对象大小的空间，然后将这些空间再重新组织成链表的形式，放到数组中。

3:堆内存。当内存池中的空间用完后，便向堆申请空间。

4:若堆中的内存都没有了，那么这时候该怎么办呢？这时候就向链表元素更大的链表要空间.例如，当申请19bytes时，首先向元素大小为24的链表要空间，若没有了，向内存池要，若有，申请20*24的空间，然后

重新组织成链表形式放回数组，并分配1个空间，若内存池也没有了，那就向堆要空间，如果堆也没了，这时，向元素大小为32或更大的链表要空间，如果有的话就取出一个分配下去，然后把剩余的空间放到对应大小

的链表中，例如申请24bytes的时候，堆中也没有可用的了，那么这时需要向32以及更大的去要一个元素，这里假定是32的也没了，但是64的有空间，这时便从元素大小为64的链表中取一个下来放入内存池，再从内存池中切出 64/24 = 2 个24bytes的区块（一个分给用户，另一个挂到元素大小为24的链表上），剩下的16bytes留在内存池中，下次补充内存池时会被编入元素大小为16的链表。

总结起来就是对应客户申请大小的链表->内存池 ->堆->元素大小更大的链表->内存不足处理程序.

本想介绍下二级配置器有哪些东西，一不小心把过程说了出来。

下边分析源码：

//Second-level allocator follows
246 //It maintains a memory pool that serves small requests of at most 128 bytes
247 //It keeps several free lists whose node sizes run from 8 to 128 bytes, all multiples of 8
248 //A request that is not a multiple of 8 is rounded up to the next multiple of 8,
249 //and one node of that size is then taken from the matching list to satisfy it
250 #ifdef __SUNPRO_CC
251     enum {__ALIGN = 8};  //rounding boundary for small blocks: a requested size n is rounded up to the nearest multiple of 8 that is >= n
252     enum {__MAX_BYTES = 128};   //largest request served here; anything larger goes to the first-level allocator
253     enum {__NFREELISTS = __MAX_BYTES/__ALIGN}; //length of the free_list array (128/8 = 16)
254 #endif
255 
256 //Second-level allocator
257 template <bool threads, int inst>
258 class __default_alloc_template
259 {
260     private:
261 # ifndef __SUNPRO_CC
262     enum {__ALIGN = 8};  //rounding boundary for small blocks
263     enum {__MAX_BYTES = 128};   //upper size limit for small blocks
264     enum {__NFREELISTS = __MAX_BYTES/__ALIGN};
265 # endif
266     //Rounds bytes up to a multiple of __ALIGN (8) by adding 7 and clearing the low three bits
267     static size_t ROUND_UP(size_t bytes)
268     {
269         return (((bytes) + __ALIGN-1) & ~(__ALIGN - 1));
270     }
271 __PRIVATE:
272     union obj
273     {
274         union obj * free_list_link;  //points to the next node while the block is on a free list
275         char client_data[1]; //byte view of the same storage; as a union member it shares memory with free_list_link, so no separate link field is needed
276     };
277     private:
278 # ifdef __SUNPRO_CC
279     static obj * __VOLATILE free_list[]; 
280 # else
281     static obj * __VOLATILE free_list[__NFREELISTS];//array of __NFREELISTS (16) list heads; each element points to the first node of one free list
282 # endif
283     static size_t FREELIST_INDEX(size_t bytes)    //maps a request of `bytes` bytes to the index of the free list that serves it (1..8 -> 0, 9..16 -> 1, ...)
284     {
285         return (((bytes) + __ALIGN-1)/__ALIGN - 1);
286     }
287 
288     //Returns an object of size n, and may add further blocks of size n to the free list
289     static void *refill(size_t n);
290     //Obtains space for nobjs blocks of size "size";
291     //nobjs is passed by reference and may be lowered if that many blocks cannot be supplied
292     static char *chunk_alloc(size_t size, int &nobjs);
293 
294     //State of the chunk (memory pool) allocation
295     static char *start_free; //start of the memory pool; changed only in chunk_alloc()
296     static char *end_free;   //end of the memory pool; changed only in chunk_alloc()
297     static size_t heap_size; //running total of bytes requested from the heap for the pool (incremented in chunk_alloc())
298 /*
//Definitions and initial values of the static data members
 template <bool threads, int inst>
572 char *__default_alloc_template<threads, inst>::start_free = 0; //pool starts out empty
573 
574 template <bool threads, int inst>
575 char *__default_alloc_template<threads, inst>::end_free = 0; //pool starts out empty
576 
577 template <bool threads, int inst>
578 size_t __default_alloc_template<threads, inst>::heap_size = 0; //nothing obtained from the heap yet
579 
580 //All 16 free lists (one per block size) start out empty
581 template <bool threads, int inst>
582 typename __default_alloc_template<threads, inst>::obj * __VOLATILE
583 __default_alloc_template<threads, inst>::free_list[
584 # ifdef __SUNPRO_CC
585     __NFREELISTS
586 # else
587     __default_alloc_template<threads, inst>::__NFREELISTS
588 # endif
589 ] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, };

以上是除去锁以后的，加锁的以后讨论。下面看看二级配置器是如何配置空间的：

static void * allocate(size_t n)  //std::alloc的申请函数
337         {
338             obj * __VOLATILE * my_free_list;
339             obj * __RESTRICT result;
340 
341             //需要分配的大小大于二级配置器的__MAX_BYTES，直接使用第一级配置器
342             if (n > (size_t) __MAX_BYTES)
343             {
344                 return(malloc_alloc::allocate(n));
345             }
346             my_free_list = free_list + FREELIST_INDEX(n); //找到比需要分配的大小大，且最接近的大小块所在的链表所在free_list数组中的位置
347             
352             result = *my_free_list;  //取出找的对应链表的指向第一个节点的指针，插入也是从第一个插入，前插。
353             if (result == 0)    //对应的链表中没有剩余未分配的节点区块
354             {
355                 void *r = refill(ROUND_UP(n));    //再从内存池中分配一批，需求大小的区块(实际大小是请求大小上调至8的倍数后的数值),
356                                 //然后，放入对应链表，待分配给请求
357                 return r;
358             }
359             //如果对应大小区块的链表中不为空，还有待分配的区块，取出第一个节点
360             *my_free_list = result -> free_list_link;
361             return (result);
362         };
363 
364         //p不可以是0
365         static void deallocate(void *p, size_t n)
366         {
367             obj *q = (obj *)p;
368             obj * __VOLATILE * my_free_list;
369 
370             //大于区块大小上限的，直接调用第一级配置器释放
371             if (n > (size_t) __MAX_BYTES)
372             {
373                 malloc_alloc::deallocate(p, n);
374                 return;
375             }
376             my_free_list = free_list + FREELIST_INDEX(n);
377            
382             //头插法，插入对应大小的区块链表
383             q -> free_list_link = *my_free_list;
384             *my_free_list = q;
385           }
387

可以看到，allocate()函数的过程如上所述，先从链表空间取，若链表为空，则去内存池去申请，调用的函数是 refill(ROUND_UP(n)),因为从内存池中获得的都是8的倍数，所以先将 n ROUND_UP一下。

下面是refill函数：

487 template <bool threads, int inst>
488 void* __default_alloc_template<threads, inst>::refill(size_t n)
489 {
490     int nobjs = 20;  //默认一次分配20个需求大小的区块
491     char * chunk = chunk_alloc(n, nobjs); //到内存池中获取控件，chunk是分配的空间的开始地址，令其类型为char *,主要是因为一个char的大小正好是一个byte
492     obj * __VOLATILE *my_free_list;
493     obj * result;
494     obj * current_obj, * next_obj;
495     int i;
496 
497     //如果只获得一个区块，这个区块就分配给调用者,free list 无新节点
498     if (1 == nobjs) return chunk;//nobjs开始定义为20，这里为什么要检查是否为1呢，原因是以传引用的方式穿到chunk_alloc，并且该函数会将njobs修改为实际申请到的数量
499     //否则准备调整free list，纳入新节点
500     my_free_list = free_list + FREELIST_INDEX(n);
501 
502     //以下在chunk空间内建立free list
503     result = (obj *)chunk;  //这一块准备返回给客端
504     // 以下导引free list 指向新配置的空间(取自内存池)
505     
506     //由于chunk是char*，所以加上n，就表示走过n个char，
507     //一个char正好是一个byte，所以chunk+n现在指向第二个区块
508     *my_free_list = next_obj = (obj *)(chunk + n); 
509     for (i = 1; ; ++i)
510     {
511         // 从1开始，因为第0个将返回给客端
512         current_obj = next_obj;
513         // 每次移动n个char，正好是n个byte，所以正好指向下个区块
514         next_obj = (obj *)((char *)next_obj + n);

              //下面讲下这个判断，假如从内存池中申请到了3个块的连续空间，上边的操作已经将第一个块空间返回个用户，那么只需要将剩下的两个换成链表形式，i表示已经被换成节点的个数，而 njobs表示总共个数，又由于第一个已经分配给了用户，所以只需处理njobs - 1个，那么nobjs - 1 == i 也就表示：是否将剩下的整块空间整理成的链表形式。
515         if (nobjs - 1 == i)
516         {
517             // 已经遍历完，此时next_obj指向的内存已经超出我们分配的大小了
518             // 不属于我们的内存
519             current_obj -> free_list_link = 0;
520             break;
521         }
522         else
523         {
524             current_obj -> free_list_link = next_obj;
525         }
526     }
527     return result;
528 }

那么chunk_alloc又是什么样的呢?:

// Carves space for up to nobjs blocks of `size` bytes out of the memory pool
// [start_free, end_free). nobjs is passed by reference and is lowered when fewer
// blocks can be supplied. When the pool cannot supply even one block it is
// replenished from malloc(), then from larger free lists, then from the
// first-level allocator, and the function calls itself again.
template <bool threads, int inst>
401 char *
402 __default_alloc_template<threads, inst>::chunk_alloc(size_t size, int& nobjs)
403 {
404     char * result;
405     size_t total_bytes = size * nobjs;
406     size_t bytes_left = end_free - start_free;    //bytes remaining in the pool
407 
408     if (bytes_left >= total_bytes)
409     {
410         //the pool can satisfy the whole request
411         result = start_free;
412         start_free += total_bytes;
413         return(result);
414     }
415     else if (bytes_left >= size)
416     {
417         //the pool cannot satisfy the whole request, but can supply at least one block
418         nobjs = bytes_left/size;
419         total_bytes = size * nobjs;
420         result = start_free;
421         start_free += total_bytes;
422         return (result);
423     }
424     else
425     {
426         //the pool cannot supply even one block: memory must come from the heap, but first the pool's leftover is put to use
427         size_t bytes_to_get = 2 * total_bytes + ROUND_UP(heap_size >> 4);
428         //try to make the leftover bytes in the pool useful
429         if (bytes_left > 0)
430         {
431             //the pool still holds a remnant: hand it to the appropriate free list
432             //first locate that free list
433             obj * __VOLATILE * my_free_list = 
434                     free_list + FREELIST_INDEX(bytes_left);
435 
436             //push the remnant onto the front of that free list
437             ((obj *)start_free) -> free_list_link = *my_free_list; 
438             *my_free_list = (obj *)start_free;
439         }
440 
441         //request heap space to replenish the pool
442         start_free = (char *)malloc(bytes_to_get);
443         if (0 == start_free)
444         {
445             //heap exhausted: malloc() failed
446             int i;
447             obj * __VOLATILE *my_free_list, *p;
448            //search the free lists of node size >= size for an unused block
452             for (i = size; i <= __MAX_BYTES; i += __ALIGN)
453             {
454                 my_free_list = free_list + FREELIST_INDEX(i);
455                 p = *my_free_list;
456                 if (0 != p)
457                 {
458                     //this free list still has an unused block
459                     //unlink it and turn it into the new pool
460                     *my_free_list = p -> free_list_link;
461                     start_free = (char *)p;
462                     end_free = start_free + i;
463                     // the pool now holds i bytes
464                     //recurse to recompute nobjs; since i >= size, the recursive call is satisfied by one of the first two branches
465                     return chunk_alloc(size, nobjs);
466                     //note: any remnant will eventually be filed into the appropriate free list
467                     
468                 }
469             }
470             end_free = 0;  //nothing found anywhere: no memory is available
471             //fall back to the first-level allocator: its out-of-memory handler (installed with set_malloc_handler) gets a chance to release memory so that the retry can succeed; with no handler installed it executes __THROW_BAD_ALLOC
472             start_free = (char *)malloc_alloc::allocate(bytes_to_get);
473             //this either does not return normally or the shortage has been relieved
474         }
475         heap_size += bytes_to_get;
476         end_free = start_free + bytes_to_get;
477         //recurse to recompute nobjs against the replenished pool
478         return chunk_alloc(size, nobjs);
479     }
480 }

本人觉得STL的这个容器配置器最经典的：

1：整体的优化设计，分两层配置空间。

2:就是上边的 chunk_alloc 函数的设计，尤其是最后递归调用，自己整理自己。

之前一直没有理解，这次感觉理解了一些后觉得收获很大。

好了，最后在总结一些整个配置思路:

1.大于128的，直接调用一级配置器；

2.不大于128的调用二级配置器：先向合适的区块大小的链表要空间，若没有到内存池要，内存池没有到堆要，堆也没了，到数组中元素更大的链表要，并整理取得区块的剩余的小空间，如果整个数组中都没有找到，就调用一级配置器，将其作为函数的最后的出口，其实主要是通过一级配置器中的

处理函数解决。

整个过程如上，具体细节可阅读代码，对着注释看，便一目了然。

之前讲过，第一级配置器中要设置处理函数，它恰巧可以作为第二级配置器的出口，那么这个处理函数是怎么样的，具体做了哪些工作？如何设置等，见下篇文章。

posted @ 2016-06-05 12:21 mr_yu 阅读(579) 评论(0) 编辑收藏举报

努力加载评论中...

刷新页面返回顶部

mr_yu

STL内存配置器

公告