Linux文件映射的反思

1. 思考

多个进程可以加载相同的共享链接库,比如C语言的运行库,加载运行库采用内存映射文件的方式,可以延迟对于文件内容的读入操作。

共享链接库文件,是一个elf格式的库文件,里面会包含多个不同的section,比如text/data/bss section。

不同的section的访问方式和权限都有差别,比如text section一般要求“只读、可以执行”的权限,而data/bss section最起码要有“可读、可写”的权限。

那么,这种不同权限的section在内存映射时,是怎样区分对待,映射到不同的内存区域中的呢?

 

Linux下的共享链接库可以在程序启动时由动态链接器(ld.so)自动加载,也可以在运行时通过dlopen函数来加载。加载过程会负责将一个共享链接库的不同的section以不同的方式映射到进程的地址空间中,例如:

   1: daniel@ubuntu:/proc$ cat /proc/self/maps
   2: 00262000-00280000 r-xp 00000000 08:01 525208     /lib/i386-linux-gnu/ld-2.13.so
   3: 00280000-00281000 r--p 0001d000 08:01 525208     /lib/i386-linux-gnu/ld-2.13.so
   4: 00281000-00282000 rw-p 0001e000 08:01 525208     /lib/i386-linux-gnu/ld-2.13.so
   5: 002df000-00455000 r-xp 00000000 08:01 525221     /lib/i386-linux-gnu/libc-2.13.so
   6: 00455000-00457000 r--p 00176000 08:01 525221     /lib/i386-linux-gnu/libc-2.13.so
   7: 00457000-00458000 rw-p 00178000 08:01 525221     /lib/i386-linux-gnu/libc-2.13.so
   8: 00458000-0045b000 rw-p 00000000 00:00 0 
   9: 009b8000-009b9000 r-xp 00000000 00:00 0          [vdso]
  10: 08048000-08051000 r-xp 00000000 08:01 1310739    /bin/cat
  11: 08051000-08052000 r--p 00008000 08:01 1310739    /bin/cat
  12: 08052000-08053000 rw-p 00009000 08:01 1310739    /bin/cat
  13: 08fad000-08fce000 rw-p 00000000 00:00 0          [heap]
  14: b7516000-b7517000 r--p 0043a000 08:01 3021821    /usr/lib/locale/locale-archive
  15: b7517000-b7557000 r--p 002bd000 08:01 3021821    /usr/lib/locale/locale-archive
  16: b7557000-b7757000 r--p 00000000 08:01 3021821    /usr/lib/locale/locale-archive
  17: b7757000-b7758000 rw-p 00000000 00:00 0 
  18: b7768000-b776a000 rw-p 00000000 00:00 0 
  19: bf9e6000-bfa07000 rw-p 00000000 00:00 0          [stack]

库文件/lib/i386-linux-gnu/libc-2.13.so被映射到了三个内存区域,如果想了解各个字段都代表什么意思,可以到内核源码中去查看一下/proc/[pid]/maps是怎么打印出来的。

   1: static const struct seq_operations proc_pid_maps_op = {
   2:     .start    = m_start,
   3:     .next    = m_next,
   4:     .stop    = m_stop,
   5:     .show    = show_map
   6: };
   1: static int show_map(struct seq_file *m, void *v)
   2: {
   3:     struct vm_area_struct *vma = v;
   4:     struct proc_maps_private *priv = m->private;
   5:     struct task_struct *task = priv->task;
   6:  
   7:     show_map_vma(m, vma);
   8:  
   9:     if (m->count < m->size)  /* vma is copied successfully */
  10:         m->version = (vma != get_gate_vma(task->mm))
  11:             ? vma->vm_start : 0;
  12:     return 0;
  13: }
   1: static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
   2: {
   3:     struct mm_struct *mm = vma->vm_mm;
   4:     struct file *file = vma->vm_file;
   5:     vm_flags_t flags = vma->vm_flags;
   6:     unsigned long ino = 0;
   7:     unsigned long long pgoff = 0;
   8:     unsigned long start, end;
   9:     dev_t dev = 0;
  10:     int len;
  11:  
  12:     if (file) {
  13:         struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
  14:         dev = inode->i_sb->s_dev;
  15:         ino = inode->i_ino;
  16:         pgoff = ((loff_t)vma->vm_pgoff) << PAGE_SHIFT;
  17:     }
  18:  
  19:     /* We don't show the stack guard page in /proc/maps */
  20:     start = vma->vm_start;
  21:     if (stack_guard_page_start(vma, start))
  22:         start += PAGE_SIZE;
  23:     end = vma->vm_end;
  24:     if (stack_guard_page_end(vma, end))
  25:         end -= PAGE_SIZE;
  26:  
  27:     seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
  28:             start,
  29:             end,
  30:             flags & VM_READ ? 'r' : '-',
  31:             flags & VM_WRITE ? 'w' : '-',
  32:             flags & VM_EXEC ? 'x' : '-',
  33:             flags & VM_MAYSHARE ? 's' : 'p',
  34:             pgoff,
  35:             MAJOR(dev), MINOR(dev), ino, &len);
  36:  
  37:     /*
  38:      * Print the dentry name for named mappings, and a
  39:      * special [heap] marker for the heap:
  40:      */
  41:     if (file) {
  42:         pad_len_spaces(m, len);
  43:         seq_path(m, &file->f_path, "\n");
  44:     } else {
  45:         const char *name = arch_vma_name(vma);
  46:         if (!name) {
  47:             if (mm) {
  48:                 if (vma->vm_start <= mm->brk &&
  49:                         vma->vm_end >= mm->start_brk) {
  50:                     name = "[heap]";
  51:                 } else if (vma->vm_start <= mm->start_stack &&
  52:                        vma->vm_end >= mm->start_stack) {
  53:                     name = "[stack]";
  54:                 }
  55:             } else {
  56:                 name = "[vdso]";
  57:             }
  58:         }
  59:         if (name) {
  60:             pad_len_spaces(m, len);
  61:             seq_puts(m, name);
  62:         }
  63:     }
  64:     seq_putc(m, '\n');
  65: }

 

seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n",
            start,
            end,
            flags & VM_READ ? 'r' : '-',
            flags & VM_WRITE ? 'w' : '-',
            flags & VM_EXEC ? 'x' : '-',
            flags & VM_MAYSHARE ? 's' : 'p',
            pgoff,
            MAJOR(dev), MINOR(dev), ino, &len);

[vma的起始地址]-[vma的结束地址] [读][写][执行][共享/私有] [在文件中的偏移] [主设备号]:[次设备号] [inode序号](末尾的%n不输出任何内容,只是把到目前为止已输出的字符数保存到len中,供后面的pad_len_spaces对齐名称列时使用)

对于最后一个字段

const char *name = arch_vma_name(vma);
        if (!name) {
            if (mm) {
                if (vma->vm_start <= mm->brk &&
                        vma->vm_end >= mm->start_brk
) {
                    name = "[heap]";
                } else if (vma->vm_start <= mm->start_stack &&
                       vma->vm_end >= mm->start_stack
) {
                    name = "[stack]";
                }
            } else {
                name = "[vdso]";
            }
        }
        if (name) {
            pad_len_spaces(m, len);
            seq_puts(m, name);
        }

什么是[vdso]呢?

可以将vdso看成一个shared object file(这个文件实际上不存在),内核将其映射到某个地址空间,被所有程序所共享。(我觉得这里用到了一个技术:多个虚拟页面映射到同一个物理页面。即内核把vdso映射到某个物理页面上,然后所有程序都会有一个页表项指向它,以此来共享,这样每个程序的vdso地址就可以不相同了)

参考:http://www.cppblog.com/hex108/archive/2010/11/22/134313.html

vdso是将内核态的调用映射到用户态的地址空间中,使得调用开销更小,路径更短

参考:http://blog.csdn.net/wlp600/article/details/6886162

这是一个内存页,用于中转系统调用,以提高性能。有点类似于Xen中的hypercall页面。

 


我们看一下,被映射成三个区域的部分在文件中分别是什么样的内容

   1: daniel@ubuntu:/proc$ readelf -S /lib/i386-linux-gnu/libc-2.13.so 
   2: There are 35 section headers, starting at offset 0x178b50:
   3:  
   4: Section Headers:
   5:   [Nr] Name              Type            Addr     Off    Size   ES Flg Lk Inf Al
   6:   [ 0]                   NULL            00000000 000000 000000 00      0   0  0
   7:   [ 1] .note.gnu.build-i NOTE            00000174 000174 000024 00   A  0   0  4
   8:   [ 2] .note.ABI-tag     NOTE            00000198 000198 000020 00   A  0   0  4
   9:   [ 3] .gnu.hash         GNU_HASH        000001b8 0001b8 003c38 04   A  4   0  4
  10:   [ 4] .dynsym           DYNSYM          00003df0 003df0 009200 10   A  5   1  4
  11:   [ 5] .dynstr           STRTAB          0000cff0 00cff0 005acd 00   A  0   0  1
  12:   [ 6] .gnu.version      VERSYM          00012abe 012abe 001240 02   A  4   0  2
  13:   [ 7] .gnu.version_d    VERDEF          00013d00 013d00 0003d8 00   A  5  28  4
  14:   [ 8] .gnu.version_r    VERNEED         000140d8 0140d8 000040 00   A  5   1  4
  15:   [ 9] .rel.dyn          REL             00014118 014118 002a10 08   A  4   0  4
  16:   [10] .rel.plt          REL             00016b28 016b28 000038 08   A  4  11  4
  17:   [11] .plt              PROGBITS        00016b60 016b60 000080 04  AX  0   0 16
  18:   [12] .text             PROGBITS        00016be0 016be0 10cba4 00  AX  0   0 16
  19:   [13] __libc_freeres_fn PROGBITS        00123790 123790 000f17 00  AX  0   0 16
  20:   [14] __libc_thread_fre PROGBITS        001246b0 1246b0 000196 00  AX  0   0 16
  21:   [15] .rodata           PROGBITS        00124860 124860 01dbc8 00   A  0   0 32
  22:   [16] .interp           PROGBITS        00142428 142428 000013 00   A  0   0  1
  23:   [17] .eh_frame_hdr     PROGBITS        0014243c 14243c 0073b4 00   A  0   0  4
  24:   [18] .eh_frame         PROGBITS        001497f0 1497f0 028c74 00   A  0   0  4
  25:   [19] .gcc_except_table PROGBITS        00172464 172464 0005a8 00   A  0   0  1
  26:   [20] .hash             HASH            00172a0c 172a0c 003484 04   A  4   0  4
  27:   [21] .tdata            PROGBITS        001761e4 1761e4 000008 00 WAT  0   0  4
  28:   [22] .tbss             NOBITS          001761ec 1761ec 000038 00 WAT  0   0  4
  29:   [23] .init_array       INIT_ARRAY      001761ec 1761ec 00000c 00  WA  0   0  4
  30:   [24] __libc_subfreeres PROGBITS        001761f8 1761f8 000070 00  WA  0   0  4
  31:   [25] __libc_atexit     PROGBITS        00176268 176268 000004 00  WA  0   0  4
  32:   [26] __libc_thread_sub PROGBITS        0017626c 17626c 00000c 00  WA  0   0  4
  33:   [27] .data.rel.ro      PROGBITS        00176280 176280 001afc 00  WA  0   0 32
  34:   [28] .dynamic          DYNAMIC         00177d7c 177d7c 0000f0 08  WA  5   0  4
  35:   [29] .got              PROGBITS        00177e6c 177e6c 000174 04  WA  0   0  4
  36:   [30] .got.plt          PROGBITS        00177ff4 177ff4 000028 04  WA  0   0  4
  37:   [31] .data             PROGBITS        00178020 178020 00099c 00  WA  0   0 32
  38:   [32] .bss              NOBITS          001789c0 1789bc 003058 00  WA  0   0 32
  39:   [33] .gnu_debuglink    PROGBITS        00000000 1789bc 000014 00      0   0  1
  40:   [34] .shstrtab         STRTAB          00000000 1789d0 00017e 00      0   0  1
  41: Key to Flags:
  42:   W (write), A (alloc), X (execute), M (merge), S (strings)
  43:   I (info), L (link order), G (group), T (TLS), E (exclude), x (unknown)
  44:   O (extra OS processing required) o (OS specific), p (processor specific)

第一个区域,r-xp,是“只读、执行、私有”区域,主要映射的是.text section, .rodata section;

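第二个区域,r--p,是“只读、私有”区域,对应文件偏移0x176000~0x178000,主要映射的是.tdata、.init_array、.data.rel.ro、.dynamic、.got等section(.tbss是NOBITS类型,不占用文件内容);这部分数据应是在重定位完成后被动态链接器设为只读的(即RELRO机制);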

第三个区域,rw-p,是“读写、私有”区域,主要映射的是.data section, .bss section;其中.bss超出该页的部分落在紧随其后的匿名映射区域(00458000-0045b000)中。

 

因此,对于共享链接库以及可执行文件来说,内存映射实际上只是加载过程的一部分,一个文件可以映射到内存的几个区域。

posted @ 2014-02-10 10:25  Daniel King  阅读(866)  评论(0编辑  收藏  举报