什么是 e820?
e820 是 BIOS 用来报告物理内存分布的一个基础设施。因此,e820 是一个很重要的东西,它报告了哪些物理内存范围是可用的,哪些范围是预留的。
e820 流程分析
linux 获取内存分布从 machine_specific_memory_setup 函数开始,这个函数在多个文件里都有定义,此处我们只看 include/asm-i386/mach-default/setup_arch_post.h 文件中的定义,函数的定义如下:
/*
 * Fill in the kernel's e820 table and return a string naming the source
 * the map came from: "BIOS-e820" when the BIOS-supplied map was copied,
 * otherwise "BIOS-88" or "BIOS-e801" for the faked two-region map.
 */
static char * __init machine_specific_memory_setup(void)
{
	char *who;

	who = "BIOS-e820";

	/*
	 * Try to copy the BIOS-supplied E820-map.
	 *
	 * Otherwise fake a memory map; one section from 0k->640k,
	 * the next section from 1mb->appropriate_mem_k
	 */
	/* The return value of sanitize_e820_map() is not examined here. */
	sanitize_e820_map(E820_MAP, &E820_MAP_NR);
	if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
		unsigned long mem_size;

		/* compare results from other methods and take the greater */
		if (ALT_MEM_K < EXT_MEM_K) {
			mem_size = EXT_MEM_K;
			who = "BIOS-88";
		} else {
			mem_size = ALT_MEM_K;
			who = "BIOS-e801";
		}

		/* Reset the entry count so the two fake regions become entries 0 and 1. */
		e820.nr_map = 0;
		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
		/* NOTE(review): "<< 10" presumably converts a KiB count to bytes -- confirm against the *_MEM_K definitions. */
		add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
	}
	return who;
}
函数内部调用了 sanitize_e820_map 这个函数,这个函数很重要,它对 BIOS 报告的内存分布进行了一定的处理。调用函数时传递了 E820_MAP 和 E820_MAP_NR 这两个参数,这两个参数定义在 include/asm-i386/setup.h,定义如下:
/* The byte at offset E820NR inside PARAM, accessed as a char lvalue. */
#define E820_MAP_NR (*(char*) (PARAM+E820NR))
/* The address PARAM+E820MAP, viewed as a pointer to struct e820entry. */
#define E820_MAP ((struct e820entry *) (PARAM+E820MAP))
其中 PARAM 的定义如下:
/* Declaration only: a byte array of PARAM_SIZE elements defined elsewhere. */
extern unsigned char boot_params[PARAM_SIZE];
/* PARAM is simply another name for the boot_params array. */
#define PARAM (boot_params)
可以看到,其实 PARAM 就是一个字节数组,这个数组的大小为 PARAM_SIZE ,它的值为4096。
但是上面的 extern 只是 boot_params 的声明,它的定义在哪里呢?它定义在 arch/i386/kernel/setup.c 中,定义如下:
/* Definition (storage) of the boot_params byte array, PARAM_SIZE bytes long. */
unsigned char __initdata boot_params[PARAM_SIZE];
其实,这里已经很明显了,E820NR 和 E820MAP 就是对应参数的偏移量,一个是 e820 映射的数量在这个数组中的偏移量,另一个就是 e820entry 开始的偏移量,它们的定义如下:
/* Two byte offsets (E820MAP, E820NR) and the maximum entry count (E820MAX) for the e820 map. */
#define E820MAP 0x2d0 /* our map */
#define E820MAX 128 /* number of entries in E820MAP */
#define E820NR 0x1e8 /* # entries in E820MAP */
这样,传递给 sanitize_e820_map 函数的两个实参分别是 e820entry 数组的起始地址和存有这个数组的元素数量的内存地址。
另外,boot_params 这个数组的填充在 arch/i386/kernel/head.S 中,初始化代码如下:
/*
 * Copy bootup parameters out of the way.
 * Note: %esi still has the pointer to the real-mode data.
 */
	movl $boot_params,%edi		/* destination: address of boot_params */
	movl $(PARAM_SIZE/4),%ecx	/* repeat count: PARAM_SIZE bytes in 4-byte units */
	cld				/* clear the direction flag so the copy runs upward */
	rep
	movsl				/* copy %ecx dwords from (%esi) to (%edi) */
	movl boot_params+NEW_CL_POINTER,%esi	/* load the dword stored at offset NEW_CL_POINTER of the copy */
处理 BIOS 报告的内存区域可能存在重叠的问题
sanitize_e820_map 函数定义在 arch/i386/kernel/setup.c 中,定义如下:
/*
 * Sanitize the BIOS e820 map.
 *
 * Some e820 responses include overlapping entries.  The following
 * replaces the original e820 map with a new one, removing overlaps.
 *
 */
/* One "change point": the start or the end address of a BIOS entry. */
struct change_member {
	struct e820entry *pbios; /* pointer to original bios entry */
	unsigned long long addr; /* address for this change point */
};
/* Work areas: two change points (start and end) per possible entry. */
static struct change_member change_point_list[2*E820MAX] __initdata;
/* Pointers into change_point_list; it is these pointers that get sorted. */
static struct change_member *change_point[2*E820MAX] __initdata;
/* BIOS entries that cover the address currently being swept. */
static struct e820entry *overlap_list[E820MAX] __initdata;
/* The overlap-free map under construction. */
static struct e820entry new_bios[E820MAX] __initdata;

/*
 * sanitize_e820_map - rewrite a BIOS e820 map in place without overlaps.
 * @biosmap: array of BIOS entries; overwritten with the sanitized map
 * @pnr_map: in: number of entries in @biosmap; out: number of entries written
 *
 * Returns -1 and leaves @biosmap and *@pnr_map untouched when the map has
 * fewer than two entries or when some entry's addr + size wraps around.
 * Returns 0 after the map and the count have been replaced.
 */
static int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
{
	struct change_member *change_tmp;
	unsigned long current_type, last_type;
	unsigned long long last_addr;
	int chgidx, still_changing;
	int overlap_entries;
	int new_bios_entry;
	int old_nr, new_nr, chg_nr;
	int i;

	/*
		Visually we're performing the following (1,2,3,4 = memory types)...

		Sample memory map (w/overlaps):
		   ____22__________________
		   ______________________4_
		   ____1111________________
		   _44_____________________
		   11111111________________
		   ____________________33__
		   ___________44___________
		   __________33333_________
		   ______________22________
		   ___________________2222_
		   _________111111111______
		   _____________________11_
		   _________________4______

		Sanitized equivalent (no overlap):
		   1_______________________
		   _44_____________________
		   ___1____________________
		   ____22__________________
		   ______11________________
		   _________1______________
		   __________3_____________
		   ___________44___________
		   _____________33_________
		   _______________2________
		   ________________1_______
		   _________________4______
		   ___________________2____
		   ____________________33__
		   ______________________4_
	*/

	/* if there's only one memory region, don't bother */
	if (*pnr_map < 2)
		return -1;

	old_nr = *pnr_map;

	/* bail out if we find any unreasonable addresses in bios map */
	/* (addr + size smaller than addr means the 64-bit sum wrapped around) */
	for (i=0; i<old_nr; i++)
		if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
			return -1;

	/* create pointers for initial change-point information (for sorting) */
	for (i=0; i < 2*old_nr; i++)
		change_point[i] = &change_point_list[i];

	/* record all known change-points (starting and ending addresses),
	   omitting those that are for empty memory regions */
	chgidx = 0;
	for (i=0; i < old_nr; i++) {
		if (biosmap[i].size != 0) {
			/* start point of entry i */
			change_point[chgidx]->addr = biosmap[i].addr;
			change_point[chgidx++]->pbios = &biosmap[i];
			/* end point of entry i (first address past the region) */
			change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
			change_point[chgidx++]->pbios = &biosmap[i];
		}
	}
	chg_nr = chgidx; /* true number of change-points */

	/* sort change-point list by memory addresses (low -> high) */
	/* (bubble sort on the pointer array: repeat passes until no swap happens) */
	still_changing = 1;
	while (still_changing) {
		still_changing = 0;
		for (i=1; i < chg_nr; i++) {
			/* if the current address is lower than the previous one, swap */
			/* or, if the addresses are equal and current is a start point
			   while the previous one is an end point, swap -- so that at
			   equal addresses start points sort before end points */
			if ((change_point[i]->addr < change_point[i-1]->addr) ||
				((change_point[i]->addr == change_point[i-1]->addr) &&
				 (change_point[i]->addr == change_point[i]->pbios->addr) &&
				 (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
			   )
			{
				change_tmp = change_point[i];
				change_point[i] = change_point[i-1];
				change_point[i-1] = change_tmp;
				still_changing=1;
			}
		}
	}

	/* create a new bios memory map, removing overlaps */
	overlap_entries=0; /* number of entries in the overlap table */
	new_bios_entry=0; /* index for creating new bios map entries */
	last_type = 0; /* start with undefined memory type */
	last_addr = 0; /* start with 0 as last starting address */
	/* loop through change-points, determining affect on the new bios map */
	for (chgidx=0; chgidx < chg_nr; chgidx++) {
		/* keep track of all overlapping bios entries */
		/* (a change point whose address equals its entry's addr is a start point) */
		if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr) {
			/* add map entry to overlap list (> 1 entry implies an overlap) */
			overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
		} else {
			/* remove entry from list (order independent, so swap with last) */
			for (i=0; i<overlap_entries; i++) {
				if (overlap_list[i] == change_point[chgidx]->pbios)
					overlap_list[i] = overlap_list[overlap_entries-1];
			}
			overlap_entries--;
		}
		/* if there are overlapping entries, decide which "type" to use */
		/* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
		current_type = 0;
		for (i=0; i<overlap_entries; i++)
			if (overlap_list[i]->type > current_type)
				current_type = overlap_list[i]->type;
		/* continue building up new bios map based on this information */
		if (current_type != last_type) {
			if (last_type != 0) {
				/* close the region that was open up to this change point */
				new_bios[new_bios_entry].size =
					change_point[chgidx]->addr - last_addr;
				/* move forward only if the new size was non-zero */
				if (new_bios[new_bios_entry].size != 0)
					if (++new_bios_entry >= E820MAX)
						break; /* no more space left for new bios entries */
			}
			if (current_type != 0) {
				/* open a new region; its size is filled in at the next type change */
				new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
				new_bios[new_bios_entry].type = current_type;
				last_addr=change_point[chgidx]->addr;
			}
			last_type = current_type;
		}
	}
	new_nr = new_bios_entry; /* retain count for new bios entries */

	/* copy new bios mapping into original location */
	memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
	*pnr_map = new_nr;

	return 0;
}
sanitize_e820_map 函数的作用就是解决 BIOS 报告的内存区域存在重叠的问题,得到一个新的内存区域分布,然后分别更新分布和数量。
处理完分布之后,就开始调用 copy_e820_map 函数,从这个函数的名称就可以看出来,这个函数的作用是拷贝内存区域分布,这个函数的定义如下:
/*
 * Copy the BIOS e820 map into a safe place.
 *
 * Sanity-check it while we're at it..
 *
 * If we're lucky and live on a modern system, the setup code
 * will have given us a memory map that we can use to properly
 * set up memory.  If we aren't, we'll fake a memory map.
 *
 * We check to see that the memory map contains at least 2 elements
 * before we'll use it, because the detection code in setup.S may
 * not be perfect and most every PC known to man has two memory
 * regions: one from 0 to 640k, and one from 1mb up.  (The IBM
 * thinkpad 560x, for example, does not cooperate with the memory
 * detection code.)
 *
 * @biosmap: first entry of the BIOS map to copy
 * @nr_map:  number of entries in @biosmap
 *
 * Returns 0 once every entry has been passed to add_memory_region(),
 * or -1 when @nr_map is below 2 or an entry's start + size wraps around.
 * In the wrap-around case, entries handled before the bad one have
 * already been added.
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	/* Only one memory region (or negative)? Ignore it */
	if (nr_map < 2)
		return -1;

	do {
		unsigned long long start = biosmap->addr;
		unsigned long long size = biosmap->size;
		unsigned long long end = start + size;
		unsigned long type = biosmap->type;

		/* Overflow in 64 bits? Ignore the memory map. */
		if (start > end)
			return -1;

		/*
		 * Some BIOSes claim RAM in the 640k - 1M region.
		 * Not right. Fix it up.
		 */
		if (type == E820_RAM) {
			/* true when [start, end) intersects [0xA0000, 0x100000) */
			if (start < 0x100000ULL && end > 0xA0000ULL) {
				/* keep the part below 640k as its own region */
				if (start < 0xA0000ULL)
					add_memory_region(start, 0xA0000ULL-start, type);
				/*
				 * Nothing lies above 1M: skip to the next entry.
				 * "continue" in a do/while jumps to the loop condition,
				 * so biosmap and nr_map are still stepped.
				 */
				if (end <= 0x100000ULL)
					continue;
				/* keep only the part from 1M upward */
				start = 0x100000ULL;
				size = end - start;
			}
		}
		add_memory_region(start, size, type);
	} while (biosmap++,--nr_map);
	return 0;
}
这个函数在拷贝之前会对区域类型为 E820_RAM 的分布做一个特殊的处理:如果起始地址小于 0x100000 且结束地址大于 0xA0000,换句话说,如果内存区域和 640k - 1M 这部分区域存在重叠,那么必须得做一些特殊的处理。如果起始地址小于 0xA0000,那么先添加 start 到 0xA0000 这段区域。如果结束地址小于等于 0x100000,那么直接跳过此次循环,因为不需要再添加区域了;如果没有跳过此次循环,则说明 end 大于 0x100000,那么就添加 0x100000 到 end 这段内存区域。
函数内部调用了 add_memory_region 函数,这个函数也定义在 arch/i386/kernel/setup.c 中,它的定义如下:
/*
 * Append one region (start, size, type) to e820.map and increment
 * e820.nr_map.
 *
 * Does nothing when efi_enabled is non-zero.  When the table already
 * holds E820MAX entries, an error is printed and the region is dropped.
 */
static void __init add_memory_region(unsigned long long start, unsigned long long size, int type)
{
	int x;

	if (!efi_enabled) {
		/* index of the first unused slot */
		x = e820.nr_map;

		if (x == E820MAX) {
			printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
			return;
		}

		e820.map[x].addr = start;
		e820.map[x].size = size;
		e820.map[x].type = type;
		e820.nr_map++;
	}
} /* add_memory_region */
这个函数很简单,只是简单地将数据填充到 e820 的 map 数组中。
至此,e820 的整体流程已经全部分析完了,但是里面还存在一些细节,没有详细展开说明。不过本文的重点是讲解 e820 的整个流程,所以就不再详细说明了。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· 阿里巴巴 QwQ-32B真的超越了 DeepSeek R-1吗?
· 【译】Visual Studio 中新的强大生产力特性
· 【设计模式】告别冗长if-else语句:使用策略模式优化代码结构
· AI与.NET技术实操系列(六):基于图像分类模型对图像进行分类