/*
 * start_kernel - architecture-independent boot entry point.
 *
 * Takes the kernel lock, prints the banner, runs setup_arch() (which hands
 * back the command line to parse) and then calls the individual subsystem
 * initialisers in a fixed order.  Interrupts are enabled with sti() once
 * kmem_cache_init() has run.  At the end the init thread is created, the
 * kernel lock is dropped, a reschedule is requested and cpu_idle() is
 * called.
 *
 * The one-line notes on the calls below are translated from the original
 * annotations; most callees are not defined in this part of the file, so
 * treat those notes as the annotator's description, not as a contract.
 */
asmlinkage void __init start_kernel(void)
{
	char * command_line;
	unsigned long mempages;
	extern char saved_command_line[];

	/*
	 * Interrupts are still disabled. Do necessary setups, then
	 * enable them
	 */
	/* Take the kernel lock. */
	lock_kernel();
	/*
	 * Print the kernel version and build information.  The banner is
	 * passed as an argument to "%s" rather than as the format itself,
	 * so any '%' inside it is printed literally.
	 */
	printk("%s", linux_banner);
	/*
	 * Architecture setup: handles the memory-related command line
	 * options and the physical memory map, and returns the command
	 * line through command_line.
	 */
	setup_arch(&command_line);
	/* Print the saved (unparsed) command line. */
	printk("Kernel command line: %s\n", saved_command_line);
	/*
	 * Parse the command line; per the original annotation this picks
	 * out the arguments and environment for the first process.
	 */
	parse_options(command_line);
	/* Initialise the common interrupts and exceptions. */
	trap_init();
	/* Initialise the remaining interrupts. */
	init_IRQ();
	/* Initialise the scheduler-related timer and bottom halves. */
	sched_init();
	/* Initialise the clock. */
	time_init();
	/* Initialise softirqs / tasklets. */
	softirq_init();
	/*
	 * HACK ALERT! This is early. We're enabling the console before
	 * we've done PCI setups etc, and console_init() must be aware of
	 * this. But we do want output early, in case something goes wrong.
	 */
	/* Initialise the console. */
	console_init();
#ifdef CONFIG_MODULES
	/* Initialise the module symbol table size. */
	init_modules();
#endif
	if (prof_shift) {
		unsigned int size;
		/* only text is profiled */
		prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
		prof_len >>= prof_shift;
		/* One unsigned int per profiling slot, plus PAGE_SIZE-1 of slack. */
		size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
		prof_buffer = (unsigned int *) alloc_bootmem(size);
	}
	/* Initialise the slab allocator. */
	kmem_cache_init();
	/* Interrupts are enabled from here on. */
	sti();
	calibrate_delay();
#ifdef CONFIG_BLK_DEV_INITRD
	/*
	 * Drop the initrd if it starts below the first usable low page
	 * (min_low_pfn) and that has not been declared acceptable.
	 */
	if (initrd_start && !initrd_below_start_ok &&
			initrd_start < min_low_pfn << PAGE_SHIFT) {
		printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
		    "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT);
		initrd_start = 0;
	}
#endif
	/* Set up high memory and the per-page flags. */
	mem_init();
	/* Initialise the internal and general-purpose slab caches. */
	kmem_cache_sizes_init();
#ifdef CONFIG_3215_CONSOLE
	con3215_activate();
#endif
#ifdef CONFIG_PROC_FS
	/* Create the proc filesystem directories. */
	proc_root_init();
#endif
	/* Several initialisers below are sized by the number of physical pages. */
	mempages = num_physpages;
	/* Initialise the maximum number of threads. */
	fork_init(mempages);
	/* Create the slab caches for some commonly used data structures. */
	proc_caches_init();
	vfs_caches_init(mempages);
	/* Initialise the buffer data structures. */
	buffer_init(mempages);
	/* Initialise the page cache structures. */
	page_cache_init(mempages);
	kiobuf_setup();
	/* Create the slab cache used for signals. */
	signals_init();
	bdev_init();
	/* Initialise the filesystem inode structures. */
	inode_init(mempages);
#if defined(CONFIG_SYSVIPC)
	/* Initialise SysV semaphores, message queues and shared memory. */
	ipc_init();
#endif
#if defined(CONFIG_QUOTA)
	dquot_init_hash();
#endif
	check_bugs();
	printk("POSIX conformance testing by UNIFIX\n");
	/*
	 *	We count on the initial thread going ok
	 *	Like idlers init is an unlocked kernel thread, which will
	 *	make syscalls (and thus be locked).
	 */
	/* Initialise SMP (per the original annotation, mostly APIC setup). */
	smp_init();
	/* Create the init thread. */
	kernel_thread(init, NULL, CLONE_FS | CLONE_FILES | CLONE_SIGNAL);
	unlock_kernel();
	current->need_resched = 1;
	/* Enter the idle loop so that scheduling can take place. */
	cpu_idle();
}
/*
 * setup_arch - architecture-specific early setup.
 *
 * @cmdline_p: out parameter; handed to parse_mem_cmdline(), which stores
 *             the address of the filtered command line buffer in it.
 *
 * In order, this function:
 *  - copies boot-time parameters (root device, drive/screen/APM info,
 *    system description table, ramdisk flags) into kernel variables;
 *  - builds the physical memory map (setup_memory_region()) and applies
 *    "mem=" overrides (parse_mem_cmdline());
 *  - works out start_pfn, max_pfn and max_low_pfn and initialises the
 *    boot-time allocator over low memory, freeing usable RAM and then
 *    reserving the kernel image, the bootmem bitmap, page 0 and
 *    (optionally) SMP trampoline pages and the initrd;
 *  - calls paging_init() and the SMP/APIC configuration hooks;
 *  - registers RAM/ROM/reserved ranges and the standard I/O ports with
 *    the resource tree, and selects the console switch.
 */
void __init setup_arch(char **cmdline_p)
{
unsigned long bootmap_size;
unsigned long start_pfn, max_pfn, max_low_pfn;
int i;
#ifdef CONFIG_VISWS
visws_get_board_type_and_rev();
#endif
/*
 * Convert the boot-supplied root device number into its kdev_t form
 * (per the original annotation this does not differ from the
 * traditional representation).
 */
ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
drive_info = DRIVE_INFO;
screen_info = SCREEN_INFO;
apm_info.bios = APM_BIOS_INFO;
/* Copy the system description table fields into kernel variables, if a table is present. */
if( SYS_DESC_TABLE.length != 0 ) {
MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
machine_id = SYS_DESC_TABLE.table[0];
machine_submodel_id = SYS_DESC_TABLE.table[1];
BIOS_revision = SYS_DESC_TABLE.table[2];
}
aux_device_present = AUX_DEVICE_INFO;
#ifdef CONFIG_BLK_DEV_RAM
/* Decode the ramdisk flags: image start offset, prompt flag and load flag. */
rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
#endif
/* Build the e820 physical memory map (BIOS-provided or faked). */
setup_memory_region();
/* Clear the read-only mount flag unless the root was requested read-only. */
if (!MOUNT_ROOT_RDONLY)
root_mountflags &= ~MS_RDONLY;
/* Record the kernel image boundaries (code start/end, data end, brk) in init_mm. */
init_mm.start_code = (unsigned long) &_text;
init_mm.end_code = (unsigned long) &_etext;
init_mm.end_data = (unsigned long) &_edata;
init_mm.brk = (unsigned long) &_end;
/*
 * Record the extent of the kernel code and data in the resource
 * descriptors; the addresses are converted with virt_to_bus() first.
 */
code_resource.start = virt_to_bus(&_text);
code_resource.end = virt_to_bus(&_etext)-1;
data_resource.start = virt_to_bus(&_etext);
data_resource.end = virt_to_bus(&_edata)-1;
/* Handle the "mem=" command line options and fetch the filtered command line. */
parse_mem_cmdline(cmdline_p);
/* Byte address <-> page frame number helpers (round up / round down / back to bytes). */
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
/*
* 128MB for vmalloc and initrd
*/
#define VMALLOC_RESERVE (unsigned long)(128 << 20)
/* Largest amount of memory addressable as low memory, and its page frame count. */
#define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
#define MAXMEM_PFN PFN_DOWN(MAXMEM)
/* 1 << 20 page frames; the warning further down describes this limit as 4GB. */
#define MAX_NONPAE_PFN (1 << 20)
/*
* partially used pages are not usable - thus
* we are rounding upwards:
*/
/* First page frame past the end of the kernel image. */
start_pfn = PFN_UP(__pa(&_end));
/*
* Find the highest page frame number we have available
*/
/* Scan the e820 map for the highest page frame that belongs to RAM. */
max_pfn = 0;
for (i = 0; i < e820.nr_map; i++) {
unsigned long start, end;
/* RAM? */
if (e820.map[i].type != E820_RAM)
continue;
start = PFN_UP(e820.map[i].addr);
end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
/* Region holds no complete page after rounding: ignore it. */
if (start >= end)
continue;
if (end > max_pfn)
max_pfn = end;
}
/*
* Determine low and high memory ranges:
*/
/*
 * Highest low-memory page frame: max_pfn capped at MAXMEM_PFN
 * (the original annotation gives this limit as 896MB, which
 * corresponds to PAGE_OFFSET == 0xC0000000 -- not shown here).
 */
max_low_pfn = max_pfn;
if (max_low_pfn > MAXMEM_PFN) {
max_low_pfn = MAXMEM_PFN;
#ifndef CONFIG_HIGHMEM
/* Maximum memory usable is what is directly addressable */
printk(KERN_WARNING "Warning only %ldMB will be used.\n",
MAXMEM>>20);
if (max_pfn > MAX_NONPAE_PFN)
printk(KERN_WARNING "Use a PAE enabled kernel.\n");
else
printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
#else /* !CONFIG_HIGHMEM */
#ifndef CONFIG_X86_PAE
/* Without PAE, clamp max_pfn to MAX_NONPAE_PFN. */
if (max_pfn > MAX_NONPAE_PFN) {
max_pfn = MAX_NONPAE_PFN;
printk(KERN_WARNING "Warning only 4GB will be used.\n");
printk(KERN_WARNING "Use a PAE enabled kernel.\n");
}
#endif /* !CONFIG_X86_PAE */
#endif /* !CONFIG_HIGHMEM */
}
/* Set the first and last page frame numbers of high memory. */
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > MAXMEM_PFN) {
highstart_pfn = MAXMEM_PFN;
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
}
#endif
/*
* Initialize the boot-time allocator (with low memory only):
*/
/* The value returned by init_bootmem() is used below as the bitmap size. */
bootmap_size = init_bootmem(start_pfn, max_low_pfn);
/*
* Register fully available low RAM pages with the bootmem allocator.
*/
/* Hand every complete RAM page below max_low_pfn to the allocator as free. */
for (i = 0; i < e820.nr_map; i++) {
unsigned long curr_pfn, last_pfn, size;
/*
* Reserve usable low memory
*/
if (e820.map[i].type != E820_RAM)
continue;
/*
* We are rounding up the start address of usable memory:
*/
curr_pfn = PFN_UP(e820.map[i].addr);
if (curr_pfn >= max_low_pfn)
continue;
/*
* ... and at the end of the usable range downwards:
*/
last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
if (last_pfn > max_low_pfn)
last_pfn = max_low_pfn;
/*
* .. finally, did all the rounding and playing
* around just make the area go away?
*/
if (last_pfn <= curr_pfn)
continue;
size = last_pfn - curr_pfn;
free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
}
/*
* Reserve the bootmem bitmap itself as well. We do this in two
* steps (first step was init_bootmem()) because this catches
* the (very unlikely) case of us accidentally initializing the
* bootmem allocator with an invalid RAM area.
*/
/*
 * The reserved range runs from HIGH_MEMORY to the end of the bitmap
 * (PFN_PHYS(start_pfn) + bootmap_size, plus PAGE_SIZE-1 of slack),
 * so it covers the kernel image as well as the bitmap.
 */
reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
/*
* reserve physical page 0 - it's a special BIOS page on many boxes,
* enabling clean reboots, SMP operation, laptop functions.
*/
/* Reserve the first physical page. */
reserve_bootmem(0, PAGE_SIZE);
#ifdef CONFIG_SMP
/*
* But first pinch a few for the stack/trampoline stuff
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
#endif
#ifdef CONFIG_X86_IO_APIC
/*
* Find and reserve possible boot-time SMP configuration:
*/
find_smp_config();
#endif
/* Build the kernel page tables (per the original annotation; paging_init() is not shown here). */
paging_init();
#ifdef CONFIG_X86_IO_APIC
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
get_smp_config();
#endif
#ifdef CONFIG_X86_LOCAL_APIC
init_apic_mappings();
#endif
#ifdef CONFIG_BLK_DEV_INITRD
/*
 * Reserve the initrd image if the boot loader supplied one and it
 * ends at or below the top of low memory; otherwise disable it.
 */
if (LOADER_TYPE && INITRD_START) {
if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) {
reserve_bootmem(INITRD_START, INITRD_SIZE);
/* initrd_start is INITRD_START offset by PAGE_OFFSET. */
initrd_start =
INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
initrd_end = initrd_start+INITRD_SIZE;
}
else {
printk("initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
INITRD_START + INITRD_SIZE,
max_low_pfn << PAGE_SHIFT);
initrd_start = 0;
}
}
#endif
/*
* Request address space for all standard RAM and ROM resources
* and also for regions reported as reserved by the e820.
*/
/* Add the ROM regions to the resource list (per the original annotation). */
probe_roms();
/* Register every e820 region that ends at or below 4GB as a memory resource. */
for (i = 0; i < e820.nr_map; i++) {
struct resource *res;
/* Skip regions that extend past 4GB. */
if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
continue;
res = alloc_bootmem_low(sizeof(struct resource));
switch (e820.map[i].type) {
case E820_RAM: res->name = "System RAM"; break;
case E820_ACPI: res->name = "ACPI Tables"; break;
case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
default: res->name = "reserved";
}
res->start = e820.map[i].addr;
res->end = res->start + e820.map[i].size - 1;
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(&iomem_resource, res);
if (e820.map[i].type == E820_RAM) {
/*
* We don't know which RAM region contains kernel data,
* so we try it repeatedly and let the resource manager
* test it.
*/
request_resource(res, &code_resource);
request_resource(res, &data_resource);
}
}
request_resource(&iomem_resource, &vram_resource);
/* request I/O space for devices used on all i[345]86 PCs */
for (i = 0; i < STANDARD_IO_RESOURCES; i++)
request_resource(&ioport_resource, standard_io_resources+i);
/* Select the console switch: VGA if configured, else the dummy console if configured. */
#ifdef CONFIG_VT
#if defined(CONFIG_VGA_CONSOLE)
conswitchp = &vga_con;
#elif defined(CONFIG_DUMMY_CONSOLE)
conswitchp = &dummy_con;
#endif
#endif
}
/* Call chain: setup_arch -> setup_memory_region (the arrow denotes "calls") */
/*
 * setup_memory_region - fill in the e820 physical memory map.
 *
 * The BIOS-supplied E820 table is tried first through copy_e820_map().
 * If that returns a negative value, a two-entry map is made up instead:
 * one RAM region [0, LOWMEMSIZE()) and one RAM region starting at
 * HIGH_MEMORY, sized from whichever of EXT_MEM_K / ALT_MEM_K is larger
 * (both are in kilobytes, hence the shift by 10).
 *
 * The resulting map is printed, labelled with the source that produced it.
 */
void __init setup_memory_region(void)
{
	char *origin = "BIOS-e820";

	if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
		unsigned long kbytes;

		/* Compare the two legacy size reports and keep the greater one. */
		if (EXT_MEM_K > ALT_MEM_K) {
			origin = "BIOS-88";
			kbytes = EXT_MEM_K;
		} else {
			origin = "BIOS-e801";
			kbytes = ALT_MEM_K;
		}

		/* Restart from an empty table, then add the two fabricated regions. */
		e820.nr_map = 0;
		add_memory_region(0, LOWMEMSIZE(), E820_RAM);
		add_memory_region(HIGH_MEMORY, (kbytes << 10) - HIGH_MEMORY, E820_RAM);
	}

	printk("BIOS-provided physical RAM map:\n");
	/* Print each region's range and type, tagged with the origin. */
	print_memory_map(origin);
}
/* Call chain: setup_arch -> setup_memory_region -> copy_e820_map */
/*
 * copy_e820_map - import the BIOS E820 table, sanity-checking as we go.
 *
 * @biosmap: first entry of the table handed over by the setup code.
 * @nr_map:  number of entries in that table.
 *
 * Returns 0 when every entry was processed, or -1 when the table is
 * rejected and the caller should fake a memory map instead.
 *
 * A table with fewer than two entries is rejected: the detection code
 * in setup.S may not be perfect, and practically every PC has at least
 * two regions (0 to 640k, and 1mb upwards).  The IBM thinkpad 560x is
 * one machine that does not cooperate with the detection code.  A table
 * is also rejected if any entry's start + size overflows 64 bits.
 *
 * Each accepted entry is passed on to add_memory_region().
 */
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
{
	int remaining;

	/* Zero, one or a negative number of regions: do not trust the table. */
	if (nr_map < 2)
		return -1;

	for (remaining = nr_map; remaining != 0; biosmap++, remaining--) {
		unsigned long long base = biosmap->addr;
		unsigned long long length = biosmap->size;
		unsigned long long limit = base + length;
		unsigned long kind = biosmap->type;

		/* base + length overflowed 64 bits: give up on the whole map. */
		if (limit < base)
			return -1;

		/*
		 * Some BIOSes report RAM inside the 640k - 1M hole.  Cut
		 * that part out: keep whatever lies below 0xA0000 and
		 * whatever lies at or above 0x100000.
		 */
		if (kind == E820_RAM && base < 0x100000ULL && limit > 0xA0000ULL) {
			if (base < 0xA0000ULL)
				add_memory_region(base, 0xA0000ULL - base, kind);
			/* Nothing of this entry reaches past 1M: next entry. */
			if (limit <= 0x100000ULL)
				continue;
			base = 0x100000ULL;
			length = limit - base;
		}

		/* Record the (possibly trimmed) region. */
		add_memory_region(base, length, kind);
	}

	return 0;
}
/* Call chain: setup_arch -> setup_memory_region -> copy_e820_map -> add_memory_region */
/*
 * add_memory_region - append one entry to the e820 memory map.
 *
 * @start: first address of the region.
 * @size:  length of the region.
 * @type:  region type, stored as given (callers in this file pass E820_RAM
 *         or the type read from the BIOS table).
 *
 * If the map already holds E820MAX entries, a message is printed and the
 * region is dropped; nothing is reported back to the caller.
 */
void __init add_memory_region(unsigned long long start,
			      unsigned long long size, int type)
{
	const int slot = e820.nr_map;

	if (slot != E820MAX) {
		/* Fill the next free slot, then account for it. */
		e820.map[slot].type = type;
		e820.map[slot].size = size;
		e820.map[slot].addr = start;
		e820.nr_map = slot + 1;
		return;
	}

	printk("Ooops! Too many entries in the memory map!\n");
} /* add_memory_region */
/* Call chain: start_kernel -> setup_arch -> parse_mem_cmdline */
/*
 * parse_mem_cmdline - filter "mem=" options out of the boot command line.
 *
 * @cmdline_p: out parameter; receives the address of command_line, the
 *             buffer that holds the filtered copy.
 *
 * COMMAND_LINE is first copied verbatim into saved_command_line. It is
 * then copied character by character into command_line; every "mem=..."
 * option that starts the line or follows a space is acted upon and left
 * out of the copy. If a user-supplied memory map is in effect at the end
 * (usermem != 0), that map is printed.
 */
static inline void parse_mem_cmdline (char ** cmdline_p)
{
/*
 * c holds the previously consumed character; starting it as ' ' lets an
 * option at the very beginning of the line match as well.
 */
char c = ' ', *to = command_line, *from = COMMAND_LINE;
int len = 0;
int usermem = 0;
/* Save unparsed command line copy for /proc/cmdline */
memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
saved_command_line[COMMAND_LINE_SIZE-1] = '\0';
for (;;) {
/*
* "mem=nopentium" disables the 4MB page tables.
* "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
* to <mem>, overriding the bios size.
* "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
* <start> to <start>+<mem>, overriding the bios size.
*/
/* A "mem=" option is only recognised right after a space (or at the start). */
if (c == ' ' && !memcmp(from, "mem=", 4)) {
/* Drop the space already copied in front of the option. */
if (to != command_line)
to--;
/* "mem=nopentium": clear the PSE feature bit of the boot CPU. */
if (!memcmp(from+4, "nopentium", 9)) {
from += 9+4;
clear_bit(X86_FEATURE_PSE, &boot_cpu_data.x86_capability);
/*
 * "mem=exactmap": empty the memory map so that only the regions
 * given by the following mem=XXX@XXX options are used.
 */
} else if (!memcmp(from+4, "exactmap", 8)) {
from += 8+4;
e820.nr_map = 0;
usermem = 1;
} else {
/* If the user specifies memory size, we
* blow away any automatically generated
* size
*/
unsigned long start_at, mem_size;
/*
 * No user map is in effect yet (no "exactmap" or "@" form seen):
 * restart the map with just the standard low-memory region.
 */
if (usermem == 0) {
/* first time in: zap the whitelist
* and reinitialize it with the
* standard low-memory region.
*/
e820.nr_map = 0;
usermem = 1;
add_memory_region(0, LOWMEMSIZE(), E820_RAM);
}
/* Parse the size of the region; memparse() advances from past the number. */
mem_size = memparse(from+4, &from);
/* Parse the start address if "@start" follows; otherwise start at HIGH_MEMORY. */
if (*from == '@')
start_at = memparse(from+1, &from);
else {
start_at = HIGH_MEMORY;
/*
 * NOTE(review): mem_size is unsigned, so this wraps around when the
 * given size is smaller than HIGH_MEMORY -- there is no check here.
 */
mem_size -= HIGH_MEMORY;
/*
 * NOTE(review): resetting usermem means a later plain "mem=" option
 * rebuilds the map from scratch, and the "user-defined" map below is
 * only printed if a later option sets usermem again -- confirm that
 * this is intended.
 */
usermem=0;
}
/* Add the region to the memory map. */
add_memory_region(start_at, mem_size, E820_RAM);
}
}
/* Copy the next character; stop at the terminating NUL. */
c = *(from++);
if (!c)
break;
/* Stop once COMMAND_LINE_SIZE-1 characters have been consumed. */
if (COMMAND_LINE_SIZE <= ++len)
break;
*(to++) = c;
}
*to = '\0';
*cmdline_p = command_line;
if (usermem) {
printk("user-defined physical RAM map:\n");
print_memory_map("user");
}
}