Linux内核启动流程简介
Linux内核链接脚本arch/arm/kernel/vmlinux.lds
arch/arm/kernel/vmlinux.lds中
OUTPUT_ARCH(arm) ENTRY(stext) // Linux内核入口,入口函数stext jiffies = jiffies_64; SECTIONS { /* * XXX: The linker does not define how output sections are * assigned to input sections when there are multiple statements * matching the same input section name. There is no documented * order of matching. * * unwind exit sections must be discarded before the rest of the * unwind sections get included. */ /DISCARD/ : { *(.ARM.exidx.exit.text) *(.ARM.extab.exit.text) ... ...
arch/arm/kernel/head.S
/* * Kernel startup entry point. * --------------------------- * * This is normally called from the decompressor code. The requirements * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0, * r1 = machine nr, r2 = atags or dtb pointer. * * This code is mostly position independent, so if you link the kernel at * 0xc0008000, you call this at __pa(0xc0008000). * * See linux/arch/arm/tools/mach-types for the complete list of machine * numbers for r1. * * We're trying to keep crap to a minimum; DO NOT add any machine specific * crap here - that's what the boot loader (or in extreme, well justified * circumstances, zImage) is for. */ ENTRY(stext) ARM_BE8(setend be ) @ ensure we are in BE8 mode THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. THUMB( bx r9 ) @ If this is a Thumb-2 kernel, THUMB( .thumb ) @ switch to Thumb now. THUMB(1: ) #ifdef CONFIG_ARM_VIRT_EXT bl __hyp_stub_install #endif @ ensure svc mode and all interrupts masked safe_svcmode_maskall r9 // 确保CPU处于svc模式,并且关闭了所有的中断 mrc p15, 0, r9, c0, c0 @ get processor id bl __lookup_processor_type @ r5=procinfo r9=cpuid // 检查当前系统是否支持此CPU,如果支持就获取procinfo信息 movs r10, r5 @ invalid processor (r5=0)? THUMB( it eq ) @ force fixup-able long branch encoding beq __error_p @ yes, error 'p' ... ... /* * r1 = machine no, r2 = atags or dtb, * r8 = phys_offset, r9 = cpuid, r10 = procinfo */ bl __vet_atags #ifdef CONFIG_SMP_ON_UP bl __fixup_smp #endif #ifdef CONFIG_ARM_PATCH_PHYS_VIRT bl __fixup_pv_table #endif bl __create_page_tables // 创建页表 /* * The following calls CPU specific code in a position independent * manner. See arch/arm/mm/proc-*.S for details. r10 = base of * xxx_proc_info structure selected by __lookup_processor_type * above. On return, the CPU will be ready for the MMU to be * turned on, and r0 will hold the CPU control register value. 
*/ ldr r13, =__mmap_switched @ address to jump to after // r13保存着__mmap_switched函数的地址 @ mmu has been enabled adr lr, BSYM(1f) @ return (PIC) address mov r8, r4 @ set TTBR1 to swapper_pg_dir ldr r12, [r10, #PROCINFO_INITFUNC] add r12, r12, r10 ret r12 1: b __enable_mmu // 使能MMU ENDPROC(stext)
zImage和uImage都是经过压缩的内核镜像(Image才是未压缩的镜像),启动时先由自解压代码(decompressor)完成解压缩,解压缩完成以后再跳转到内核入口stext运行Linux内核。根据head.S中的注释,进入stext时要求:
1、MMU关闭
2、D cache关闭
3、I cache无所谓
4、r0 = 0,uboot中kernel_entry函数传递来的
5、r1 = machine id(机器ID),使用设备树的内核不会用到它,由uboot中的kernel_entry函数传递过来
6、r2 = atags或设备树(dtb)首地址,由uboot中的kernel_entry函数传递过来
arch/arm/kernel/head-common.S
... ... __mmap_switched: adr r3, __mmap_switched_data ldmia r3!, {r4, r5, r6, r7} cmp r4, r5 @ Copy data segment if needed 1: cmpne r5, r6 ldrne fp, [r4], #4 strne fp, [r5], #4 bne 1b mov fp, #0 @ Clear BSS (and zero fp) 1: cmp r6, r7 strcc fp, [r6],#4 bcc 1b ARM( ldmia r3, {r4, r5, r6, r7, sp}) THUMB( ldmia r3, {r4, r5, r6, r7} ) THUMB( ldr sp, [r3, #16] ) str r9, [r4] @ Save processor ID str r1, [r5] @ Save machine type str r2, [r6] @ Save atags pointer cmp r7, #0 strne r0, [r7] @ Save control register values b start_kernel // 启动linux内核,该函数在init/main.c中 ENDPROC(__mmap_switched) ... ...
init/main.c
asmlinkage __visible void __init start_kernel(void) // asmlinkage是用于声明函数调用约定的宏,表示该函数由汇编代码调用(在x86等架构上它要求参数通过栈而不是寄存器传递) { char *command_line; char *after_dashes; lockdep_init(); // lockdep是死锁检测模块 set_task_stack_end_magic(&init_task); smp_setup_processor_id(); // 跟SMP相关 debug_objects_early_init(); // 做一些和debug相关的初始化 /* * Set up the the initial canary ASAP: */ boot_init_stack_canary(); // 栈溢出检测初始化 cgroup_init_early(); local_irq_disable(); // 关闭当前CPU中断 early_boot_irqs_disabled = true; /* * Interrupts are still disabled. Do necessary setups, then * enable them */ boot_cpu_init(); // 跟 CPU 有关的初始化 page_address_init(); // 页地址相关的初始化 pr_notice("%s", linux_banner); setup_arch(&command_line); mm_init_cpumask(&init_mm); setup_command_line(command_line); setup_nr_cpu_ids(); setup_per_cpu_areas(); // 在 SMP 系统中有用,设置每个 CPU 的 per-cpu 数据 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ build_all_zonelists(NULL, NULL); page_alloc_init(); pr_notice("Kernel command line: %s\n", boot_command_line); parse_early_param(); after_dashes = parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, -1, -1, &unknown_bootoption); if (!IS_ERR_OR_NULL(after_dashes)) parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, set_init_arg); jump_label_init(); ... ... thread_info_cache_init(); cred_init(); fork_init(); proc_caches_init(); // 给各种资源管理结构分配缓存 buffer_init(); // 初始化缓冲缓存 key_init(); security_init(); dbg_late_init(); vfs_caches_init(totalram_pages); signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); proc_root_init(); // 注册并挂载 proc 文件系统 nsfs_init(); cpuset_init(); cgroup_init(); taskstats_init_early(); delayacct_init(); check_bugs(); acpi_subsystem_init(); sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { efi_late_init(); efi_free_boot_services(); } ftrace_init(); /* Do the rest non-__init'ed, we're now alive */ rest_init(); // rest_init 函数 }
init/main.c
... ... static noinline void __init_refok rest_init(void) { int pid; rcu_scheduler_starting(); // 启动 RCU 锁调度器 smpboot_thread_init(); /* * We need to spawn init first so that it obtains pid 1, however * the init task will end up wanting to create kthreads, which, if * we schedule it before we create kthreadd, will OOPS. */ // 创建kernel_init进程,init进程也就是PID为1的进程,init进程一开始是内核进程,后面Init进程会在根文件系统中查看名为"init" 这个程序,这个"init"程序处于用户态 // 通过运行这个"init"程序,Init进程就会实现从内核态到用户态的转变 kernel_thread(kernel_init, NULL, CLONE_FS); numa_default_policy(); pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES); // PID为2的进程 rcu_read_lock(); kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns); rcu_read_unlock(); complete(&kthreadd_done); /* * The boot idle thread must execute schedule() * at least once to get things moving: */ init_idle_bootup_task(current); schedule_preempt_disabled(); /* Call into cpu_idle with preempt disabled */ // 进入 idle 进程,cpu_startup_entry 会调用cpu_idle_loop,cpu_idle_loop 是个 while 循环,也就是 idle 进程代码,idle进程是由主线程演变而来的 // 调用cpu_idle_loop cpu_startup_entry(CPUHP_ONLINE); } ... ...
通过ps -A可查看进程ID
init进程说明:
kernel_init ->kernel_init_freeable ->设置标准输入/标准输出/标准错误使用/dev/console ->ramdisk_execute_command = "/init" ->检查/init是否存在,存在的话就运行 ->否则检查execute_command:uboot传递给linux内核的bootargs(也叫命令行参数)中可以通过init=xxx自定义init程序,有些开发板会设置init=linuxrc;指定的程序运行失败则直接panic ->如果没有指定init=,就依次尝试运行/sbin/init、/etc/init、/bin/init、/bin/sh,这也就是linux用户进程的第一个程序,该程序位于根文件系统中;全部失败则panic static int __ref kernel_init(void *unused) { int ret; kernel_init_freeable(); /* need to finish all async __init code before freeing the memory */ async_synchronize_full(); free_initmem(); mark_rodata_ro(); system_state = SYSTEM_RUNNING; numa_default_policy(); flush_delayed_fput(); if (ramdisk_execute_command) { ret = run_init_process(ramdisk_execute_command); if (!ret) return 0; pr_err("Failed to execute %s (error %d)\n", ramdisk_execute_command, ret); } /* * We try each of these until one succeeds. * * The Bourne shell can be used instead of init if we are * trying to recover a really broken machine. */ if (execute_command) { ret = run_init_process(execute_command); if (!ret) return 0; panic("Requested init %s failed (error %d).", execute_command, ret); } if (!try_to_run_init_process("/sbin/init") || !try_to_run_init_process("/etc/init") || !try_to_run_init_process("/bin/init") || !try_to_run_init_process("/bin/sh")) return 0; panic("No working init found. Try passing init= option to kernel. " "See Linux Documentation/init.txt for guidance."); } static noinline void __init kernel_init_freeable(void) { ... ... /* * check if there is an early userspace init. If yes, let it do all * the work */ if (!ramdisk_execute_command) ramdisk_execute_command = "/init"; if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) { ramdisk_execute_command = NULL; prepare_namespace(); } ... ... } static int __init init_setup(char *str) { unsigned int i; execute_command = str; /* * In case LILO is going to boot us with default command line, * it prepends "auto" before the whole cmdline which makes * the shell think it should execute a script with such name. 
* So we ignore all arguments entered _before_ init=... [MJ] */ for (i = 1; i < MAX_INIT_ARGS; i++) argv_init[i] = NULL; return 1; } __setup("init=", init_setup);