Linux内核启动流程简介

 
Linux内核链接脚本arch/arm/kernel/vmlinux.lds
 
arch/arm/kernel/vmlinux.lds中
OUTPUT_ARCH(arm)
ENTRY(stext)        // Linux内核入口,入口函数stext
jiffies = jiffies_64;
SECTIONS
{
 /*
         * XXX: The linker does not define how output sections are
         * assigned to input sections when there are multiple statements
         * matching the same input section name.  There is no documented
         * order of matching.
         *
         * unwind exit sections must be discarded before the rest of the
         * unwind sections get included.
         */
 /DISCARD/ : {
  *(.ARM.exidx.exit.text)
  *(.ARM.extab.exit.text)

... ... 
 
arch/arm/kernel/head.S
 /*
  * Kernel startup entry point.
  * ---------------------------
  *
  * This is normally called from the decompressor code.  The requirements
  * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0,
  * r1 = machine nr, r2 = atags or dtb pointer.
  *
  * This code is mostly position independent, so if you link the kernel at
  * 0xc0008000, you call this at __pa(0xc0008000).
  *
  * See linux/arch/arm/tools/mach-types for the complete list of machine
  * numbers for r1.
  *
  * We're trying to keep crap to a minimum; DO NOT add any machine specific
  * crap here - that's what the boot loader (or in extreme, well justified
  * circumstances, zImage) is for.
  */

ENTRY(stext)
 ARM_BE8(setend be )                    @ ensure we are in BE8 mode

 THUMB( adr     r9, BSYM(1f)    )       @ Kernel is always entered in ARM.
 THUMB( bx      r9              )       @ If this is a Thumb-2 kernel,
 THUMB( .thumb                  )       @ switch to Thumb now.
 THUMB(1:                       )

#ifdef CONFIG_ARM_VIRT_EXT
        bl      __hyp_stub_install
#endif
        @ ensure svc mode and all interrupts masked
        safe_svcmode_maskall r9                    // make sure the CPU is in SVC mode with all interrupts masked

        mrc     p15, 0, r9, c0, c0              @ get processor id
        bl      __lookup_processor_type         @ r5=procinfo r9=cpuid        // check whether this CPU is supported; if so r5 receives its procinfo
        movs    r10, r5                         @ invalid processor (r5=0)?
 THUMB( it      eq )            @ force fixup-able long branch encoding
        beq     __error_p                       @ yes, error 'p'

... ...
         /*
          * r1 = machine no, r2 = atags or dtb,
          * r8 = phys_offset, r9 = cpuid, r10 = procinfo
          */
         bl      __vet_atags
 #ifdef CONFIG_SMP_ON_UP
         bl      __fixup_smp
 #endif
 #ifdef CONFIG_ARM_PATCH_PHYS_VIRT
         bl      __fixup_pv_table
 #endif
         bl      __create_page_tables        // create the page tables
         /*
          * The following calls CPU specific code in a position independent
          * manner.  See arch/arm/mm/proc-*.S for details.  r10 = base of
          * xxx_proc_info structure selected by __lookup_processor_type
          * above.  On return, the CPU will be ready for the MMU to be
          * turned on, and r0 will hold the CPU control register value.
          */
         ldr     r13, =__mmap_switched           @ address to jump to after    // r13 holds the address of __mmap_switched
                                                 @ mmu has been enabled
         adr     lr, BSYM(1f)                    @ return (PIC) address
         mov     r8, r4                          @ set TTBR1 to swapper_pg_dir
         ldr     r12, [r10, #PROCINFO_INITFUNC]
         add     r12, r12, r10
         ret     r12
 1:      b       __enable_mmu        // enable the MMU
 ENDPROC(stext)
zImage和uImage都是经过压缩的(Image是未压缩的内核镜像),启动时会先由解压缩代码完成解压,解压缩完成以后才跳转到stext运行Linux内核。进入stext时要求:
1、MMU关闭
2、D cache关闭
3、I cache无所谓
4、r0 = 0,uboot中kernel_entry函数传递来的
5、r1 = machine id,在使用设备树的内核没有用到,uboot中kernel_entry函数传递来的
6、r2 = atags或设备树(dtb)首地址,uboot中kernel_entry函数传递来的
 
arch/arm/kernel/head-common.S
... ...
/*
 * __mmap_switched: loads its parameters from __mmap_switched_data, copies
 * the data segment if needed, clears BSS, then saves the processor ID (r9),
 * machine type (r1), atags pointer (r2) and - when r7 is non-zero - the
 * control register value (r0) before branching to start_kernel.
 */
__mmap_switched:
        adr     r3, __mmap_switched_data

        ldmia   r3!, {r4, r5, r6, r7}
        cmp     r4, r5                          @ Copy data segment if needed
1:      cmpne   r5, r6
        ldrne   fp, [r4], #4
        strne   fp, [r5], #4
        bne     1b

        mov     fp, #0                          @ Clear BSS (and zero fp)
1:      cmp     r6, r7
        strcc   fp, [r6],#4
        bcc     1b

 ARM(   ldmia   r3, {r4, r5, r6, r7, sp})
 THUMB( ldmia   r3, {r4, r5, r6, r7}    )
 THUMB( ldr     sp, [r3, #16]           )
        str     r9, [r4]                        @ Save processor ID
        str     r1, [r5]                        @ Save machine type
        str     r2, [r6]                        @ Save atags pointer
        cmp     r7, #0
        strne   r0, [r7]                        @ Save control register values
        b       start_kernel        // start the Linux kernel; start_kernel is defined in init/main.c
ENDPROC(__mmap_switched)
... ...
 
init/main.c
 /*
  * start_kernel - C entry point of the kernel, branched to from
  * __mmap_switched.  Runs the boot-time initialization calls below in order
  * and finally hands over to rest_init().
  *
  * asmlinkage is a linkage/calling-convention marker macro for functions
  * entered from assembly; its exact expansion is architecture-specific.
  */
 asmlinkage __visible void __init start_kernel(void)
 {
         char *command_line;
         char *after_dashes;


         lockdep_init();                        // lockdep is the lock-dependency (deadlock) checking module
         set_task_stack_end_magic(&init_task);
         smp_setup_processor_id();               // SMP-related setup
         debug_objects_early_init();            // some debug-related early initialization

         /*
          * Set up the initial canary ASAP:
          */
         boot_init_stack_canary();            // initialize stack-overflow detection (stack canary)

         cgroup_init_early();

         local_irq_disable();                // disable interrupts on the current CPU
         early_boot_irqs_disabled = true;

 /*
  * Interrupts are still disabled. Do necessary setups, then
  * enable them
  */
         boot_cpu_init();                    // CPU-related initialization
         page_address_init();                // page-address related initialization
         pr_notice("%s", linux_banner);
         setup_arch(&command_line);
         mm_init_cpumask(&init_mm);
         setup_command_line(command_line);
         setup_nr_cpu_ids();
         setup_per_cpu_areas();            // set up each CPU's per-cpu data (relevant on SMP systems)
         smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */

         build_all_zonelists(NULL, NULL);
         page_alloc_init();

         pr_notice("Kernel command line: %s\n", boot_command_line);
         parse_early_param();
         after_dashes = parse_args("Booting kernel",
                                   static_command_line, __start___param,
                                   __stop___param - __start___param,
                                   -1, -1, &unknown_bootoption);
         if (!IS_ERR_OR_NULL(after_dashes))
                 parse_args("Setting init args", after_dashes, NULL, 0, -1, -1,
                            set_init_arg);

         jump_label_init();
... ...


         thread_info_cache_init();
         cred_init();
         fork_init();
         proc_caches_init();            // allocate caches for various resource-management structures
         buffer_init();                // initialize the buffer cache
         key_init();
         security_init();
         dbg_late_init();
         vfs_caches_init(totalram_pages);
         signals_init();
         /* rootfs populating might need page-writeback */
         page_writeback_init();
         proc_root_init();            // register and mount the proc filesystem
         nsfs_init();
         cpuset_init();
         cgroup_init();
         taskstats_init_early();
         delayacct_init();

         check_bugs();

         acpi_subsystem_init();
         sfi_init_late();

         if (efi_enabled(EFI_RUNTIME_SERVICES)) {
                 efi_late_init();
                 efi_free_boot_services();
         }

         ftrace_init();

         /* Do the rest non-__init'ed, we're now alive */
         rest_init();                // continue in rest_init()
 }

 

init/main.c
... ...
  /*
   * rest_init - last step of start_kernel(): spawns the kernel_init thread
   * and then the kthreadd thread, signals kthreadd_done, and finally calls
   * cpu_startup_entry() on the current (boot) thread.
   */
  static noinline void __init_refok rest_init(void)
 {
         int pid;

         rcu_scheduler_starting();            // start the RCU scheduler
         smpboot_thread_init();
         /*
          * We need to spawn init first so that it obtains pid 1, however
          * the init task will end up wanting to create kthreads, which, if
          * we schedule it before we create kthreadd, will OOPS.
          */
         // Create the kernel_init thread, i.e. the init process with PID 1. It starts out as a kernel thread; later it looks for a program named "init" in the root filesystem, and that "init" program runs in user space.
         // Running that "init" program is how the init process moves from kernel mode to user mode.
         kernel_thread(kernel_init, NULL, CLONE_FS);                 
         numa_default_policy();
         pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);    // kthreadd: the process with PID 2
         rcu_read_lock();
         kthreadd_task = find_task_by_pid_ns(pid, &init_pid_ns);
         rcu_read_unlock();
         complete(&kthreadd_done);

         /*
          * The boot idle thread must execute schedule()
          * at least once to get things moving:
          */
         init_idle_bootup_task(current);
         schedule_preempt_disabled();
         /* Call into cpu_idle with preempt disabled */
         // Enter the idle process: cpu_startup_entry calls cpu_idle_loop, which is a while loop and is the body of the idle process. The idle process is what the boot (main) thread turns into.
         // This ends up calling cpu_idle_loop.
         cpu_startup_entry(CPUHP_ONLINE);    
 }
 ... ...
 
 
通过ps -A可查看进程ID
0
 
init进程说明:
kernel_init
    ->kernel_init_freeable
        ->设置标准输入/标准输出/标准错误使用/dev/console
        ->ramdisk_execute_command = "/init"
    ->检查/init是否存在,存在的话就运行
    ->如果/init不存在,再看uboot通过bootargs(也叫命令行参数)传递给linux内核的init=xxx,例如有些开发板会设置init=/linuxrc;指定了init=xxx但运行失败时,内核直接panic
    ->如果没有指定init=xxx,就依次尝试运行/sbin/init、/etc/init、/bin/init、/bin/sh,第一个运行成功的程序就是linux用户进程的第一个程序,该程序位于根文件系统中;全部失败则panic("No working init found")

 /*
  * kernel_init - body of the thread spawned first by rest_init().
  *
  * Finishes the remaining kernel-side initialization, then tries to start
  * an init program.  Candidates are tried in this order:
  *   1. ramdisk_execute_command, if set (failure is logged, then we go on);
  *   2. execute_command, if set (failure is fatal: panic);
  *   3. the built-in fallback list below, first success wins.
  * Returns 0 as soon as one candidate has been started; panics if none can.
  */
 static int __ref kernel_init(void *unused)
 {
         /* Tried in order; the shell comes last as a recovery aid. */
         static const char *const fallback_inits[] = {
                 "/sbin/init",
                 "/etc/init",
                 "/bin/init",
                 "/bin/sh",
         };
         unsigned int idx;
         int err;

         kernel_init_freeable();
         /* All async __init code has to complete before init memory is freed. */
         async_synchronize_full();
         free_initmem();
         mark_rodata_ro();
         system_state = SYSTEM_RUNNING;
         numa_default_policy();

         flush_delayed_fput();

         if (ramdisk_execute_command) {
                 err = run_init_process(ramdisk_execute_command);
                 if (err == 0)
                         return 0;
                 pr_err("Failed to execute %s (error %d)\n",
                        ramdisk_execute_command, err);
         }

         /*
          * From here on every candidate is tried until one succeeds.
          *
          * An explicitly requested init that fails is fatal; the Bourne
          * shell at the end of the fallback list can stand in for init
          * when recovering a really broken machine.
          */
         if (execute_command) {
                 err = run_init_process(execute_command);
                 if (err == 0)
                         return 0;
                 panic("Requested init %s failed (error %d).",
                       execute_command, err);
         }

         for (idx = 0;
              idx < sizeof(fallback_inits) / sizeof(fallback_inits[0]);
              idx++) {
                 if (try_to_run_init_process(fallback_inits[idx]) == 0)
                         return 0;
         }

         panic("No working init found.  Try passing init= option to kernel. "
               "See Linux Documentation/init.txt for guidance.");
 }

/*
 * kernel_init_freeable (excerpt; the rest of the body is elided here).
 * The part shown decides which early-userspace init program kernel_init()
 * will try first via ramdisk_execute_command.
 */
static noinline void __init kernel_init_freeable(void) {                                
... ...
         /*
          * check if there is an early userspace init.  If yes, let it do all
          * the work
          */
         /* Nothing was set beforehand: fall back to the default "/init". */
         if (!ramdisk_execute_command)
                 ramdisk_execute_command = "/init";

         /*
          * If sys_access() on that path returns non-zero, clear the command
          * (so kernel_init() skips it) and call prepare_namespace().
          */
         if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {
                 ramdisk_execute_command = NULL;
                 prepare_namespace();
         }
... ...
}

 /*
  * init_setup - handler registered for the "init=" boot parameter.
  * @str: the text that followed "init=" on the command line.
  *
  * Stores @str in execute_command and resets argv_init[1] through
  * argv_init[MAX_INIT_ARGS - 1] to NULL.  Always returns 1.
  */
 static int __init init_setup(char *str)
 {
         unsigned int slot = 1;

         execute_command = str;

         /*
          * When LILO boots with the default command line it prepends "auto"
          * to the whole cmdline, and the shell would then try to run a
          * script of that name.  Drop every argument collected _before_
          * init=... so that does not happen. [MJ]
          */
         while (slot < MAX_INIT_ARGS) {
                 argv_init[slot] = NULL;
                 slot++;
         }

         return 1;
 }
 __setup("init=", init_setup);
posted @ 2024-03-22 15:58  lethe1203  阅读(11)  评论(0编辑  收藏  举报