LINUX内核分析第三周学习总结——构造一个简单的Linux系统MenuOS
LINUX内核分析第三周学习总结——构造一个简单的Linux系统MenuOS
张忻(原创作品转载请注明出处)
《Linux内核分析》MOOC课程http://mooc.study.163.com/course/USTC-1000029000
一、使用gdb跟踪调试内核从start_kernel到init进程启动
使用实验楼的虚拟机打开shell
cd LinuxKernel/
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img
内核启动完成后进入menu程序,支持三个命令help、version和quit。
qemu -kernel linux-3.18.6/arch/x86/boot/bzImage -initrd rootfs.img -s -S
# 关于-s和-S选项的说明:
# -S freeze CPU at startup (use 'c' to start execution)
# -s shorthand for -gdb tcp::1234,若不想使用1234端口,则可以使用-gdb tcp::xxxx来取代-s选项
现在系统是stop的状态,如下图:
在gdb中对start_kernel设置断点后,按c让系统继续运行,系统会停在start_kernel的位置,如下:
list之后看到执行的位置
再设一个断点,系统执行到rest_init的位置,如下:
二、详细分析从start_kernel到init进程启动的过程
start_kernel函数的执行过程
代码在init目录下的main.c
500asmlinkage __visible void __init start_kernel(void) 501{ 502 char *command_line; 503 char *after_dashes; 504 505 /* 506 * Need to run as early as possible, to initialize the 507 * lockdep hash: 508 */ 509 lockdep_init(); 510 set_task_stack_end_magic(&init_task); init_task即手工创建的PCB,0号进程即最终的idle进程。 511 smp_setup_processor_id(); 512 debug_objects_early_init(); 513 514 /* 515 * Set up the the initial canary ASAP: 516 */ 517 boot_init_stack_canary(); 518 519 cgroup_init_early(); 520 521 local_irq_disable(); 522 early_boot_irqs_disabled = true; 523 524/* 525 * Interrupts are still disabled. Do necessary setups, then 526 * enable them 527 */ 528 boot_cpu_init(); 529 page_address_init(); 530 pr_notice("%s", linux_banner); 531 setup_arch(&command_line); 532 mm_init_cpumask(&init_mm); 533 setup_command_line(command_line); 534 setup_nr_cpu_ids(); 535 setup_per_cpu_areas(); 536 smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ 537 538 build_all_zonelists(NULL, NULL); 539 page_alloc_init(); 540 541 pr_notice("Kernel command line: %s\n", boot_command_line); 542 parse_early_param(); 543 after_dashes = parse_args("Booting kernel", 544 static_command_line, __start___param, 545 __stop___param - __start___param, 546 -1, -1, &unknown_bootoption); 547 if (!IS_ERR_OR_NULL(after_dashes)) 548 parse_args("Setting init args", after_dashes, NULL, 0, -1, -1, 549 set_init_arg); 550 551 jump_label_init(); 552 553 /* 554 * These use large bootmem allocations and must precede 555 * kmem_cache_init() 556 */ 557 setup_log_buf(0); 558 pidhash_init(); 559 vfs_caches_init_early(); 560 sort_main_extable(); 561 trap_init(); 涉及到中断的初始化 562 mm_init(); 563 564 /* 565 * Set up the scheduler prior starting any interrupts (such as the 566 * timer interrupt). Full topology setup happens at smp_init() 567 * time - but meanwhile we still have a functioning scheduler. 
568 */ 569 sched_init(); 570 /* 571 * Disable preemption - early bootup scheduling is extremely 572 * fragile until we cpu_idle() for the first time. 573 */ 574 preempt_disable(); 575 if (WARN(!irqs_disabled(), 576 "Interrupts were enabled *very* early, fixing it\n")) 577 local_irq_disable(); 578 idr_init_cache(); 579 rcu_init(); 580 context_tracking_init(); 581 radix_tree_init(); 582 /* init some links before init_ISA_irqs() */ 583 early_irq_init(); 584 init_IRQ(); 585 tick_init(); 586 rcu_init_nohz(); 587 init_timers(); 588 hrtimers_init(); 589 softirq_init(); 590 timekeeping_init(); 591 time_init(); 592 sched_clock_postinit(); 593 perf_event_init(); 594 profile_init(); 595 call_function_init(); 596 WARN(!irqs_disabled(), "Interrupts were enabled early\n"); 597 early_boot_irqs_disabled = false; 598 local_irq_enable(); 599 600 kmem_cache_init_late(); 601 602 /* 603 * HACK ALERT! This is early. We're enabling the console before 604 * we've done PCI setups etc, and console_init() must be aware of 605 * this. But we do want output early, in case something goes wrong. 
606 */ 607 console_init(); 608 if (panic_later) 609 panic("Too many boot %s vars at `%s'", panic_later, 610 panic_param); 611 612 lockdep_info(); 613 614 /* 615 * Need to run this when irqs are enabled, because it wants 616 * to self-test [hard/soft]-irqs on/off lock inversion bugs 617 * too: 618 */ 619 locking_selftest(); 620 621#ifdef CONFIG_BLK_DEV_INITRD 622 if (initrd_start && !initrd_below_start_ok && 623 page_to_pfn(virt_to_page((void *)initrd_start)) < min_low_pfn) { 624 pr_crit("initrd overwritten (0x%08lx < 0x%08lx) - disabling it.\n", 625 page_to_pfn(virt_to_page((void *)initrd_start)), 626 min_low_pfn); 627 initrd_start = 0; 628 } 629#endif 630 page_cgroup_init(); 631 debug_objects_mem_init(); 632 kmemleak_init(); 633 setup_per_cpu_pageset(); 634 numa_policy_init(); 635 if (late_time_init) 636 late_time_init(); 637 sched_clock_init(); 638 calibrate_delay(); 639 pidmap_init(); 640 anon_vma_init(); 641 acpi_early_init(); 642#ifdef CONFIG_X86 643 if (efi_enabled(EFI_RUNTIME_SERVICES)) 644 efi_enter_virtual_mode(); 645#endif 646#ifdef CONFIG_X86_ESPFIX64 647 /* Should be run before the first non-init thread is created */ 648 init_espfix_bsp(); 649#endif 650 thread_info_cache_init(); 651 cred_init(); 652 fork_init(totalram_pages); 653 proc_caches_init(); 654 buffer_init(); 655 key_init(); 656 security_init(); 657 dbg_late_init(); 658 vfs_caches_init(totalram_pages); 659 signals_init(); 660 /* rootfs populating might need page-writeback */ 661 page_writeback_init(); 662 proc_root_init(); 663 cgroup_init(); 664 cpuset_init(); 665 taskstats_init_early(); 666 delayacct_init(); 667 668 check_bugs(); 669 670 sfi_init_late(); 671 672 if (efi_enabled(EFI_RUNTIME_SERVICES)) { 673 efi_late_init(); 674 efi_free_boot_services(); 675 } 676 677 ftrace_init(); 678 679 /* Do the rest non-__init'ed, we're now alive */ 680 rest_init(); 681}
不管分析内核的哪一部分都会涉及到start_kernel。
(1)561 trap_init();
涉及到中断的初始化
只需查看 /linux-3.18.6/arch/x86/kernel/traps.c
其中设置了很多中断门。
设置系统陷阱门:
839 set_system_trap_gate(SYSCALL_VECTOR, &system_call); 840 set_bit(SYSCALL_VECTOR, used_vectors);
(2)562 mm_init();
内存管理模块的初始化;
(3)569 sched_init();
调度模块;
(4)680 rest_init();
403 kernel_thread(kernel_init, NULL, CLONE_FS);
kernel_init里:
944 if (ramdisk_execute_command) { 945 ret = run_init_process(ramdisk_execute_command); 946 if (!ret) 947 return 0; 948 pr_err("Failed to execute %s (error %d)\n", 949 ramdisk_execute_command, ret); 950 }
以上创建用户态的一号进程
405 pid = kernel_thread(kthreadd, NULL, CLONE_FS | CLONE_FILES);
以上创建内核线程
417 /* Call into cpu_idle with preempt disabled */ 418 cpu_startup_entry(CPUHP_ONLINE);
第418行调用的cpu_startup_entry函数,具体代码为:
256void cpu_startup_entry(enum cpuhp_state state) 257{ 258 /* 259 * This #ifdef needs to die, but it's too late in the cycle to 260 * make this generic (arm and sh have never invoked the canary 261 * init for the non boot cpus!). Will be fixed in 3.11 262 */ 263#ifdef CONFIG_X86 264 /* 265 * If we're the non-boot CPU, nothing set the stack canary up 266 * for us. The boot CPU already has it initialized but no harm 267 * in doing it again. This is a good place for updating it, as 268 * we wont ever return from this function (so the invalid 269 * canaries already on the stack wont ever trigger). 270 */ 271 boot_init_stack_canary(); 272#endif 273 arch_cpu_idle_prepare(); 274 cpu_idle_loop(); 275} 276
其中第274行调用的cpu_idle_loop()函数就是0号进程(idle进程)的循环体。
当系统没有进程需要执行时就调度到idle进程。
(5)回顾总结
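start_kernel启动后,0号进程(init_task)会一直存在,它在rest_init()的最后调用cpu_startup_entry()进入cpu_idle_loop(),成为idle进程。0号进程在rest_init()中通过kernel_thread创建了1号进程kernel_init,以及内核线程kthreadd。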
三、总结
对“Linux系统启动过程”的理解。
task 0 的进程结构(task_struct init_task)由INIT_TASK宏静态定义。该结构体(init_task)在linux启动时被设置为current_task。当初始化到rest_init函数中时,通过kernel_thread函数启动第一个内核线程kernel_init。kernel_init再通过do_execve启动/sbin/init。这就是我们看到的init进程,进程号为1。初始化的最后linux调用schedule(),整个系统就运行起来了。