struct kexec_info {
    struct kexec_segment *segment;
    int nr_segments;
    struct memory_range *memory_range;
    int memory_ranges;
    struct memory_range *crash_range;
    int nr_crash_ranges;
    void *entry;
    struct mem_ehdr rhdr;
    unsigned long backup_start;
    unsigned long kexec_flags;
    unsigned long backup_src_start;
    unsigned long backup_src_size;
    /* Set to 1 if we are using kexec file syscall */
    unsigned long file_mode :1;

    /* Filled by kernel image processing code */
    int initrd_fd;
    char *command_line;
    int command_line_len;
};
SYSCALL_DEFINE4(kexec_load, unsigned long, entry, unsigned long, nr_segments,
        struct kexec_segment __user *, segments, unsigned long, flags)
{
    int result;

    /* We only trust the superuser with rebooting the system. */
    if (!capable(CAP_SYS_BOOT) || kexec_load_disabled)
        return -EPERM;

    /*
     * Verify we have a legal set of flags
     * This leaves us room for future extensions.
     */
    if ((flags & KEXEC_FLAGS) != (flags & ~KEXEC_ARCH_MASK))
        return -EINVAL;

    /* Verify we are on the appropriate architecture */
    if (((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH) &&
        ((flags & KEXEC_ARCH_MASK) != KEXEC_ARCH_DEFAULT))
        return -EINVAL;

    /* Put an artificial cap on the number
     * of segments passed to kexec_load.
     */
    if (nr_segments > KEXEC_SEGMENT_MAX)
        return -EINVAL;

    /* Because we write directly to the reserved memory
     * region when loading crash kernels we need a mutex here to
     * prevent multiple crash kernels from attempting to load
     * simultaneously, and to prevent a crash kernel from loading
     * over the top of an in-use crash kernel.
     *
     * KISS: always take the mutex.
     */
    if (!mutex_trylock(&kexec_mutex))
        return -EBUSY;

    result = do_kexec_load(entry, nr_segments, segments, flags);

    mutex_unlock(&kexec_mutex);

    return result;
}
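In practice this syscall is almost always reached through kexec-tools (`kexec -l` / `kexec -p`), which builds the segment array for the kernel, initrd, dtb and purgatory. A minimal, hedged sketch of calling it directly is shown below; it only unloads any currently loaded image (nr_segments == 0), which is the safest thing a raw caller can do, and it requires CAP_SYS_BOOT. A real load would pass an array of struct kexec_segment plus a valid physical entry point.

/* Hedged sketch: raw kexec_load(2) call that unloads the current image. */
#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/kexec.h>    /* struct kexec_segment, KEXEC_* flags */

int main(void)
{
    /* nr_segments == 0 means "uninstall the loaded image"; a real load
     * would pass a kexec_segment array and a non-zero entry address. */
    if (syscall(SYS_kexec_load, 0UL, 0UL, NULL,
                (unsigned long)KEXEC_ARCH_DEFAULT) != 0) {
        perror("kexec_load");
        return 1;
    }
    return 0;
}

The kernel-side work then continues in do_kexec_load(), shown next.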
static int do_kexec_load(unsigned long entry, unsigned long nr_segments,
        struct kexec_segment __user *segments, unsigned long flags)
{
    struct kimage **dest_image, *image;
    unsigned long i;
    int ret;

    if (flags & KEXEC_ON_CRASH) {
        dest_image = &kexec_crash_image;
        if (kexec_crash_image)
            arch_kexec_unprotect_crashkres();
    } else {
        dest_image = &kexec_image;
    }

    if (nr_segments == 0) {
        /* Uninstall image */
        kimage_free(xchg(dest_image, NULL));
        return 0;
    }
    if (flags & KEXEC_ON_CRASH) {
        /*
         * Loading another kernel to switch to if this one
         * crashes. Free any current crash dump kernel before
         * we corrupt it.
         */
        kimage_free(xchg(&kexec_crash_image, NULL));
    }

    ret = kimage_alloc_init(&image, entry, nr_segments, segments, flags);
    if (ret)
        return ret;

    if (flags & KEXEC_PRESERVE_CONTEXT)
        image->preserve_context = 1;

    ret = machine_kexec_prepare(image);
    if (ret)
        goto out;

    for (i = 0; i < nr_segments; i++) {
        ret = kimage_load_segment(image, &image->segment[i]);
        if (ret)
            goto out;
    }

    kimage_terminate(image);

    /* Install the new kernel and uninstall the old */
    image = xchg(dest_image, image);

out:
    if ((flags & KEXEC_ON_CRASH) && kexec_crash_image)
        arch_kexec_protect_crashkres();

    kimage_free(image);
    return ret;
}
struct kimage {
    kimage_entry_t head;
    kimage_entry_t *entry;
    kimage_entry_t *last_entry;

    unsigned long start;
    struct page *control_code_page;
    struct page *swap_page;

    unsigned long nr_segments;
    struct kexec_segment segment[KEXEC_SEGMENT_MAX];

    struct list_head control_pages;
    struct list_head dest_pages;
    struct list_head unusable_pages;

    /* Address of next control page to allocate for crash kernels. */
    unsigned long control_page;

    /* Flags to indicate special processing */
    unsigned int type : 1;
#define KEXEC_TYPE_DEFAULT 0
#define KEXEC_TYPE_CRASH   1
    unsigned int preserve_context : 1;
    /* If set, we are using file mode kexec syscall */
    unsigned int file_mode:1;

#ifdef ARCH_HAS_KIMAGE_ARCH
    struct kimage_arch arch;
#endif

#ifdef CONFIG_KEXEC_FILE
    /* Additional fields for file based kexec syscall */
    void *kernel_buf;
    unsigned long kernel_buf_len;

    void *initrd_buf;
    unsigned long initrd_buf_len;

    char *cmdline_buf;
    unsigned long cmdline_buf_len;

    /* File operations provided by image loader */
    struct kexec_file_ops *fops;

    /* Image loader handling the kernel can store a pointer here */
    void *image_loader_data;

    /* Information for loading purgatory */
    struct purgatory_info purgatory_info;
#endif
};
/*
 * No panic_cpu check version of crash_kexec(). This function is called
 * only when panic_cpu holds the current CPU number; this is the only CPU
 * which processes crash_kexec routines.
 */
void __crash_kexec(struct pt_regs *regs)
{
    /* Take the kexec_mutex here to prevent sys_kexec_load
     * running on one cpu from replacing the crash kernel
     * we are using after a panic on a different cpu.
     *
     * If the crash kernel was not located in a fixed area
     * of memory the xchg(&kexec_crash_image) would be
     * sufficient. But since I reuse the memory...
     */
    if (mutex_trylock(&kexec_mutex)) {
        if (kexec_crash_image) {
            struct pt_regs fixed_regs;

            crash_setup_regs(&fixed_regs, regs);
            crash_save_vmcoreinfo();
            machine_crash_shutdown(&fixed_regs);
            machine_kexec(kexec_crash_image);
        }
        mutex_unlock(&kexec_mutex);
    }
}
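For context, the panic path enters this code through the crash_kexec() wrapper, which uses the panic_cpu variable to make sure only one CPU performs the crash reboot. A simplified sketch of that wrapper (the exact code lives in kernel/kexec_core.c and varies between kernel versions):

/* Simplified sketch of the crash_kexec() wrapper; details differ per kernel. */
void crash_kexec(struct pt_regs *regs)
{
    int old_cpu, this_cpu;

    /* Only one CPU is allowed to run the crash kexec path. */
    this_cpu = raw_smp_processor_id();
    old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu);
    if (old_cpu == PANIC_CPU_INVALID) {
        /* This is the first CPU to panic: do the crash reboot. */
        __crash_kexec(regs);

        /* __crash_kexec() only returns if no crash kernel is loaded;
         * reset panic_cpu so a later panic can try again. */
        atomic_cmpxchg(&panic_cpu, this_cpu, PANIC_CPU_INVALID);
    }
}

machine_kexec() is the architecture-specific step that actually jumps into the new kernel; the arm64 implementation follows.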
/**
 * machine_kexec - Do the kexec reboot.
 *
 * Called from the core kexec code for a sys_reboot with LINUX_REBOOT_CMD_KEXEC.
 */
void machine_kexec(struct kimage *kimage)
{
    phys_addr_t reboot_code_buffer_phys;
    void *reboot_code_buffer;
    bool in_kexec_crash = (kimage == kexec_crash_image);
    bool stuck_cpus = cpus_are_stuck_in_kernel();

    clear_abnormal_magic();
    /*
     * New cpus may have become stuck_in_kernel after we loaded the image.
     */
    BUG_ON(!in_kexec_crash && (stuck_cpus || (num_online_cpus() > 1)));
    WARN(in_kexec_crash && (stuck_cpus || smp_crash_stop_failed()),
        "Some CPUs may be stale, kdump will be unreliable.\n");

    reboot_code_buffer_phys = page_to_phys(kimage->control_code_page);
    reboot_code_buffer = phys_to_virt(reboot_code_buffer_phys);

    kexec_image_info(kimage);

    pr_debug("%s:%d: control_code_page:        %p\n", __func__, __LINE__,
        kimage->control_code_page);
    pr_debug("%s:%d: reboot_code_buffer_phys:  %pa\n", __func__, __LINE__,
        &reboot_code_buffer_phys);
    pr_debug("%s:%d: reboot_code_buffer:       %p\n", __func__, __LINE__,
        reboot_code_buffer);
    pr_debug("%s:%d: relocate_new_kernel:      %p\n", __func__, __LINE__,
        arm64_relocate_new_kernel);
    pr_debug("%s:%d: relocate_new_kernel_size: 0x%lx(%lu) bytes\n",
        __func__, __LINE__, arm64_relocate_new_kernel_size,
        arm64_relocate_new_kernel_size);

    /*
     * Copy arm64_relocate_new_kernel to the reboot_code_buffer for use
     * after the kernel is shut down.
     */
    memcpy(reboot_code_buffer, arm64_relocate_new_kernel,
        arm64_relocate_new_kernel_size);

    /* Flush the reboot_code_buffer in preparation for its execution. */
    __flush_dcache_area(reboot_code_buffer, arm64_relocate_new_kernel_size);
    flush_icache_range((uintptr_t)reboot_code_buffer,
        arm64_relocate_new_kernel_size);

    /* Flush the kimage list and its buffers. */
    kexec_list_flush(kimage);

    /* Flush the new image if already in place. */
    if ((kimage != kexec_crash_image) && (kimage->head & IND_DONE))
        kexec_segment_flush(kimage);

    pr_info("Bye!\n");

    /* Disable all DAIF exceptions. */
    asm volatile ("msr daifset, #0xf" : : : "memory");

    /*
     * cpu_soft_restart will shutdown the MMU, disable data caches, then
     * transfer control to the reboot_code_buffer which contains a copy of
     * the arm64_relocate_new_kernel routine. arm64_relocate_new_kernel
     * uses physical addressing to relocate the new image to its final
     * position and transfers control to the image entry point when the
     * relocation is complete.
     */
    cpu_soft_restart(kimage != kexec_crash_image, reboot_code_buffer_phys,
        kimage->head, kimage->start, 0);

    BUG(); /* Should never get here. */
}
The main work done by machine_kexec():
- Copy arm64_relocate_new_kernel into the kimage's control code page.
- Call cpu_soft_restart(), passing in the key parameters from the kimage.
static inline void __noreturn cpu_soft_restart(unsigned long el2_switch,
    unsigned long entry, unsigned long arg0, unsigned long arg1,
    unsigned long arg2)
{
    typeof(__cpu_soft_restart) *restart;

    el2_switch = el2_switch && !is_kernel_in_hyp_mode() &&
        is_hyp_mode_available();
    restart = (void *)virt_to_phys(__cpu_soft_restart);

    cpu_install_idmap();
    restart(el2_switch, entry, arg0, arg1, arg2);
    unreachable();
}
/*
 * __cpu_soft_restart(el2_switch, entry, arg0, arg1, arg2) - Helper for
 * cpu_soft_restart.
 *
 * @el2_switch: Flag to indicate a switch to EL2 is needed.
 * @entry: Location to jump to for soft reset.
 * arg0: First argument passed to @entry.
 * arg1: Second argument passed to @entry.
 * arg2: Third argument passed to @entry.
 *
 * Put the CPU into the same state as it would be if it had been reset, and
 * branch to what would be the reset vector. It must be executed with the
 * flat identity mapping.
 */
ENTRY(__cpu_soft_restart)
    /* Clear sctlr_el1 flags. */
    mrs    x12, sctlr_el1
    ldr    x13, =SCTLR_ELx_FLAGS
    bic    x12, x12, x13
    msr    sctlr_el1, x12
    isb

    cbz    x0, 1f              // el2_switch?
    mov    x0, #HVC_SOFT_RESTART
    hvc    #0                  // no return

1:  mov    x18, x1             // entry
    mov    x0, x2              // arg0
    mov    x1, x3              // arg1
    mov    x2, x4              // arg2
    br     x18
ENDPROC(__cpu_soft_restart)
/*
 * arm64_relocate_new_kernel - Put a 2nd stage image in place and boot it.
 *
 * The memory that the old kernel occupies may be overwritten when copying the
 * new image to its final location. To assure that the
 * arm64_relocate_new_kernel routine which does that copy is not overwritten,
 * all code and data needed by arm64_relocate_new_kernel must be between the
 * symbols arm64_relocate_new_kernel and arm64_relocate_new_kernel_end. The
 * machine_kexec() routine will copy arm64_relocate_new_kernel to the kexec
 * control_code_page, a special page which has been set up to be preserved
 * during the copy operation.
 */
ENTRY(arm64_relocate_new_kernel)

    /* Setup the list loop variables. */
    mov    x17, x1                      /* x17 = kimage_start */
    mov    x16, x0                      /* x16 = kimage_head */
    raw_dcache_line_size x15, x0        /* x15 = dcache line size */
    mov    x14, xzr                     /* x14 = entry ptr */
    mov    x13, xzr                     /* x13 = copy dest */

    /* Clear the sctlr_el2 flags. */
    mrs    x0, CurrentEL
    cmp    x0, #CurrentEL_EL2
    b.ne   1f
    mrs    x0, sctlr_el2
    ldr    x1, =SCTLR_ELx_FLAGS
    bic    x0, x0, x1
    msr    sctlr_el2, x0
    isb
1:

    /* Check if the new image needs relocation. */
    tbnz   x16, IND_DONE_BIT, .Ldone

.Lloop:
    and    x12, x16, PAGE_MASK          /* x12 = addr */

    /* Test the entry flags. */
.Ltest_source:
    tbz    x16, IND_SOURCE_BIT, .Ltest_indirection

    /* Invalidate dest page to PoC. */
    mov    x0, x13
    add    x20, x0, #PAGE_SIZE
    sub    x1, x15, #1
    bic    x0, x0, x1
2:  dc     ivac, x0
    add    x0, x0, x15
    cmp    x0, x20
    b.lo   2b
    dsb    sy

    mov    x20, x13
    mov    x21, x12
    copy_page x20, x21, x0, x1, x2, x3, x4, x5, x6, x7

    /* dest += PAGE_SIZE */
    add    x13, x13, PAGE_SIZE
    b      .Lnext

.Ltest_indirection:
    tbz    x16, IND_INDIRECTION_BIT, .Ltest_destination

    /* ptr = addr */
    mov    x14, x12
    b      .Lnext

.Ltest_destination:
    tbz    x16, IND_DESTINATION_BIT, .Lnext

    /* dest = addr */
    mov    x13, x12

.Lnext:
    /* entry = *ptr++ */
    ldr    x16, [x14], #8

    /* while (!(entry & DONE)) */
    tbz    x16, IND_DONE_BIT, .Lloop

.Ldone:
    /* wait for writes from copy_page to finish */
    dsb    nsh
    ic     iallu
    dsb    nsh
    isb

    /* Start new image. */
    mov    x0, xzr
    mov    x1, xzr
    mov    x2, xzr
    mov    x3, xzr
    br     x17
ENDPROC(arm64_relocate_new_kernel)
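The assembly above walks the kimage entry list that kimage_terminate() finalizes: each 64-bit entry is a page-aligned physical address with low-order flag bits (IND_DESTINATION, IND_INDIRECTION, IND_SOURCE, IND_DONE from include/linux/kexec.h). A rough C rendering of the same loop, for readability only (this is not the kernel implementation; the real work runs with the MMU off):

#include <string.h>

/* Entry flags, matching include/linux/kexec.h */
#define IND_DESTINATION  0x1
#define IND_INDIRECTION  0x2
#define IND_DONE         0x4
#define IND_SOURCE       0x8

#define PAGE_SIZE        4096UL
#define PAGE_MASK        (~(PAGE_SIZE - 1))

/* Readability-only sketch of the arm64_relocate_new_kernel loop. */
static void relocate_image(unsigned long head, void (*kimage_start)(void))
{
    unsigned long entry = head;
    unsigned long *ptr = 0;     /* current indirection page */
    char *dest = 0;             /* current copy destination */

    while (!(entry & IND_DONE)) {
        char *addr = (char *)(entry & PAGE_MASK);

        if (entry & IND_DESTINATION)            /* dest = addr */
            dest = addr;
        else if (entry & IND_INDIRECTION)       /* ptr = addr */
            ptr = (unsigned long *)addr;
        else if (entry & IND_SOURCE) {          /* copy one page */
            memcpy(dest, addr, PAGE_SIZE);
            dest += PAGE_SIZE;                  /* dest += PAGE_SIZE */
        }
        entry = *ptr++;                         /* entry = *ptr++ */
    }

    kimage_start();     /* branch to the new image's entry point */
}

After the reboot into the capture kernel, the old kernel's memory is exposed through /proc/vmcore, which is set up below.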
/* Init function for vmcore module. */
static int __init vmcore_init(void)
{
    int rc = 0;

    /* Allow architectures to allocate ELF header in 2nd kernel */
    rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size);
    if (rc)
        return rc;
    /*
     * If elfcorehdr= has been passed in cmdline or created in 2nd kernel,
     * then capture the dump.
     */
    if (!(is_vmcore_usable()))
        return rc;
    rc = parse_crash_elf_headers();
    if (rc) {
        pr_warn("Kdump: vmcore not initialized\n");
        return rc;
    }
    elfcorehdr_free(elfcorehdr_addr);
    elfcorehdr_addr = ELFCORE_ADDR_ERR;

    proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
    if (proc_vmcore)
        proc_vmcore->size = vmcore_size;
    return 0;
}
fs_initcall(vmcore_init);
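Once proc_vmcore is registered, the capture kernel presents the crashed kernel's memory as an ELF core file. In real deployments makedumpfile filters and compresses it; a bare-bones C sketch of simply copying it out is shown below (the output path is illustrative):

/* Bare-bones sketch: copy /proc/vmcore to a file inside the kdump
 * capture kernel. The destination path is an assumption. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    FILE *in = fopen("/proc/vmcore", "rb");
    FILE *out = fopen("/var/crash/vmcore", "wb");
    char buf[65536];
    size_t n;

    if (!in || !out) {
        perror("open");
        return EXIT_FAILURE;
    }
    while ((n = fread(buf, 1, sizeof(buf), in)) > 0)
        fwrite(buf, 1, n, out);

    fclose(in);
    fclose(out);
    return 0;
}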
/*
 * is_kdump_kernel() checks whether this kernel is booting after a panic of
 * previous kernel or not. This is determined by checking if previous kernel
 * has passed the elf core header address on command line.
 *
 * This is not just a test if CONFIG_CRASH_DUMP is enabled or not. It will
 * return 1 if CONFIG_CRASH_DUMP=y and if kernel is booting after a panic of
 * previous kernel.
 */
static inline int is_kdump_kernel(void)
{
    return (elfcorehdr_addr != ELFCORE_ADDR_MAX) ? 1 : 0;
}

/* is_vmcore_usable() checks if the kernel is booting after a panic and
 * the vmcore region is usable.
 *
 * This makes use of the fact that due to alignment -2ULL is not
 * a valid pointer, much in the vein of IS_ERR(), except
 * dealing directly with an unsigned long long rather than a pointer.
 */
static inline int is_vmcore_usable(void)
{
    return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR ? 1 : 0;
}
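These helpers let ordinary kernel code detect that it is running as the kdump capture kernel and scale back resource usage, since the crashkernel reservation is small. A hypothetical driver snippet showing the common pattern (my_drv_nr_queues and MY_DRV_MAX_QUEUES are made-up names for illustration):

/* Hypothetical driver snippet: keep memory usage minimal in the
 * capture kernel. The names below are invented for illustration. */
#include <linux/crash_dump.h>   /* is_kdump_kernel() */

#define MY_DRV_MAX_QUEUES 8     /* made-up driver limit */

static unsigned int my_drv_nr_queues(void)
{
    if (is_kdump_kernel())
        return 1;               /* single queue, minimal memory */
    return MY_DRV_MAX_QUEUES;   /* normal operation */
}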