从fork系统调用分析system_call中断处理过程

1. 系统调用初始化流程

内核完成早期引导后, 执行的第一个C函数是init/main.c中定义的asmlinkage __visible void __init start_kernel(void), 由它启动内核; start_kernel()执行时, 又会调用arch/x86/kernel/traps.c中定义的void __init trap_init(void)初始化陷阱门及中断门; 在32位x86(CONFIG_X86_32)下, trap_init()通过执行set_system_trap_gate(SYSCALL_VECTOR, &system_call)完成系统调用的挂接。

init/main.c:
500 asmlinkage __visible void __init start_kernel(void)
501 {
502 char *command_line;
503 char *after_dashes;
...
560 sort_main_extable();
561 trap_init(); // 初始化陷阱门及中断门
562 mm_init();
...
}

arch/x86/kernel/traps.c:
792 void __init trap_init(void)
793 {
794 int i;
795 
796 #ifdef CONFIG_EISA
797 void __iomem *p = early_ioremap(0x0FFFD9, 4);
798 
799 if (readl(p) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
800 EISA_bus = 1;
801 early_iounmap(p, 4);
802 #endif
803 
804 set_intr_gate(X86_TRAP_DE, divide_error);
805 set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
806 /* int4 can be called from all */
807 set_system_intr_gate(X86_TRAP_OF, &overflow);
...
837 
838 #ifdef CONFIG_X86_32
839 set_system_trap_gate(SYSCALL_VECTOR, &system_call); // 初始化系统调用
840 set_bit(SYSCALL_VECTOR, used_vectors);
841 #endif
842
...
}

arch/x86/include/asm/irq_vectors.h: 
49 #define IA32_SYSCALL_VECTOR 0x80
50 #ifdef CONFIG_X86_32
51 # define SYSCALL_VECTOR 0x80  // 系统调用中断号: 0x80
52 #endif

2. 系统调用执行过程

在执行"int $0x80"汇编指令时, 便开始执行system_call(定义于arch/x86/kernel/entry_32.S): 首先, 切换到内核空间, 保护中断现场(SAVE_ALL); 其次, 检查系统调用号是否在允许的范围内(宏NR_syscalls是系统调用的总数, 合法的调用号为0到NR_syscalls-1), 若不在此范围内(即%eax >= NR_syscalls), 则跳转到syscall_badsys处执行; 若在此范围内, 则根据调用号获取sys_call_table中对应的服务程序地址, 调用该服务程序; 最后保存返回值, 恢复中断现场。

 488 
 489     # system call handler stub
 490 ENTRY(system_call)
 491     RING0_INT_FRAME         # can't unwind into user space anyway
 492     ASM_CLAC
 493     pushl_cfi %eax          # save orig_eax
 494     SAVE_ALL
 495     GET_THREAD_INFO(%ebp)
 496                     # system call tracing in operation / emulation
 497     testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
 498     jnz syscall_trace_entry
 499     cmpl $(NR_syscalls), %eax # 功能号是否有效
 500     jae syscall_badsys
 501 syscall_call:
 502     call *sys_call_table(,%eax,4) # 执行对应的服务程序
 503 syscall_after_call:
 504     movl %eax,PT_EAX(%esp)      # store the return value
 505 syscall_exit:
 506     LOCKDEP_SYS_EXIT
 507     DISABLE_INTERRUPTS(CLBR_ANY)    # make sure we don't miss an interrupt
 508                     # setting need_resched or sigpending
 509                     # between sampling and the iret
 510     TRACE_IRQS_OFF
 511     movl TI_flags(%ebp), %ecx
 512     testl $_TIF_ALLWORK_MASK, %ecx  # current->work
 513     jne syscall_exit_work
 514 
 515 restore_all:
 516     TRACE_IRQS_IRET
 517 restore_all_notrace:
 518 #ifdef CONFIG_X86_ESPFIX32
 519     movl PT_EFLAGS(%esp), %eax  # mix EFLAGS, SS and CS
 520     # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
 521     # are returning to the kernel.
 522     # See comments in process.c:copy_thread() for details.
 523     movb PT_OLDSS(%esp), %ah
 524     movb PT_CS(%esp), %al
 525     andl $(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
 526     cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
 527     CFI_REMEMBER_STATE
 528     je ldt_ss           # returning to user-space with LDT SS
 529 #endif
 530 restore_nocheck:
 531     RESTORE_REGS 4          # skip orig_eax/error_code
 532 irq_return:
 533     INTERRUPT_RETURN
 534 .section .fixup,"ax"
 535 ENTRY(iret_exc)
 536     pushl $0            # no error code
 537     pushl $do_iret_error
 538     jmp error_code
 539 .previous
 540     _ASM_EXTABLE(irq_return,iret_exc)
 541 
 542 #ifdef CONFIG_X86_ESPFIX32
 543     CFI_RESTORE_STATE
 544 ldt_ss:
 545 #ifdef CONFIG_PARAVIRT
 546     /*
 547      * The kernel can't run on a non-flat stack if paravirt mode
 548      * is active.  Rather than try to fixup the high bits of
 549      * ESP, bypass this code entirely.  This may break DOSemu
 550      * and/or Wine support in a paravirt VM, although the option
 551      * is still available to implement the setting of the high
 552      * 16-bits in the INTERRUPT_RETURN paravirt-op.
 553      */
 554     cmpl $0, pv_info+PARAVIRT_enabled
 555     jne restore_nocheck
 556 #endif
 557 
RING0_INT_FRAME宏定义:
 256 
 257 .macro RING0_INT_FRAME
 258     CFI_STARTPROC simple
 259     CFI_SIGNAL_FRAME
 260     CFI_DEF_CFA esp, 3*4
 261     /*CFI_OFFSET cs, -2*4;*/
 262     CFI_OFFSET eip, -3*4
 263 .endm
 264 

SAVE_ALL宏定义:

 186 .macro SAVE_ALL
 187     cld
 188     PUSH_GS
 189     pushl_cfi %fs
 190     /*CFI_REL_OFFSET fs, 0;*/
 191     pushl_cfi %es
 192     /*CFI_REL_OFFSET es, 0;*/
 193     pushl_cfi %ds
 194     /*CFI_REL_OFFSET ds, 0;*/
 195     pushl_cfi %eax
 196     CFI_REL_OFFSET eax, 0
 197     pushl_cfi %ebp
 198     CFI_REL_OFFSET ebp, 0
 199     pushl_cfi %edi
 200     CFI_REL_OFFSET edi, 0
 201     pushl_cfi %esi
 202     CFI_REL_OFFSET esi, 0
 203     pushl_cfi %edx
 204     CFI_REL_OFFSET edx, 0
 205     pushl_cfi %ecx
 206     CFI_REL_OFFSET ecx, 0
 207     pushl_cfi %ebx
 208     CFI_REL_OFFSET ebx, 0
 209     movl $(__USER_DS), %edx
 210     movl %edx, %ds
 211     movl %edx, %es
 212     movl $(__KERNEL_PERCPU), %edx
 213     movl %edx, %fs
 214     SET_KERNEL_GS %edx
 215 .endm

 

sys_call_table定义:

arch/x86/kernel/syscall_32.c:
__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
[0 ... __NR_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_32.h>
};

arch/x86/include/generated/asm/syscalls_32.h:
__SYSCALL_I386(0, sys_restart_syscall, sys_restart_syscall)
__SYSCALL_I386(1, sys_exit, sys_exit)
__SYSCALL_I386(2, sys_fork, stub32_fork)
__SYSCALL_I386(3, sys_read, sys_read)
__SYSCALL_I386(4, sys_write, sys_write)
__SYSCALL_I386(5, sys_open, compat_sys_open)
__SYSCALL_I386(6, sys_close, sys_close)
__SYSCALL_I386(7, sys_waitpid, sys32_waitpid)
__SYSCALL_I386(8, sys_creat, sys_creat)
__SYSCALL_I386(9, sys_link, sys_link)
__SYSCALL_I386(10, sys_unlink, sys_unlink)
__SYSCALL_I386(11, sys_execve, stub32_execve)
__SYSCALL_I386(12, sys_chdir, sys_chdir)
__SYSCALL_I386(13, sys_time, compat_sys_time)
__SYSCALL_I386(14, sys_mknod, sys_mknod)
__SYSCALL_I386(15, sys_chmod, sys_chmod)
__SYSCALL_I386(16, sys_lchown16, sys_lchown16)
__SYSCALL_I386(18, sys_stat, sys_stat)
__SYSCALL_I386(19, sys_lseek, compat_sys_lseek)
__SYSCALL_I386(20, sys_getpid, sys_getpid)
...

3. fork系统调用执行分析:

在执行系统调用指令之前, 我们先设置了系统调用的功能号"mov $0x02, %eax", 然后执行"int $0x80"。根据在trap_init()中设置的系统陷阱门, 得到中断号0x80对应的中断服务程序的入口地址是system_call, 系统开始执行system_call。先由RING0_INT_FRAME宏建立栈帧的CFI调试信息(其注释"can't unwind into user space anyway"是指栈回溯不会进入用户空间, 并不是禁止返回用户空间), 再保护中断现场(SAVE_ALL), 检查系统调用的功能号是否有效, 并以该功能号为下标(每个表项4字节)得到系统调用表sys_call_table中的偏移, 从而得到该功能号对应的服务程序入口地址, 即sys_fork。之后, 便调用sys_fork完成fork进程的任务。最后, 保存返回值, 恢复现场并返回用户空间。

 

posted @ 2015-04-04 20:19  long#long  阅读(404)  评论(0)  编辑  收藏  举报