kernel源码(十一)system_call.s

1 系统调用流程图

 2 源码

/*
 *  linux/kernel/system_call.s
 *
 *  (C) 1991  Linus Torvalds
 */

/*
 *  system_call.s  contains the system-call low-level handling routines.
 * This also contains the timer-interrupt handler, as some of the code is
 * the same. The hd- and floppy-interrupts are also here.
 *
 * NOTE: This code handles signal-recognition, which happens every time
 * after a timer-interrupt and after each system call. Ordinary interrupts
 * don't handle signal-recognition, as that would clutter them up totally
 * unnecessarily.
 *
 * Stack layout in 'ret_from_sys_call':
 *
 *     0(%esp) - %eax
 *     4(%esp) - %ebx
 *     8(%esp) - %ecx
 *     C(%esp) - %edx
 *    10(%esp) - %fs
 *    14(%esp) - %es
 *    18(%esp) - %ds
 *    1C(%esp) - %eip
 *    20(%esp) - %cs
 *    24(%esp) - %eflags
 *    28(%esp) - %oldesp
 *    2C(%esp) - %oldss
 */

/* Signal number constant; not referenced by the code visible in this file. */
SIG_CHLD    = 17

/*
 * Byte offsets from %esp of the values saved on the kernel stack, valid at
 * 'ret_from_sys_call' (they mirror the stack-layout table in the header
 * comment at the top of the file).
 */
EAX        = 0x00
EBX        = 0x04
ECX        = 0x08
EDX        = 0x0C
FS        = 0x10
ES        = 0x14
DS        = 0x18
EIP        = 0x1C
CS        = 0x20
EFLAGS        = 0x24
OLDESP        = 0x28
OLDSS        = 0x2C

state    = 0        # these are offsets into the task-struct.
counter    = 4
priority = 8
signal    = 12
sigaction = 16        # MUST be 16 (=len of sigaction)
blocked = (33*16)        # = 16 + 32*16; presumably the field right after a 32-entry sigaction array -- confirm against task_struct

# offsets within sigaction
sa_handler = 0
sa_mask = 4
sa_flags = 8
sa_restorer = 12

/*
 * Number of system calls accepted by _system_call: call numbers above
 * nr_system_calls-1 are rejected there.
 */
nr_system_calls = 72

/*
 * Ok, I get parallel printer interrupts while using the floppy for some
 * strange reason. Urgel. Now I just ignore them.
 */
/* Entry points defined in this file that are made visible to the linker. */
.globl _system_call,_sys_fork,_timer_interrupt,_sys_execve
.globl _hd_interrupt,_floppy_interrupt,_parallel_interrupt
.globl _device_not_available, _coprocessor_error

/*
 * bad_sys_call - reject an out-of-range system-call number.
 * Reached from _system_call before any register has been pushed, so the
 * stack still holds only the interrupt frame and a bare iret is correct.
 * Out: %eax = -1.
 */
.align 2
bad_sys_call:
    movl $-1,%eax        # error return value handed back to the caller
    iret
/*
 * reschedule - run the scheduler, then continue at ret_from_sys_call.
 * The address of ret_from_sys_call is pushed as a fake return address and
 * _schedule is entered with jmp, so the 'ret' at the end of _schedule
 * lands on ret_from_sys_call.
 */
.align 2
reschedule:
    pushl $ret_from_sys_call        # return address for _schedule
    jmp _schedule
/*
 * _system_call - system-call entry point.
 *
 * In:   %eax             system-call number, used as index into
 *                        _sys_call_table (4-byte entries)
 *       %ebx,%ecx,%edx   pushed last so that they sit on the stack as the
 *                        parameters of the called handler
 * Out:  %eax             value returned by the handler (saved on the stack
 *                        after the call and restored at label 3)
 *
 * After the handler returns, the scheduler is invoked via 'reschedule' if
 * the current task's state is non-zero or its counter is zero. Execution
 * then falls into ret_from_sys_call, which delivers at most one pending,
 * unblocked signal before restoring registers and returning with iret.
 *
 * ret_from_sys_call is also the common exit path of _timer_interrupt,
 * _coprocessor_error and _device_not_available; it expects the stack to
 * match the layout table in the header comment (saved %eax at 0(%esp)).
 */
.align 2
_system_call:
    cmpl $nr_system_calls-1,%eax
    ja bad_sys_call        # unsigned compare, so negative numbers are rejected too
    push %ds
    push %es
    push %fs
    pushl %edx
    pushl %ecx        # push %ebx,%ecx,%edx as parameters
    pushl %ebx        # to the system call
    movl $0x10,%edx        # set up ds,es to kernel space
    mov %dx,%ds
    mov %dx,%es
    movl $0x17,%edx        # fs points to local data space
    mov %dx,%fs
    call _sys_call_table(,%eax,4)        # indirect call through table entry number %eax
    pushl %eax        # save the return value; stack now matches the layout table
    movl _current,%eax
    cmpl $0,state(%eax)        # state: non-zero -> let the scheduler run
    jne reschedule
    cmpl $0,counter(%eax)        # counter: zero -> let the scheduler run
    je reschedule
ret_from_sys_call:
    movl _current,%eax        # task[0] cannot have signals
    cmpl _task,%eax        # is current the first entry of _task?
    je 3f
    cmpw $0x0f,CS(%esp)        # was old code segment supervisor ?
    jne 3f        # saved cs is not selector 0x0f (LDT, index 1, RPL 3): skip signals
    cmpw $0x17,OLDSS(%esp)        # was stack segment = 0x17 ?
    jne 3f
    movl signal(%eax),%ebx        # %ebx = pending-signal bitmap
    movl blocked(%eax),%ecx        # %ecx = blocked-signal bitmap
    notl %ecx
    andl %ebx,%ecx        # %ecx = pending & ~blocked
    bsfl %ecx,%ecx        # index of lowest set bit; ZF=1 if no bit is set
    je 3f        # nothing deliverable
    btrl %ecx,%ebx        # clear that bit in the pending bitmap
    movl %ebx,signal(%eax)
    incl %ecx        # bit index 0..31 -> signal number 1..32
    pushl %ecx        # argument for _do_signal
    call _do_signal
    popl %eax        # discard the signal-number argument
3:    popl %eax        # restore saved %eax (the system-call return value)
    popl %ebx
    popl %ecx
    popl %edx
    pop %fs
    pop %es
    pop %ds
    iret

/*
 * _coprocessor_error - coprocessor-error exception stub.
 * Saves registers in the same order as the ret_from_sys_call stack layout,
 * loads the segment registers (ds/es = 0x10, fs = 0x17, as _system_call
 * does), then jumps to _math_error with ret_from_sys_call pushed as its
 * return address, so the common exit path restores the registers.
 */
.align 2
_coprocessor_error:
    push %ds
    push %es
    push %fs
    pushl %edx
    pushl %ecx
    pushl %ebx
    pushl %eax
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    pushl $ret_from_sys_call        # _math_error will 'ret' to ret_from_sys_call
    jmp _math_error

/*
 * _device_not_available - device-not-available exception stub.
 * Saves registers in the ret_from_sys_call layout and pushes
 * ret_from_sys_call as the return address for whichever path runs next:
 *   - EM bit (0x4) of %cr0 clear: jump to _math_state_restore;
 *   - EM bit set: call _math_emulate with %ebp/%esi/%edi preserved
 *     around the call, then 'ret'.
 * Both paths therefore finish in ret_from_sys_call.
 */
.align 2
_device_not_available:
    push %ds
    push %es
    push %fs
    pushl %edx
    pushl %ecx
    pushl %ebx
    pushl %eax
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    pushl $ret_from_sys_call        # return address shared by both paths below
    clts                # clear TS so that we can use math
    movl %cr0,%eax
    testl $0x4,%eax            # EM (math emulation bit)
    je _math_state_restore        # EM clear: no emulation needed
    pushl %ebp        # preserve the registers not saved above
    pushl %esi
    pushl %edi
    call _math_emulate
    popl %edi
    popl %esi
    popl %ebp
    ret        # pops the ret_from_sys_call address pushed above

/*
 * _timer_interrupt - timer interrupt handler.
 * Saves registers in the ret_from_sys_call layout, increments _jiffies,
 * sends EOI to the interrupt controller, and calls _do_timer with the
 * privilege level of the interrupted code (low two bits of the saved cs).
 * Leaves through ret_from_sys_call.
 */
.align 2
_timer_interrupt:
    push %ds        # save ds,es and put kernel data space
    push %es        # into them. %fs is used by _system_call
    push %fs
    pushl %edx        # we save %eax,%ecx,%edx as gcc doesn't
    pushl %ecx        # save those across function calls. %ebx
    pushl %ebx        # is saved as we use that in ret_sys_call
    pushl %eax
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    incl _jiffies
    movb $0x20,%al        # EOI to interrupt controller #1
    outb %al,$0x20
    movl CS(%esp),%eax        # saved cs of the interrupted code
    andl $3,%eax        # %eax is CPL (0 or 3, 0=supervisor)
    pushl %eax
    call _do_timer        # 'do_timer(long CPL)' does everything from
    addl $4,%esp        # task switching to accounting ...
    jmp ret_from_sys_call

/*
 * _sys_execve - assembly wrapper around _do_execve.
 * Passes the address of the stack slot at EIP(%esp) as the argument.
 * When entered through the 'call' in _system_call, 0(%esp) is the return
 * address and the six values pushed there follow it, so this slot is the
 * saved eip of the caller's interrupt frame.
 * Out: %eax is left as _do_execve returned it.
 */
.align 2
_sys_execve:
    lea EIP(%esp),%eax        # %eax = address of the saved eip slot
    pushl %eax
    call _do_execve
    addl $4,%esp        # drop the argument
    ret

/*
 * _sys_fork - assembly wrapper for fork.
 * Calls _find_empty_process; a negative result is returned unchanged in
 * %eax. Otherwise %gs, %esi, %edi, %ebp and that result are pushed as
 * extra arguments and _copy_process is called.
 * Out: %eax is left as the last call returned it.
 */
.align 2
_sys_fork:
    call _find_empty_process
    testl %eax,%eax
    js 1f        # negative result: return it as-is
    push %gs
    pushl %esi
    pushl %edi
    pushl %ebp
    pushl %eax        # result of _find_empty_process
    call _copy_process
    addl $20,%esp        # drop the five values pushed above
1:    ret

/*
 * _hd_interrupt - hard-disk interrupt handler.
 * Saves %eax/%ecx/%edx and the segment registers, sends EOI to both
 * interrupt controllers (ports 0xA0 and 0x20), then calls the handler
 * stored in _do_hd. The pointer is fetched and zeroed in one xchgl; if it
 * was already zero, _unexpected_hd_interrupt is called instead.
 * Does not go through ret_from_sys_call: registers are restored here.
 */
_hd_interrupt:
    pushl %eax
    pushl %ecx
    pushl %edx
    push %ds
    push %es
    push %fs
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    movb $0x20,%al
    outb %al,$0xA0        # EOI to interrupt controller #2 (port 0xA0)
    jmp 1f            # give port chance to breathe
1:    jmp 1f
1:    xorl %edx,%edx
    xchgl _do_hd,%edx        # %edx = handler pointer; _do_hd is cleared
    testl %edx,%edx
    jne 1f
    movl $_unexpected_hd_interrupt,%edx        # no handler was registered
1:    outb %al,$0x20        # EOI to interrupt controller #1 (%al is still 0x20)
    call *%edx        # "interesting" way of handling intr.
    pop %fs
    pop %es
    pop %ds
    popl %edx
    popl %ecx
    popl %eax
    iret

/*
 * _floppy_interrupt - floppy interrupt handler.
 * Saves %eax/%ecx/%edx and the segment registers, sends EOI to port 0x20,
 * then calls the handler stored in _do_floppy. The pointer is fetched and
 * zeroed in one xchgl; if it was already zero,
 * _unexpected_floppy_interrupt is called instead.
 * Does not go through ret_from_sys_call: registers are restored here.
 */
_floppy_interrupt:
    pushl %eax
    pushl %ecx
    pushl %edx
    push %ds
    push %es
    push %fs
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    movb $0x20,%al
    outb %al,$0x20        # EOI to interrupt controller #1
    xorl %eax,%eax
    xchgl _do_floppy,%eax        # %eax = handler pointer; _do_floppy is cleared
    testl %eax,%eax
    jne 1f
    movl $_unexpected_floppy_interrupt,%eax        # no handler was registered
1:    call *%eax        # "interesting" way of handling intr.
    pop %fs
    pop %es
    pop %ds
    popl %edx
    popl %ecx
    popl %eax
    iret

/*
 * _parallel_interrupt - parallel-port interrupt handler.
 * Only acknowledges the interrupt (EOI to port 0x20) and returns; %eax is
 * preserved. Per the note near the .globl directives, these interrupts
 * are simply ignored.
 */
_parallel_interrupt:
    pushl %eax
    movb $0x20,%al        # EOI command byte
    outb %al,$0x20
    popl %eax
    iret
View Code

int 0x80是一个软中断,内核使用这个软中断实现用户空间访问内核空间的相关功能,为什么int 0x80是用户程序访问内核的方式而不是其他的软中断呢?https://www.cnblogs.com/zhenjingcool/p/15999035.html中已经介绍过

int指令是软中断指令。从用户态执行int指令时,CPU会自动向内核栈压入5个值:用户态原ss、原esp、原eflags、cs、eip;其中压入的eip就是用户态代码中int指令的下一条指令的地址,iret时据此返回用户态。参考:https://blog.csdn.net/tfnmdmx/article/details/119424519

nr_system_calls = 72
_system_call:
    cmpl $nr_system_calls-1,%eax //nr_system_calls是系统调用的个数(72),合法的系统调用号为0~nr_system_calls-1;这里检查eax中的系统调用号是否超出范围,如果超出则跳转到bad_sys_call
    ja bad_sys_call
    push %ds //数据段寄存器入栈
    push %es //附加段寄存器入栈
    push %fs //fs段寄存器入栈
    pushl %edx //edx入栈
    pushl %ecx        # push %ebx,%ecx,%edx as parameters
    pushl %ebx        # to the system call
    movl $0x10,%edx        # set up ds,es to kernel space
    mov %dx,%ds //ds装入选择子0x10。由https://www.cnblogs.com/zhenjingcool/p/15972761.html 可知0x10=0001 0000(index=2,TI=0表示GDT,RPL=0),即GDT中下标为2的内核数据段
    mov %dx,%es //es同样指向内核数据段
    movl $0x17,%edx        # fs points to local data space,0x17=0001 0111(index=2,TI=1表示LDT,RPL=3),即LDT中下标为2的用户数据段
    mov %dx,%fs //fs装入选择子0x17。从这里我们可以看出,ds,es指向内核数据段,fs指向用户数据段,在signal.c中会用得到
    call _sys_call_table(,%eax,4) //sys_call_table在include/linux/sys.h中定义,见下面代码说明。由at&t寻址方式https://www.cnblogs.com/zhenjingcool/p/15925494.html可知,这里调用的是sys_call_table中下标为eax的表项(每项4字节,所以地址为_sys_call_table+eax*4)所指向的函数,比如1号系统调用的话,就是sys_exit函数。
    pushl %eax //把系统调用的返回值入栈
    movl _current,%eax //_current是指向当前任务task_struct的指针,这里把它写入eax
    cmpl $0,state(%eax) # state是上面定义的偏移量0,state(%eax)即current->state;这里判断当前任务的状态是否为0,为0表示就绪或者运行中,非0表示不可运行(例如阻塞)
    jne reschedule //非0则跳转到reschedule
    cmpl $0,counter(%eax) # counter(%eax)即current->counter,判断时间片是否用完
    je reschedule //时间片用完(为0)则跳转到reschedule

上面的_system_call有很多代码,但是最核心的就是这一行:call _sys_call_table(,%eax,4),此处是真正调用系统调用处理函数的地方。调用完毕后,如果当前进程不需要重新调度,就会接着执行ret_from_sys_call;如果需要调度,则先经reschedule执行schedule,返回后同样进入ret_from_sys_call。这个标签也是非常重要的,我们可以把它看做系统调用后处理(post process)过程,在后处理过程中会处理该进程收到的信号(这些信号往往是中断处理函数发给进程的),并调用do_signal进行信号处理。

错误的系统调用,返回-1

bad_sys_call:
    movl $-1,%eax
    iret
reschedule:
    pushl $ret_from_sys_call //标号地址压入堆栈,作为_schedule的返回地址
    jmp _schedule //重新执行系统调度函数,函数返回后,将跳转到ret_from_sys_call处执行

上面程序中用到的include/linux/sys.h代码

fn_ptr sys_call_table[] = { sys_setup, sys_exit, sys_fork, sys_read,
sys_write, sys_open, sys_close, sys_waitpid, sys_creat, sys_link,
sys_unlink, sys_execve, sys_chdir, sys_time, sys_mknod, sys_chmod,
sys_chown, sys_break, sys_stat, sys_lseek, sys_getpid, sys_mount,
sys_umount, sys_setuid, sys_getuid, sys_stime, sys_ptrace, sys_alarm,
sys_fstat, sys_pause, sys_utime, sys_stty, sys_gtty, sys_access,
sys_nice, sys_ftime, sys_sync, sys_kill, sys_rename, sys_mkdir,
sys_rmdir, sys_dup, sys_pipe, sys_times, sys_prof, sys_brk, sys_setgid,
sys_getgid, sys_signal, sys_geteuid, sys_getegid, sys_acct, sys_phys,
sys_lock, sys_ioctl, sys_fcntl, sys_mpx, sys_setpgid, sys_ulimit,
sys_uname, sys_umask, sys_chroot, sys_ustat, sys_dup2, sys_getppid,
sys_getpgrp, sys_setsid, sys_sigaction, sys_sgetmask, sys_ssetmask,
sys_setreuid,sys_setregid };

 

注:局部段描述符LDT,第1项是代码段,第2项是数据段,下面代码要注意这地方

ret_from_sys_call:
    movl _current,%eax        # task[0] cannot have signals
    cmpl _task,%eax //_task是数组,不加下标将表示第0个元素,这里是确定当前进程是不是_task[0]
    je 3f //如果是task0,则向前跳转到3标号处
    cmpw $0x0f,CS(%esp)        # CS是上面定义的常量0x20,即被保存的原cs在栈中的偏移;CS(%esp)为原代码段选择子(也就是系统调用/中断发生前的代码段),这里判断它是否为0x0f=0000 1111(段选择子TI=1表示局部描述符表,RPL=3表示用户态,index=1即第一个段<代码段>)
    jne 3f //如果原代码段 不是 用户态代码段,则说明被打断的是内核态(supervisor)代码,不做下面的信号处理,直接跳转到标号3处
    cmpw $0x17,OLDSS(%esp)        # 同样的道理,0x17=0001 0111(TI=1表示局部描述符表,RPL=3表示用户态,index=2表示数据段),OLDSS(%esp)表示原堆栈段
    jne 3f //如果原堆栈不在用户数据段中,则也退出
    movl signal(%eax),%ebx //下面代码到标号3之前的代码,作用是检查当前task中的信号位图并选出一个编号最小的未被屏蔽的信号,调用do_signal函数进行处理。可参考https://blog.csdn.net/finzale/article/details/44648301?utm_source=blogkpcl14
    movl blocked(%eax),%ecx //注意上面的signal(%eax)表示的意思。%eax表示进程,signal为上面定义的字面量12,这里取进程结构体偏移量12处开始的4字节放到ebx中,也就是信号位图。对照task_struct结构将会更好理解:struct task_struct {long state;long counter;long priority;long signal;struct sigaction sigaction[32];long blocked;...}
    notl %ecx //上一行代码blocked(%eax)表示取屏蔽信号位图放到ecx中.本行代码表示每位取反
    andl %ebx,%ecx //获得许可的信号位图
    bsfl %ecx,%ecx //从低位(0位)开始扫描位图,看是否有1的位,若有,则ecx保留该位的偏移值
    je 3f //如果没有信号,则向前跳转退出
    btrl %ecx,%ebx //复位该信号(ebx含有原signal位图)
    movl %ebx,signal(%eax) //重新保存signal位图信息
    incl %ecx //将信号调整为从1开始的数(1-32)
    pushl %ecx //信号值入栈作为调用do_signal的参数之一
    call _do_signal //调用c函数信号处理程序
    popl %eax
3:    popl %eax
    popl %ebx
    popl %ecx
    popl %edx
    pop %fs
    pop %es
    pop %ds
    iret

总结:_system_call标号实现系统调用,其真正发起系统调用的地方是 call _sys_call_table(,%eax,4) ,调用结束后,会接着执行 ret_from_sys_call 标号,这个标号里面对系统调用c函数执行返回后的信号(signal)做识别处理。

 

下面是协处理器异常的处理程序,首先是寄存器入栈,然后把ret_from_sys_call的地址入栈作为返回地址,再跳转到math_error函数;math_error返回时会直接进入ret_from_sys_call执行

_coprocessor_error:
    push %ds
    push %es
    push %fs
    pushl %edx
    pushl %ecx
    pushl %ebx
    pushl %eax
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    pushl $ret_from_sys_call
    jmp _math_error

下面是创建新进程的函数,find_empty_process和copy_process在fork.c中定义

_sys_fork:
    call _find_empty_process //首先在任务数组中寻找空闲项,返回值在eax中
    testl %eax,%eax
    js 1f //如果返回值为负(没有找到空闲项),跳转到1处返回
    push %gs
    pushl %esi
    pushl %edi
    pushl %ebp
    pushl %eax
    call _copy_process //调用copy_process进行进程拷贝
    addl $20,%esp
1:    ret

硬盘中断处理程序

_hd_interrupt:
    pushl %eax
    pushl %ecx
    pushl %edx
    push %ds
    push %es
    push %fs
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    movb $0x20,%al
    outb %al,$0xA0        # EOI to interrupt controller #2 (port 0xA0)
    jmp 1f            # give port chance to breathe
1:    jmp 1f
1:    xorl %edx,%edx
    xchgl _do_hd,%edx
    testl %edx,%edx
    jne 1f
    movl $_unexpected_hd_interrupt,%edx
1:    outb %al,$0x20
    call *%edx        # "interesting" way of handling intr.
    pop %fs
    pop %es
    pop %ds
    popl %edx
    popl %ecx
    popl %eax
    iret

软盘中断处理程序

_floppy_interrupt:
    pushl %eax
    pushl %ecx
    pushl %edx
    push %ds
    push %es
    push %fs
    movl $0x10,%eax
    mov %ax,%ds
    mov %ax,%es
    movl $0x17,%eax
    mov %ax,%fs
    movb $0x20,%al
    outb %al,$0x20        # EOI to interrupt controller #1
    xorl %eax,%eax
    xchgl _do_floppy,%eax
    testl %eax,%eax
    jne 1f
    movl $_unexpected_floppy_interrupt,%eax
1:    call *%eax        # "interesting" way of handling intr.
    pop %fs
    pop %es
    pop %ds
    popl %edx
    popl %ecx
    popl %eax
    iret

 

posted @ 2022-03-13 23:06  zhenjingcool  阅读(274)  评论(0编辑  收藏  举报