深入理解系统调用

1. 找一个系统调用,其系统调用号与学号最后2位相同:

 本次实验选择84号系统调用:rmdir。该系统调用的作用是删除一个空目录(目录非空时调用会失败)。

#include <unistd.h>
int rmdir( const char *pathname );
返回值:若成功则返回0,若出错则返回-1

2. 通过汇编指令触发该系统调用:

首先使用c库函数触发rmdir系统调用:

#include<stdio.h>
#include<unistd.h>
/* Remove the directory "test" through the libc rmdir() wrapper and print the outcome. */
int main(){
    const char *target = "test";
    /* rmdir() yields 0 on success; every other value is reported as a failure. */
    const char *message = (rmdir(target) == 0) ? "删除成功\n" : "删除失败\n";

    printf("%s", message);
    return 0;
}

通过反汇编发现通过%rdi传递参数,通过%eax传递系统调用号:

 

 

下面使用嵌入式汇编来代替libc提供的rmdir()函数,并触发rmdir系统调用:

#include<stdio.h>
#include<unistd.h>
/*
 * Trigger the rmdir system call (number 84, 0x54) directly with the x86-64
 * `syscall` instruction instead of the libc rmdir() wrapper, then print
 * whether the directory "test" was removed.
 */
int main(){
    /* The result comes back in the full 64-bit %rax, so receive it in a long;
     * storing 8 bytes into a 4-byte int would overwrite neighbouring stack. */
    long flag;
    const char *dir = "test";

    /* Let the compiler place the operands via register constraints rather than
     * hand-written movq instructions, and declare everything the instruction
     * modifies so surrounding code is not silently corrupted. */
    __asm__ __volatile__(
        "syscall"
        : "=a"(flag)                /* %rax: value returned by the kernel      */
        : "0"(0x54L),               /* %rax: system call number 84 (rmdir)     */
          "D"(dir)                  /* %rdi: first argument, the pathname      */
        : "rcx", "r11", "memory"    /* syscall overwrites %rcx and %r11; the
                                       kernel reads the string behind dir      */
    );

    /* A raw system call reports success as 0; failures come back non-zero
     * (a negated errno value), unlike the libc wrapper's -1. */
    if (flag == 0) {
        printf("Delete succeeded\n");
    } else {
        printf("Delete unsucceeded\n");
    }
    return 0;
}

通过实验验证了改写成功,能够通过嵌入式汇编触发rmdir系统调用:

3. 通过gdb跟踪该系统调用的内核处理过程:

通过bt查看函数调用栈,结合课上所学知识和本次实验,绘制了两幅系统调用的宏观图:

64位嵌入式汇编:

 32位libc提供的rmdir()函数:

由于gdb无法追踪entry_SYSCALL_64处的汇编代码,因此我们只能把断点设在do_syscall_64()和__x64_sys_rmdir()函数处,通过以下调试代码可以看到:

首先gdb跟踪到了 do_syscall_64 (nr=84, regs=0xffffc90000033f58) 处,然后进入函数体内,在执行到 regs->ax = sys_call_table[nr](regs) 时,根据系统调用号跳转到 __x64_sys_rmdir() 函数处(参数通过 regs 传入);执行完对应的内核处理函数后,先返回到 do_syscall_64 函数中执行 syscall_return_slowpath(regs),再返回到 entry_SYSCALL_64 中。在这段汇编的末尾,我们跟踪到了 USERGS_SYSRET64 这条语句,它相当于执行了 swapgs 和 sysretq:前者切换回用户态的 GS 基址(恢复现场的最后一步),后者从系统调用返回用户态,接着继续执行用户态的代码。

Breakpoint 1, do_syscall_64 (nr=84, regs=0xffffc90000033f58)
    at arch/x86/entry/common.c:279
279    {
(gdb) l
274        prepare_exit_to_usermode(regs);
275    }
276    
277    #ifdef CONFIG_X86_64
278    __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
279    {
280        struct thread_info *ti;
281    
282        enter_from_user_mode();
283        local_irq_enable();
(gdb) n
283        local_irq_enable();
(gdb) n
284        ti = current_thread_info();
(gdb) n
285        if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
(gdb) n
288        if (likely(nr < NR_syscalls)) {
(gdb) n
289            nr = array_index_nospec(nr, NR_syscalls);
(gdb) n
290            regs->ax = sys_call_table[nr](regs);
(gdb) n

Breakpoint 2, __x64_sys_rmdir (regs=0xffffc90000033f58) at fs/namei.c:3951
3951    SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
(gdb) n
do_rmdir (dfd=-100, pathname=0x492164 "test") at fs/namei.c:3893
3893    {
(gdb) n
3900        unsigned int lookup_flags = 0;
(gdb) n
3893    {
(gdb) n
3902        name = filename_parentat(dfd, getname(pathname), lookup_flags,
(gdb) n
3904        if (IS_ERR(name))
(gdb) n
3902        name = filename_parentat(dfd, getname(pathname), lookup_flags,
(gdb) n
3904        if (IS_ERR(name))
(gdb) n
3907        switch (type) {
(gdb) n
3919        error = mnt_want_write(path.mnt);
(gdb) n
3920        if (error)
(gdb) n
3923        inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
(gdb) n
3924        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
(gdb) n
3926        if (IS_ERR(dentry))
(gdb) n
3924        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
(gdb) n
3926        if (IS_ERR(dentry))
(gdb) n
3928        if (!dentry->d_inode) {
(gdb) n
3935        error = vfs_rmdir(path.dentry->d_inode, dentry);
(gdb) n
3937        dput(dentry);
(gdb) n
3939        inode_unlock(path.dentry->d_inode);
(gdb) n
3940        mnt_drop_write(path.mnt);
(gdb) n
3942        path_put(&path);
(gdb) n
3945            lookup_flags |= LOOKUP_REVAL;
(gdb) n
3942        path_put(&path);
(gdb) n
3943        putname(name);
(gdb) n
3944        if (retry_estale(error, lookup_flags)) {
(gdb) n
3949    }
(gdb) n
do_syscall_64 (nr=180688, regs=0xffffc90000033f58)
    at arch/x86/entry/common.c:300
300        syscall_return_slowpath(regs);
(gdb) n
301    }
(gdb) n
entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:184
184        movq    RCX(%rsp), %rcx
(gdb) n
185        movq    RIP(%rsp), %r11
(gdb) n
187        cmpq    %rcx, %r11    /* SYSRET requires RCX == RIP */
(gdb) n
188        jne    swapgs_restore_regs_and_return_to_usermode
(gdb) n
205        shl    $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
(gdb) n
206        sar    $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
(gdb) n
210        cmpq    %rcx, %r11
(gdb) n
211        jne    swapgs_restore_regs_and_return_to_usermode
(gdb) n
213        cmpq    $__USER_CS, CS(%rsp)        /* CS must match SYSRET */
(gdb) n
214        jne    swapgs_restore_regs_and_return_to_usermode
(gdb) n
216        movq    R11(%rsp), %r11
(gdb) n
217        cmpq    %r11, EFLAGS(%rsp)        /* R11 == RFLAGS */
(gdb) n
218        jne    swapgs_restore_regs_and_return_to_usermode
(gdb) n
238        testq    $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
(gdb) n
239        jnz    swapgs_restore_regs_and_return_to_usermode
(gdb) n
243        cmpq    $__USER_DS, SS(%rsp)        /* SS must match SYSRET */
(gdb) n
244        jne    swapgs_restore_regs_and_return_to_usermode
(gdb) n
253        POP_REGS pop_rdi=0 skip_r11rcx=1
(gdb) n
entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:259
259        movq    %rsp, %rdi
(gdb) n
260        movq    PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
(gdb) n
entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:262
262        pushq    RSP-RDI(%rdi)    /* RSP */
(gdb) n
entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:263
263        pushq    (%rdi)        /* RDI */
(gdb) n
entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:271
271        SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
(gdb) n
273        popq    %rdi
(gdb) n
entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:274
274        popq    %rsp
(gdb) n
entry_SYSCALL_64 () at arch/x86/entry/entry_64.S:275
275        USERGS_SYSRET64
(gdb) n
0x0000000000400b83 in ?? ()
View Code

最后我们发现do_syscall_64函数执行到结尾处调用了syscall_return_slowpath(regs),无论执行哪个系统调用内核处理函数,执行完之后都会调用syscall_return_slowpath(regs),这是进程调度和进程切换的时机,进一步跟踪可以跟踪到schedule()函数和do_signal()函数,这就与理论上进程调度发生在系统调用结束时对应上了。

posted @ 2020-05-27 10:10  myhuang  阅读(380)  评论(0编辑  收藏  举报