fork 系统调用的执行过程与调试

casualet + 原创作品转载请注明出处 + 《Linux内核分析》MOOC课程 http://mooc.study.163.com/course/USTC-1000029000

前言:

  在linux中,我们可以通过fork系统调用来完成进程创建的任务。对于进程的创建,可以使用sys_clone、sys_vfork以及sys_fork,这些系统调用的内部都使用了do_fork函数。do_fork函数会复制task_struct,设置内核堆栈,并且对一些特定的数据结构进行修改。其中还会调用copy_thread函数,设置子进程的sp和ip(它们保存在该进程task_struct的thread字段中)。这里的ip被设置成了ret_from_fork函数的地址(在ret_from_fork里面有一条 jmp syscall_exit)。这样,子进程被调度后会进入之前讲系统调用时分析过的syscall_exit部分,因此fork系统调用在这里就有了一个进程调度的时机。对比自己写的多道时间片轮转程序,schedule进行进程调度的大致流程是:遍历task_struct链表,找到其中可以运行的进程,然后取出其中保存的ip——也就是刚才设置的ret_from_fork。从这里开始执行,jmp到syscall_exit,再经由restore_all恢复现场,子进程就可以从与父进程相同的位置开始继续执行代码。
     

下面展示调试的基本过程:

  首先,进入menuos, 然后开始设置一下断点:

 

然后,我们运行fork命令,发现fork命令停止到sys_clone断点:

这个过程是一次中断(陷阱)处理过程:我们调用的fork库函数通过 int 0x80 陷入内核,内核根据eax寄存器传入的系统调用号,在sys_call_table(系统调用分派表)中找到对应的系统调用处理函数并开始执行,这里执行到的就是sys_clone函数。我们可以

看到,sys_clone代码在fork.c中。

1724SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1725		 int __user *, parent_tidptr,
1726		 int, tls_val,
1727		 int __user *, child_tidptr)
1728#elif defined(CONFIG_CLONE_BACKWARDS2)
1729SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
1730		 int __user *, parent_tidptr,
1731		 int __user *, child_tidptr,
1732		 int, tls_val)
1733#elif defined(CONFIG_CLONE_BACKWARDS3)
1734SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp,
1735		int, stack_size,
1736		int __user *, parent_tidptr,
1737		int __user *, child_tidptr,
1738		int, tls_val)
1739#else
1740SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
1741		 int __user *, parent_tidptr,
1742		 int __user *, child_tidptr,
1743		 int, tls_val)
1744#endif
1745{
1746	return do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
1747}
1748#endif

我们输入next,就会停在do_fork断点;然后输入step进入do_fork函数。对于创建进程的几个系统调用,其最后的实际工作都是通过do_fork函数来完成的,所以我们现在重点关注一下这个函数。

long do_fork(unsigned long clone_flags,
1624          unsigned long stack_start,
1625          unsigned long stack_size,
1626          int __user *parent_tidptr,
1627          int __user *child_tidptr)
1628{
1629    struct task_struct *p;
1630    int trace = 0;
1631    long nr;
1632
1633    /*
1634     * Determine whether and which event to report to ptracer.  When
1635     * called from kernel_thread or CLONE_UNTRACED is explicitly
1636     * requested, no event is reported; otherwise, report if the event
1637     * for the type of forking is enabled.
1638     */
1639    if (!(clone_flags & CLONE_UNTRACED)) {
1640        if (clone_flags & CLONE_VFORK)
1641            trace = PTRACE_EVENT_VFORK;
1642        else if ((clone_flags & CSIGNAL) != SIGCHLD)
1643            trace = PTRACE_EVENT_CLONE;
1644        else
1645            trace = PTRACE_EVENT_FORK;
1646
1647        if (likely(!ptrace_event_enabled(current, trace)))
1648            trace = 0;
1649    }
1650
1651    p = copy_process(clone_flags, stack_start, stack_size,
1652             child_tidptr, NULL, trace);
1653    /*
1654     * Do this prior waking up the new thread - the thread pointer
1655     * might get invalid after that point, if the thread exits quickly.
1656     */
1657    if (!IS_ERR(p)) {
1658        struct completion vfork;
1659        struct pid *pid;
1660
1661        trace_sched_process_fork(current, p);
1662
1663        pid = get_task_pid(p, PIDTYPE_PID);
1664        nr = pid_vnr(pid);
1665
1666        if (clone_flags & CLONE_PARENT_SETTID)
1667            put_user(nr, parent_tidptr);
1668
1669        if (clone_flags & CLONE_VFORK) {
1670            p->vfork_done = &vfork;
1671            init_completion(&vfork);
1672            get_task_struct(p);
1673        }
1674
1675        wake_up_new_task(p);
1676
1677        /* forking complete and child started to run, tell ptracer */
1678        if (unlikely(trace))
1679            ptrace_event_pid(trace, pid);
1680
1681        if (clone_flags & CLONE_VFORK) {
1682            if (!wait_for_vfork_done(p, &vfork))
1683                ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);
1684        }
1685
1686        put_pid(pid);
1687    } else {
1688        nr = PTR_ERR(p);
1689    }
1690    return nr;
1691}

这个函数会进行一些pcb的拷贝工作。 我们主要关注里面的copy_process函数:这个函数会进行一些实际的内容拷贝工作。 

static struct task_struct *copy_process(unsigned long clone_flags,
1183                    unsigned long stack_start,
1184                    unsigned long stack_size,
1185                    int __user *child_tidptr,
1186                    struct pid *pid,
1187                    int trace)
1188{
1189    int retval;
1190    struct task_struct *p;
1191
1192    if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
1193        return ERR_PTR(-EINVAL);
1194
1195    if ((clone_flags & (CLONE_NEWUSER|CLONE_FS)) == (CLONE_NEWUSER|CLONE_FS))
1196        return ERR_PTR(-EINVAL);
1197
1198    /*
1199     * Thread groups must share signals as well, and detached threads
1200     * can only be started up within the thread group.
1201     */
1202    if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
1203        return ERR_PTR(-EINVAL);
1204
1205    /*
1206     * Shared signal handlers imply shared VM. By way of the above,
1207     * thread groups also imply shared VM. Blocking this case allows
1208     * for various simplifications in other code.
1209     */
1210    if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
1211        return ERR_PTR(-EINVAL);
1212
1213    /*
1214     * Siblings of global init remain as zombies on exit since they are
1215     * not reaped by their parent (swapper). To solve this and to avoid
1216     * multi-rooted process trees, prevent global and container-inits
1217     * from creating siblings.
1218     */
1219    if ((clone_flags & CLONE_PARENT) &&
1220                current->signal->flags & SIGNAL_UNKILLABLE)
1221        return ERR_PTR(-EINVAL);
1222
1223    /*
1224     * If the new process will be in a different pid or user namespace
1225     * do not allow it to share a thread group or signal handlers or
1226     * parent with the forking task.
1227     */
1228    if (clone_flags & CLONE_SIGHAND) {
1229        if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
1230            (task_active_pid_ns(current) !=
1231                current->nsproxy->pid_ns_for_children))
1232            return ERR_PTR(-EINVAL);
1233    }
1234
1235    retval = security_task_create(clone_flags);
1236    if (retval)
1237        goto fork_out;
1238
1239    retval = -ENOMEM;
1240    p = dup_task_struct(current);
1241    if (!p)
1242        goto fork_out;
1243
1244    ftrace_graph_init_task(p);
1245
1246    rt_mutex_init_task(p);
1247
1248#ifdef CONFIG_PROVE_LOCKING
1249    DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
1250    DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
1251#endif
1252    retval = -EAGAIN;
1253    if (atomic_read(&p->real_cred->user->processes) >=
1254            task_rlimit(p, RLIMIT_NPROC)) {
1255        if (p->real_cred->user != INIT_USER &&
1256            !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
1257            goto bad_fork_free;
1258    }
1259    current->flags &= ~PF_NPROC_EXCEEDED;
1260
1261    retval = copy_creds(p, clone_flags);
1262    if (retval < 0)
1263        goto bad_fork_free;
1264
1265    /*
1266     * If multiple threads are within copy_process(), then this check
1267     * triggers too late. This doesn't hurt, the check is only there
1268     * to stop root fork bombs.
1269     */
1270    retval = -EAGAIN;
1271    if (nr_threads >= max_threads)
1272        goto bad_fork_cleanup_count;
1273
1274    if (!try_module_get(task_thread_info(p)->exec_domain->module))
1275        goto bad_fork_cleanup_count;
1276
1277    delayacct_tsk_init(p);    /* Must remain after dup_task_struct() */
1278    p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
1279    p->flags |= PF_FORKNOEXEC;
1280    INIT_LIST_HEAD(&p->children);
1281    INIT_LIST_HEAD(&p->sibling);
1282    rcu_copy_process(p);
1283    p->vfork_done = NULL;
1284    spin_lock_init(&p->alloc_lock);
1285
1286    init_sigpending(&p->pending);
1287
1288    p->utime = p->stime = p->gtime = 0;
1289    p->utimescaled = p->stimescaled = 0;
1290#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
1291    p->prev_cputime.utime = p->prev_cputime.stime = 0;
1292#endif
1293#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1294    seqlock_init(&p->vtime_seqlock);
1295    p->vtime_snap = 0;
1296    p->vtime_snap_whence = VTIME_SLEEPING;
1297#endif
1298
1299#if defined(SPLIT_RSS_COUNTING)
1300    memset(&p->rss_stat, 0, sizeof(p->rss_stat));
1301#endif
1302
1303    p->default_timer_slack_ns = current->timer_slack_ns;
1304
1305    task_io_accounting_init(&p->ioac);
1306    acct_clear_integrals(p);
1307
1308    posix_cpu_timers_init(p);
1309
1310    p->start_time = ktime_get_ns(); //设置了一些和时间相关的数量
1311    p->real_start_time = ktime_get_boot_ns();
1312    p->io_context = NULL;
1313    p->audit_context = NULL;
1314    if (clone_flags & CLONE_THREAD)
1315        threadgroup_change_begin(current);
1316    cgroup_fork(p);
1317#ifdef CONFIG_NUMA
1318    p->mempolicy = mpol_dup(p->mempolicy);
1319    if (IS_ERR(p->mempolicy)) {
1320        retval = PTR_ERR(p->mempolicy);
1321        p->mempolicy = NULL;
1322        goto bad_fork_cleanup_threadgroup_lock;
1323    }
1324#endif
1325#ifdef CONFIG_CPUSETS
1326    p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
1327    p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
1328    seqcount_init(&p->mems_allowed_seq);
1329#endif
1330#ifdef CONFIG_TRACE_IRQFLAGS
1331    p->irq_events = 0;
1332    p->hardirqs_enabled = 0;
1333    p->hardirq_enable_ip = 0;
1334    p->hardirq_enable_event = 0;
1335    p->hardirq_disable_ip = _THIS_IP_;
1336    p->hardirq_disable_event = 0;
1337    p->softirqs_enabled = 1;
1338    p->softirq_enable_ip = _THIS_IP_;
1339    p->softirq_enable_event = 0;
1340    p->softirq_disable_ip = 0;
1341    p->softirq_disable_event = 0;
1342    p->hardirq_context = 0;
1343    p->softirq_context = 0;
1344#endif
1345#ifdef CONFIG_LOCKDEP
1346    p->lockdep_depth = 0; /* no locks held yet */
1347    p->curr_chain_key = 0;
1348    p->lockdep_recursion = 0;
1349#endif
1350
1351#ifdef CONFIG_DEBUG_MUTEXES
1352    p->blocked_on = NULL; /* not blocked yet */
1353#endif
1354#ifdef CONFIG_BCACHE
1355    p->sequential_io    = 0;
1356    p->sequential_io_avg    = 0;
1357#endif
1358
1359    /* Perform scheduler related setup. Assign this task to a CPU. */
1360    retval = sched_fork(clone_flags, p);
1361    if (retval)
1362        goto bad_fork_cleanup_policy;
1363
1364    retval = perf_event_init_task(p);
1365    if (retval)
1366        goto bad_fork_cleanup_policy;
1367    retval = audit_alloc(p);
1368    if (retval)
1369        goto bad_fork_cleanup_perf;
1370    /* copy all the process information */
1371    shm_init_task(p);
1372    retval = copy_semundo(clone_flags, p);
1373    if (retval)
1374        goto bad_fork_cleanup_audit;
1375    retval = copy_files(clone_flags, p);
1376    if (retval)
1377        goto bad_fork_cleanup_semundo;
1378    retval = copy_fs(clone_flags, p);
1379    if (retval)
1380        goto bad_fork_cleanup_files;
1381    retval = copy_sighand(clone_flags, p);
1382    if (retval)
1383        goto bad_fork_cleanup_fs;
1384    retval = copy_signal(clone_flags, p);
1385    if (retval)
1386        goto bad_fork_cleanup_sighand;
1387    retval = copy_mm(clone_flags, p);
1388    if (retval)
1389        goto bad_fork_cleanup_signal;
1390    retval = copy_namespaces(clone_flags, p);
1391    if (retval)
1392        goto bad_fork_cleanup_mm;
1393    retval = copy_io(clone_flags, p);
1394    if (retval)
1395        goto bad_fork_cleanup_namespaces;
1396    retval = copy_thread(clone_flags, stack_start, stack_size, p);
1397    if (retval)
1398        goto bad_fork_cleanup_io;
1399
1400    if (pid != &init_struct_pid) {
1401        retval = -ENOMEM;
1402        pid = alloc_pid(p->nsproxy->pid_ns_for_children);
1403        if (!pid)
1404            goto bad_fork_cleanup_io;
1405    }
1406
1407    p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
1408    /*
1409     * Clear TID on mm_release()?
1410     */
1411    p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
1412#ifdef CONFIG_BLOCK
1413    p->plug = NULL;
1414#endif
1415#ifdef CONFIG_FUTEX
1416    p->robust_list = NULL;
1417#ifdef CONFIG_COMPAT
1418    p->compat_robust_list = NULL;
1419#endif
1420    INIT_LIST_HEAD(&p->pi_state_list);
1421    p->pi_state_cache = NULL;
1422#endif
1423    /*
1424     * sigaltstack should be cleared when sharing the same VM
1425     */
1426    if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
1427        p->sas_ss_sp = p->sas_ss_size = 0;
1428
1429    /*
1430     * Syscall tracing and stepping should be turned off in the
1431     * child regardless of CLONE_PTRACE.
1432     */
1433    user_disable_single_step(p);
1434    clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
1435#ifdef TIF_SYSCALL_EMU
1436    clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
1437#endif
1438    clear_all_latency_tracing(p);
1439
1440    /* ok, now we should be set up.. */
1441    p->pid = pid_nr(pid);
1442    if (clone_flags & CLONE_THREAD) {
1443        p->exit_signal = -1;
1444        p->group_leader = current->group_leader;
1445        p->tgid = current->tgid;
1446    } else {
1447        if (clone_flags & CLONE_PARENT)
1448            p->exit_signal = current->group_leader->exit_signal;
1449        else
1450            p->exit_signal = (clone_flags & CSIGNAL);
1451        p->group_leader = p;
1452        p->tgid = p->pid;
1453    }
1454
1455    p->nr_dirtied = 0;
1456    p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
1457    p->dirty_paused_when = 0;
1458
1459    p->pdeath_signal = 0;
1460    INIT_LIST_HEAD(&p->thread_group);
1461    p->task_works = NULL;
1462
1463    /*
1464     * Make it visible to the rest of the system, but dont wake it up yet.
1465     * Need tasklist lock for parent etc handling!
1466     */
1467    write_lock_irq(&tasklist_lock);
1468
1469    /* CLONE_PARENT re-uses the old parent */
1470    if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
1471        p->real_parent = current->real_parent;
1472        p->parent_exec_id = current->parent_exec_id;
1473    } else {
1474        p->real_parent = current;
1475        p->parent_exec_id = current->self_exec_id;
1476    }
1477
1478    spin_lock(&current->sighand->siglock);
1479
1480    /*
1481     * Copy seccomp details explicitly here, in case they were changed
1482     * before holding sighand lock.
1483     */
1484    copy_seccomp(p);
1485
1486    /*
1487     * Process group and session signals need to be delivered to just the
1488     * parent before the fork or both the parent and the child after the
1489     * fork. Restart if a signal comes in before we add the new process to
1490     * it's process group.
1491     * A fatal signal pending means that current will exit, so the new
1492     * thread can't slip out of an OOM kill (or normal SIGKILL).
1493    */
1494    recalc_sigpending();
1495    if (signal_pending(current)) {
1496        spin_unlock(&current->sighand->siglock);
1497        write_unlock_irq(&tasklist_lock);
1498        retval = -ERESTARTNOINTR;
1499        goto bad_fork_free_pid;
1500    }
1501
1502    if (likely(p->pid)) {
1503        ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
1504
1505        init_task_pid(p, PIDTYPE_PID, pid);
1506        if (thread_group_leader(p)) {
1507            init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));
1508            init_task_pid(p, PIDTYPE_SID, task_session(current));
1509
1510            if (is_child_reaper(pid)) {
1511                ns_of_pid(pid)->child_reaper = p;
1512                p->signal->flags |= SIGNAL_UNKILLABLE;
1513            }
1514
1515            p->signal->leader_pid = pid;
1516            p->signal->tty = tty_kref_get(current->signal->tty);
1517            list_add_tail(&p->sibling, &p->real_parent->children);
1518            list_add_tail_rcu(&p->tasks, &init_task.tasks);
1519            attach_pid(p, PIDTYPE_PGID);
1520            attach_pid(p, PIDTYPE_SID);
1521            __this_cpu_inc(process_counts);
1522        } else {
1523            current->signal->nr_threads++;
1524            atomic_inc(&current->signal->live);
1525            atomic_inc(&current->signal->sigcnt);
1526            list_add_tail_rcu(&p->thread_group,
1527                      &p->group_leader->thread_group);
1528            list_add_tail_rcu(&p->thread_node,
1529                      &p->signal->thread_head);
1530        }
1531        attach_pid(p, PIDTYPE_PID);
1532        nr_threads++;
1533    }
1534
1535    total_forks++;
1536    spin_unlock(&current->sighand->siglock);
1537    syscall_tracepoint_update(p);
1538    write_unlock_irq(&tasklist_lock);
1539
1540    proc_fork_connector(p);
1541    cgroup_post_fork(p);
1542    if (clone_flags & CLONE_THREAD)
1543        threadgroup_change_end(current);
1544    perf_event_fork(p);
1545
1546    trace_task_newtask(p, clone_flags);
1547    uprobe_copy_process(p, clone_flags);
1548
1549    return p;
1550
1551bad_fork_free_pid:
1552    if (pid != &init_struct_pid)
1553        free_pid(pid);
1554bad_fork_cleanup_io:
1555    if (p->io_context)
1556        exit_io_context(p);
1557bad_fork_cleanup_namespaces:
1558    exit_task_namespaces(p);
1559bad_fork_cleanup_mm:
1560    if (p->mm)
1561        mmput(p->mm);
1562bad_fork_cleanup_signal:
1563    if (!(clone_flags & CLONE_THREAD))
1564        free_signal_struct(p->signal);
1565bad_fork_cleanup_sighand:
1566    __cleanup_sighand(p->sighand);
1567bad_fork_cleanup_fs:
1568    exit_fs(p); /* blocking */
1569bad_fork_cleanup_files:
1570    exit_files(p); /* blocking */
1571bad_fork_cleanup_semundo:
1572    exit_sem(p);
1573bad_fork_cleanup_audit:
1574    audit_free(p);
1575bad_fork_cleanup_perf:
1576    perf_event_free_task(p);
1577bad_fork_cleanup_policy:
1578#ifdef CONFIG_NUMA
1579    mpol_put(p->mempolicy);
1580bad_fork_cleanup_threadgroup_lock:
1581#endif
1582    if (clone_flags & CLONE_THREAD)
1583        threadgroup_change_end(current);
1584    delayacct_tsk_free(p);
1585    module_put(task_thread_info(p)->exec_domain->module);
1586bad_fork_cleanup_count:
1587    atomic_dec(&p->cred->user->processes);
1588    exit_creds(p);
1589bad_fork_free:
1590    free_task(p);
1591fork_out:
1592    return ERR_PTR(retval);
1593}

我们可以看到1240行的函数dup_task_struct,这个函数利用当前进程的current指针,进行了pcb(即task_struct)的拷贝工作。除此之外,copy_process后面还会进行一系列的copy操作,包括mm等部分,这些工作完成后才返回到do_fork函数继续执行。在copy_process中的1396

行有一个copy_thread函数,这个函数对于不同的体系结构, 有不同的实现,在这里我们选择32位的x86架构,我们来查看一下它的代码:

132int copy_thread(unsigned long clone_flags, unsigned long sp,
133    unsigned long arg, struct task_struct *p)
134{
135    struct pt_regs *childregs = task_pt_regs(p);
136    struct task_struct *tsk;
137    int err;
138
139    p->thread.sp = (unsigned long) childregs;
140    p->thread.sp0 = (unsigned long) (childregs+1);
141    memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
142
143    if (unlikely(p->flags & PF_KTHREAD)) {
144        /* kernel thread */
145        memset(childregs, 0, sizeof(struct pt_regs));
146        p->thread.ip = (unsigned long) ret_from_kernel_thread;
147        task_user_gs(p) = __KERNEL_STACK_CANARY;
148        childregs->ds = __USER_DS;
149        childregs->es = __USER_DS;
150        childregs->fs = __KERNEL_PERCPU;
151        childregs->bx = sp;    /* function */
152        childregs->bp = arg;
153        childregs->orig_ax = -1;
154        childregs->cs = __KERNEL_CS | get_kernel_rpl();
155        childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_FIXED;
156        p->thread.io_bitmap_ptr = NULL;
157        return 0;
158    }
159    *childregs = *current_pt_regs();
160    childregs->ax = 0;
161    if (sp)
162        childregs->sp = sp;
163
164    p->thread.ip = (unsigned long) ret_from_fork;
165    task_user_gs(p) = get_user_gs(current_pt_regs());
166
167    p->thread.io_bitmap_ptr = NULL;
168    tsk = current;
169    err = -ENOMEM;
170
171    if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
172        p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
173                        IO_BITMAP_BYTES, GFP_KERNEL);
174        if (!p->thread.io_bitmap_ptr) {
175            p->thread.io_bitmap_max = 0;
176            return -ENOMEM;
177        }
178        set_tsk_thread_flag(p, TIF_IO_BITMAP);
179    }
180
181    err = 0;
182
183    /*
184     * Set a new TLS for the child thread?
185     */
186    if (clone_flags & CLONE_SETTLS)
187        err = do_set_thread_area(p, -1,
188            (struct user_desc __user *)childregs->si, 0);
189
190    if (err && p->thread.io_bitmap_ptr) {
191        kfree(p->thread.io_bitmap_ptr);
192        p->thread.io_bitmap_max = 0;
193    }
194    return err;
195}

我们可以看到,在copy_thread中有一句 p->thread.ip = (unsigned long) ret_from_fork; 这里设置了子进程的ip,使它在被调度运行后从ret_from_fork开始执行。

在do_fork执行完成以后(return nr),会返回sys_clone的宏定义部分, 然后进入core.c中的schedule函数, 进行进程调度。 调度的时候, 会选中刚才创建的子进程的PCB。这样可以由刚才设置的ip进入ret_from_fork函数进行执行。

ENTRY(ret_from_fork)
291        CFI_STARTPROC
292        pushl_cfi %eax
293        call schedule_tail
294        GET_THREAD_INFO(%ebp)
295        popl_cfi %eax
296        pushl_cfi $0x0202        # Reset kernel eflags
297        popfl_cfi
298        jmp syscall_exit
299        CFI_ENDPROC
300    END(ret_from_fork)

在这里使用 call schedule_tail,对进程切换做一些收尾处理(schedule_tail是新创建的进程被调度运行后执行的第一个函数),之后经由 jmp syscall_exit 返回用户态。至此,我们完成了对fork系统调用基本执行过程的分析。

posted @ 2016-04-03 21:54  Casualet  阅读(1709)  评论(0编辑  收藏  举报