调度程序schedule()注释
调度时机:
1、进程终止、睡眠,这些通常是进程自身行为,当然也有运行异常;
2、时间片用完时;时间片的更新是在时钟中断的驱动下完成的;
3、设备驱动程序;
4、进程从异常、中断及系统调用返回时,会检测当前进程的need_resched标志(它是task_struct中的一个字段,而不是函数),若该标志已置位则触发调度。
问题:多cpu下,时钟中断如何处理的?时钟中断应该是只被一个cpu捕获并处理,那么其他cpu靠什么来驱动时间片更新?怎么触发调度?
调度具体实现
/*
 * schedule() - choose the next task to run on this CPU and switch to it.
 *
 * Outline, as visible in this function:
 *   1. Sanity checks: the caller must have an active_mm and must not be in
 *      interrupt context (either violation hits BUG()).
 *   2. Under runqueue_lock: refill and requeue an exhausted SCHED_RR task,
 *      remove 'prev' from the runqueue unless it stays runnable, then scan
 *      the runqueue for the candidate with the highest goodness() weight
 *      (defaulting to this CPU's idle task).
 *   3. If the best weight is exactly 0, recompute the counter of every task
 *      and rescan.
 *   4. Unless 'prev' was chosen again, switch the address space and then the
 *      register state/stack.
 *   5. If need_resched is set again afterwards, start over.
 *
 * Takes no arguments and returns nothing.
 */
asmlinkage void schedule(void)
{
    struct schedule_data * sched_data;
    struct task_struct *prev, *next, *p;
    struct list_head *tmp;
    int this_cpu, c;


    /*
     * NOTE(review): presumably this only prefetches the lock's cache line;
     * the lock itself is acquired by spin_lock_irq() further down.
     */
    spin_lock_prefetch(&runqueue_lock);

    /*
     * A running task must always have an active_mm. Tasks with no mm of
     * their own borrow the previous task's active_mm when they are switched
     * in (see the mm handling before switch_to() below), so both kinds of
     * task are handled uniformly.
     */
    if (!current->active_mm) BUG();
need_resched_back:
    /*
     * The current task is the one about to be scheduled out.
     * Background from the original annotation (i386-specific, not visible
     * in this block): task_struct and the kernel stack share one 8 KB
     * union, task_struct in roughly the low 1 KB and the stack in the
     * remaining ~7 KB, so 'current' can be found by masking the stack
     * pointer (esp & 0xffffe000).
     */
    prev = current;
    this_cpu = prev->processor;    /* CPU recorded in the task */

    /*
     * unlikely() is a compiler branch hint (per the original annotation,
     * available with gcc >= 2.96): the body is expected to run rarely, so
     * the common path can be laid out first; likely() is the opposite.
     * Scheduling from interrupt context is a bug.
     */
    if (unlikely(in_interrupt())) {
        printk("Scheduling in interrupt\n");
        BUG();
    }

    /*
     * NOTE(review): presumably releases the global kernel lock if 'prev'
     * holds it; the counterpart reacquire_kernel_lock() is called at the
     * end of this function. Confirm against the macro definition.
     */
    release_kernel_lock(prev, this_cpu);

    /*
     * 'sched_data' is protected by the fact that we can run
     * only one process per CPU.
     */
    /*
     * Per-CPU scheduling record. This function stores the chosen task in
     * ->curr and, on SMP, a cycle stamp in ->last_schedule.
     */
    sched_data = &aligned_data[this_cpu].schedule_data;

    /* Take the runqueue lock; the _irq variant also disables local interrupts. */
    spin_lock_irq(&runqueue_lock);

    /* move an exhausted RR process to be last.. */
    if (unlikely(prev->policy == SCHED_RR))    /* round-robin policy */
        if (!prev->counter) {                  /* time slice used up */
            /*
             * Refill the slice from the nice value. Per the original
             * annotation, nice is the UNIX-style inverted priority in the
             * range -20..19: larger is more yielding, smaller is higher
             * priority.
             */
            prev->counter = NICE_TO_TICKS(prev->nice);
            move_last_runqueue(prev);
        }

    switch (prev->state) {
        case TASK_INTERRUPTIBLE:    /* sleep that a signal may interrupt */
            /* A signal is already pending: stay runnable instead of sleeping. */
            if (signal_pending(prev)) {
                prev->state = TASK_RUNNING;
                break;
            }
            /* no signal pending: fall through and leave the runqueue */
        default:
            /*
             * Every other non-running state (per the original annotation:
             * stopped, zombie, uninterruptible sleep, e.g. after exit() or
             * wait4()) is removed from the runqueue.
             */
            del_from_runqueue(prev);
        case TASK_RUNNING:;         /* still runnable: stays on the runqueue */
    }
    prev->need_resched = 0;         /* the reschedule request is being served */

    /*
     * this is the scheduler proper:
     */

repeat_schedule:
    /*
     * Default process to select..
     */
    next = idle_task(this_cpu);     /* fall back to this CPU's idle task */
    c = -1000;                      /* initial "best weight" for the max search;
                                     * assumed to be below any goodness() result */
    list_for_each(tmp, &runqueue_head) {    /* walk the runqueue */
        p = list_entry(tmp, struct task_struct, run_list);
        /*
         * NOTE(review): per the original annotation, can_schedule() checks
         * that the task is able to run and is allowed on this CPU.
         */
        if (can_schedule(p, this_cpu)) {
            int weight = goodness(p, this_cpu, prev->active_mm);
            if (weight > c)
                c = weight, next = p;       /* remember the best candidate */
        }
    }

    /* Do we need to re-calculate counters? */
    /*
     * A best weight of exactly 0 is treated as the rare case; per the
     * original annotation it means all candidates have used up their time
     * slices.
     */
    if (unlikely(!c)) {
        struct task_struct *p;      /* shadows the outer 'p' within this block */

        /*
         * The runqueue lock is dropped while the counters are recomputed
         * under tasklist_lock, then taken again before rescanning.
         */
        spin_unlock_irq(&runqueue_lock);
        read_lock(&tasklist_lock);
        for_each_task(p)
            /* new slice = half of what is left + the nice-derived ticks */
            p->counter = (p->counter >> 1) + NICE_TO_TICKS(p->nice);
        read_unlock(&tasklist_lock);
        spin_lock_irq(&runqueue_lock);
        goto repeat_schedule;       /* search again with the new counters */
    }

    /*
     * from this point on nothing can prevent us from
     * switching to the next task, save this fact in
     * sched_data.
     */
    sched_data->curr = next;
    /*
     * NOTE(review): per the original annotation this updates the task's
     * 'processor' and 'cpus_runnable' fields; the helper is not visible here.
     */
    task_set_cpu(next, this_cpu);
    spin_unlock_irq(&runqueue_lock);

    if (unlikely(prev == next)) {   /* the same task was picked again */
        /* We won't go through the normal tail, so do this by hand */
        prev->policy &= ~SCHED_YIELD;   /* clear the yield flag */
        goto same_process;
    }

#ifdef CONFIG_SMP
    /*
     * maintain the per-process 'last schedule' value.
     * (this has to be recalculated even if we reschedule to
     * the same process) Currently this is only used on SMP,
     * and it's approximate, so we do not have to maintain
     * it while holding the runqueue spinlock.
     */
    sched_data->last_schedule = get_cycles();

    /*
     * We drop the scheduler lock early (it's a global spinlock),
     * thus we have to lock the previous process from getting
     * rescheduled during switch_to().
     */

#endif /* CONFIG_SMP */

    kstat.context_swtch++;          /* count this context switch */
    /*
     * there are 3 processes which are affected by a context switch:
     *
     * prev == .... ==> (last => next)
     *
     * It's the 'much more previous' 'prev' that is on next's stack,
     * but prev is set to (the just run) 'last' process by switch_to().
     * This might sound slightly confusing but makes tons of sense.
     */
    prepare_to_switch();
    {
        struct mm_struct *mm = next->mm;            /* next's own address space, if any */
        struct mm_struct *oldmm = prev->active_mm;  /* address space in use until now */
        if (!mm) {
            /*
             * 'next' has no address space of its own (a kernel thread, per
             * the original annotation). Its active_mm must be NULL here,
             * because it is cleared whenever such a task is switched out
             * (see the !prev->mm branch below).
             */
            if (next->active_mm) BUG();
            next->active_mm = oldmm;                /* borrow prev's address space */
            /* Reference for the borrower; balanced by mmdrop() below. */
            atomic_inc(&oldmm->mm_count);
            /* NOTE(review): by its name, puts the TLB into lazy mode; helper not visible here. */
            enter_lazy_tlb(oldmm, next, this_cpu);
        }
        else {
            /* A task with its own mm must also have it as its active_mm. */
            if (next->active_mm != mm) BUG();
            switch_mm(oldmm, mm, next, this_cpu);   /* switch to next's address space */
        }

        if (!prev->mm) {
            /* 'prev' was only borrowing oldmm: give the reference back. */
            prev->active_mm = NULL;
            mmdrop(oldmm);
        }
    }

    /*
     * This just switches the register state and the
     * stack.
     */
    switch_to(prev, next, prev);
    /*
     * NOTE(review): the original annotation says this puts the previous
     * task at the tail of the runqueue, which cannot be confirmed from this
     * block. The prev == next path above ("We won't go through the normal
     * tail, so do this by hand") suggests it at least clears SCHED_YIELD.
     */
    __schedule_tail(prev);

same_process:
    /*
     * Counterpart of release_kernel_lock() above.
     * NOTE(review): presumably takes the global kernel lock again if the
     * task held it before; the original annotation instead says it zeroes
     * the task's kernel lock depth on SMP - confirm against the definition.
     */
    reacquire_kernel_lock(current);
    if (current->need_resched)      /* another reschedule was requested meanwhile */
        goto need_resched_back;
    return;
}