package main

// start sends the value 100 on c. In this program c is created unbuffered,
// so the send cannot complete until a receiver is ready.
func start(c chan int) {
	c <- 100
}

// main creates an unbuffered channel, starts a goroutine running start, and
// then receives one value from the channel, discarding it.
func main() {
	c := make(chan int)
	go start(c)
	<-c
}
该程序启动时,main goroutine首先会创建一个无缓冲的channel,然后启动一个goroutine(为了方便讨论我们称它为g2)向channel发送数据,而main自己则去读取这个channel。
这两个goroutine读写channel时一定会发生一次阻塞,不是main goroutine读取channel时发生阻塞就是g2写入channel时发生阻塞。
创建g2 goroutine
# Disassembly of main.main (offsets in <+N> are relative to the function start).
0x44f4d0<+0>: mov %fs:0xfffffffffffffff8,%rcx
0x44f4d9<+9>: cmp 0x10(%rcx),%rsp
0x44f4dd<+13>: jbe 0x44f549 <main.main+121>
0x44f4df<+15>: sub $0x28,%rsp
0x44f4e3<+19>: mov %rbp,0x20(%rsp)
0x44f4e8<+24>: lea 0x20(%rsp),%rbp
0x44f4ed<+29>: lea 0xb36c(%rip),%rax
0x44f4f4<+36>: mov %rax,(%rsp)
0x44f4f8<+40>: movq $0x0,0x8(%rsp)
0x44f501<+49>: callq 0x404330 <runtime.makechan> # create the channel
0x44f506<+54>: mov 0x10(%rsp),%rax
0x44f50b<+59>: mov %rax,0x18(%rsp)
0x44f510<+64>: movl $0x8,(%rsp)
0x44f517<+71>: lea 0x240f2(%rip),%rcx
0x44f51e<+78>: mov %rcx,0x8(%rsp)
0x44f523<+83>: callq 0x42c1b0 <runtime.newproc> # create the goroutine
0x44f528<+88>: mov 0x18(%rsp),%rax
0x44f52d<+93>: mov %rax,(%rsp)
0x44f531<+97>: movq $0x0,0x8(%rsp)
0x44f53a<+106>: callq 0x405080 <runtime.chanrecv1> # receive from the channel
0x44f53f<+111>: mov 0x20(%rsp),%rbp
0x44f544<+116>: add $0x28,%rsp
0x44f548<+120>: retq
# Reached only via the jbe at +13; after runtime.morestack_noctxt returns,
# execution restarts main.main from its first instruction.
0x44f549<+121>: callq 0x447390 <runtime.morestack_noctxt>
0x44f54e<+126>: jmp 0x44f4d0 <main.main>
设置好g的sched成员,该成员包括调度g时所必需的pc、sp、bp等调度信息;
runtime/proc.go : 4746
// runqput tries to put g on the local runnable queue.
// If next is false, runqput adds g to the tail of the runnable queue.
// If next is true, runqput puts g in the _p_.runnext slot.
// If the run queue is full, runqput puts g on the global queue.
// Executed only by the owner P.
func runqput(_p_ *p, gp *g, next bool) {
	if randomizeScheduler && next && fastrand() % 2 == 0 {
		next = false
	}

	if next {
		// Store gp in _p_.runnext; the goroutine held in runnext is
		// scheduled ahead of the ones in the regular run queue.
	retryNext:
		oldnext := _p_.runnext
		if !_p_.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
			// Another thread is operating on runnext concurrently, so retry.
			goto retryNext
		}
		if oldnext == 0 {
			// runnext was empty before, so there is nothing left to do.
			return
		}
		// Kick the old runnext out to the regular run queue.
		gp = oldnext.ptr() // the g previously held in runnext now goes to the tail of runq
	}

retry:
	// Other threads may be modifying runqhead concurrently, so this load
	// has to synchronize with them.
	h := atomic.LoadAcq(&_p_.runqhead) // load-acquire, synchronize with consumers
	t := _p_.runqtail
	if t - h < uint32(len(_p_.runq)) { // is there still room in the queue?
		// The queue is not full yet, so gp can be stored.
		_p_.runq[t % uint32(len(_p_.runq))].set(gp)

		// store-release, makes it available for consumption
		// No other thread modifies runqtail concurrently, but other threads
		// do read it, and p's runq, concurrently. StoreRel is used here to:
		// 1. write runqtail atomically;
		// 2. prevent compiler and CPU reordering, so that the write to runq
		//    on the previous line happens before runqtail is updated;
		// 3. act as a visibility barrier, so that this thread's changes to
		//    the run queue become visible to other threads.
		atomic.StoreRel(&_p_.runqtail, t + 1)
		return
	}
	// p's local run queue is full; gp has to go to the global run queue.
	if runqputslow(_p_, gp, h, t) {
		return
	}
	// the queue is not full, now the put above must succeed
	goto retry
}
runtime/proc.go : 4784
// Put g and a batch of work from local runnable queue on global queue.
// Executed only by the owner P.
//
// Returns false, without queuing anything, when the CAS on _p_.runqhead
// fails; returns true once gp and the batch are on the global queue.
func runqputslow(_p_ *p, gp *g, h, t uint32) bool {
	var batch [len(_p_.runq) / 2 + 1]*g // gp plus half of _p_'s local queue

	// First, grab a batch from local queue.
	n := t - h
	n = n / 2
	if n != uint32(len(_p_.runq) / 2) {
		throw("runqputslow: queue is not full")
	}
	for i := uint32(0); i < n; i++ { // take half of p's local queue
		batch[i] = _p_.runq[(h+i) % uint32(len(_p_.runq))].ptr()
	}
	if !atomic.CasRel(&_p_.runqhead, h, h + n) { // cas-release, commits consume
		// The CAS failed, which means other worker threads have already
		// stolen some goroutines from _p_'s local run queue, so just return.
		return false
	}
	batch[n] = gp

	if randomizeScheduler {
		for i := uint32(1); i <= n; i++ {
			j := fastrandn(i + 1)
			batch[i], batch[j] = batch[j], batch[i]
		}
	}

	// Link the goroutines.
	// The global run queue is a linked list. Linking all the g's that are
	// about to be moved before taking the lock shortens the time the global
	// list stays locked, which reduces lock contention.
	for i := uint32(0); i < n; i++ {
		batch[i].schedlink.set(batch[i+1])
	}
	var q gQueue
	q.head.set(batch[0])
	q.tail.set(batch[n])

	// Now put the batch on global queue.
	lock(&sched.lock)
	globrunqputbatch(&q, int32(n+1))
	unlock(&sched.lock)
	return true
}
分析完runqput函数是如何把goroutine放入运行队列之后,接下来我们继续分析main goroutine因读取channel而发生的阻塞流程。
从代码逻辑的角度来说,我们不能确定main goroutine和新创建出来的g2谁先运行,但对于我们分析来说我们可以假定某个goroutine先运行,因为不管谁先运行,都会阻塞在channel的读或者写上,所以这里我们假设main创建好g2后首先阻塞在了对channel的读操作上。下面我们看看读取channel的过程。
runtime/chan.go : 403
// entry points for <- c from compiled code
//go:nosplit
func chanrecv1(c *hchan, elem unsafe.Pointer) {
	chanrecv(c, elem, true)
}

// runtime/chan.go : 415
func chanrecv(c *hchan, ep unsafe.Pointer, block bool) (selected, received bool) {
	......
	// The omitted logic mainly checks whether the receive can complete
	// immediately. If it cannot, the g is enqueued on channel c's receive
	// queue and goparkunlock is called to block this goroutine.
	goparkunlock(&c.lock, waitReasonChanReceive, traceEvGoBlockRecv, 3)
	......
}
runtime/proc.go : 304
// Puts the current goroutine into a waiting state and unlocks the lock. // The goroutine can be made runnable again by calling goready(gp). func goparkunlock(lock*mutex, reasonwaitReason, traceEvbyte, traceskipint) { gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceEv, traceskip) } // runtime/proc.go : 276 // Puts the current goroutine into a waiting state and calls unlockf. // If unlockf returns false, the goroutine is resumed. // unlockf must not access this G's stack, as it may be moved between // the call to gopark and the call to unlockf. // Reason explains why the goroutine has been parked. // It is displayed in stack traces and heap dumps. // Reasons should be unique and descriptive. // Do not re-use reasons, add new ones. func gopark(unlockffunc(*g, unsafe.Pointer) bool, lockunsafe.Pointer, reason waitReason, traceEvbyte, traceskipint) { ...... // can't do anything that might move the G between Ms here. mcall(park_m) //切换到g0栈执行park_m函数 }
goparkunlock函数直接调用gopark函数,gopark则调用mcall从当前main goroutine切换到g0去执行park_m函数(mcall前面我们分析过,其主要作用就是保存当前goroutine的现场,然后切换到g0栈去调用作为参数传递给它的函数)。
runtime/proc.go : 2581
// park continuation on g0. func park_m(gp*g) { _g_ := getg() if trace.enabled { traceGoPark(_g_.m.waittraceev, _g_.m.waittraceskip) } casgstatus(gp, _Grunning, _Gwaiting) dropg() //解除g和m之间的关系 ...... schedule() }
park_m首先把当前goroutine的状态设置为_Gwaiting(因为它正在等待其它goroutine往channel里面写数据),然后调用dropg函数解除g和m之间的关系,最后通过调用schedule函数进入调度循环,schedule函数我们也详细分析过,它首先会从运行队列中挑选出一个goroutine,然后调用gogo函数切换到被挑选出来的goroutine去运行。因为main goroutine在读取channel被阻塞之前已经把创建好的g2放入了运行队列,所以在这里schedule会把g2调度起来运行,这里完成了一次从main goroutine到g2的调度(我们假设只有一个工作线程在进行调度)。
g2 goroutine的入口是start函数,下面我们就从该函数开始分析g2写channel的流程,看它如何唤醒正在等待着读取channel的main goroutine。还是先来反汇编一下start函数的代码:
# Disassembly of main.start (offsets in <+N> are relative to the function start).
0x44f480<+0>:mov %fs:0xfffffffffffffff8,%rcx
0x44f489<+9>:cmp 0x10(%rcx),%rsp
0x44f48d<+13>:jbe 0x44f4c1 <main.start+65>
0x44f48f<+15>:sub $0x18,%rsp
0x44f493<+19>:mov %rbp,0x10(%rsp)
0x44f498<+24>:lea 0x10(%rsp),%rbp
0x44f49d<+29>:mov 0x20(%rsp),%rax
0x44f4a2<+34>:mov %rax,(%rsp)
0x44f4a6<+38>:lea 0x2d71b(%rip),%rax
0x44f4ad<+45>:mov %rax,0x8(%rsp)
0x44f4b2<+50>:callq 0x404560 <runtime.chansend1> # send on the channel
0x44f4b7<+55>:mov 0x10(%rsp),%rbp
0x44f4bc<+60>:add $0x18,%rsp
0x44f4c0<+64>:retq
# Reached only via the jbe at +13; after runtime.morestack_noctxt returns,
# execution restarts main.start from its first instruction.
0x44f4c1<+65>:callq 0x447390 <runtime.morestack_noctxt>
0x44f4c6<+70>:jmp 0x44f480 <main.start>
runtime/chan.go : 124
/ entry point for c <- x from compiled code //go:nosplit func chansend1(c *hchan, elem unsafe.Pointer) { chansend(c, elem, true, getcallerpc()) } // runtime/chan.go : 142 func chansend(c *hchan, ep unsafe.Pointer, block bool, callerpc uintptr) bool { ...... if sg := c.recvq.dequeue(); sg != nil { // Found a waiting receiver. We pass the value we want to send // directly to the receiver, bypassing the channel buffer (if any). //可以直接发送数据给sg send(c, sg, ep, func() { unlock(&c.lock) }, 3) return true } ...... } // runtime/chan.go : 269 func send(c *hchan, sg *sudog, ep unsafe.Pointer, unlockf func(), skip int) { ...... goready(gp, skip+1) } // runtime/proc.go : 310 func goready(gp *g, traceskip int) { systemstack(func() { ready(gp, traceskip, true) }) }
channel发送和读取的流程类似,如果能够立即发送则立即发送并返回,如果不能立即发送则需要阻塞,在我们这个场景中,因为main goroutine此时此刻正挂在channel的读取队列上等待数据,所以这里直接调用send函数发送给main goroutine,send函数则调用goready函数切换到g0栈并调用ready函数来唤醒sg对应的goroutine,即正在等待读channel的main goroutine。
runtime/proc.go : 639
// Mark gp ready to run.
func ready(gp *g, traceskip int, next bool) {
	......
	// Mark runnable.
	_g_ := getg()
	......
	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
	casgstatus(gp, _Gwaiting, _Grunnable)
	runqput(_g_.m.p.ptr(), gp, next) // put gp on the run queue
	if atomic.Load(&sched.npidle) != 0 && atomic.Load(&sched.nmspinning) == 0 {
		// There is an idle p and no worker thread is currently stealing
		// goroutines, so a p has to be woken up to do work.
		wakep()
	}
	......
}
对于本章我们分析的场景,执行到这里main goroutine已经被放入了运行队列,但还未被调度起来运行,而g2 goroutine在向channel写完数据之后就从这里的ready函数返回并退出了,从第二章我们对goroutine的退出流程的分析可以得知,在g2的退出过程中将会在goexit0函数中调用schedule函数进入下一轮调度,从而把刚刚放入运行队列的main goroutine调度起来运行。