go语言的GC

采用 mark & sweep 算法。运行时保证至少每2分钟进行一次GC;此外,当堆上新分配的内存相对于上次GC结束后存活的内存超过一定比例(由GOGC控制,默认100%)时,也会触发一次GC。
mark 阶段会停止所有 G 的运行(stop the world),mark 本身由多个线程并行完成。
sweep 过程是分散进行的:每次只清扫一个 span(见下文 sweepone),而不是一次性扫完整个堆。

mark过程

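mark 的根(root)包括 .data 段、.bss 段、finalizer 队列、span 上的 specials(如 SpecialFinalizer)以及每个 G 的 stack。
每段内存都有相应的 bitmap,记录每个 word(64位平台上为8字节)的标志位:堆的 GC bitmap 中每个 word 占 4 bit;栈和数据段使用紧凑的 ptrmask,每字节描述 4 个 word。
mark 的过程就是根据 bitmap 从根开始遍历内存块、标记所有可达对象的过程(实现上用显式的工作队列代替递归)。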
word标志位有如下3种类型:

  1. 标量

  2. 指针

  3. 连续两个word表示一个iface或eface

    // scanblock scans a block of n bytes starting at pointer b for references
    // to other objects, scanning any it finds recursively until there are no
    // unscanned objects left. Instead of using an explicit recursion, it keeps
    // a work list in the Workbuf* structures and loops in the main function
    // body. Keeping an explicit work list is easier on the stack allocator and
    // more efficient.
    //
    // Parameters:
    //   b       - start of the block to scan. When b is nil there is no initial
    //             block: the function starts from the work list and, once its
    //             local work is exhausted, keeps refilling from the global
    //             queue (keepworking is set).
    //   n       - number of bytes to scan starting at b.
    //   ptrmask - nil to take pointer info from the GC bitmap, otherwise a
    //             compact pointer mask describing the block.
    //
    // NOTE: this listing is abridged; rows of dots mark code that was elided,
    // including the code that loads 'bits' for each word, the span-table lookup
    // for interior pointers, and the 'badobj' label.
    static void
    scanblock(byte *b, uintptr n, byte *ptrmask)
    {
    byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
    uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
    Workbuf *wbuf;
    Iface *iface;
    Eface *eface;
    Type *typ;
    MSpan *s;
    pageID k;
    bool keepworking;

     // Cache memory arena parameters in local vars.
     arena_start = runtime·mheap.arena_start;
     arena_used = runtime·mheap.arena_used;
    
     // nobj/wp mirror the work buffer's fill level locally; wp points one
     // past the last queued object.
     wbuf = getempty(nil);
     nobj = wbuf->nobj;
     wp = &wbuf->obj[nobj];
     // Only a call without an initial block keeps pulling work from the
     // global queue after its own work runs out.
     keepworking = b == nil;
     // scanbuf is a small ring of objects waiting to be scanned;
     // scanbufpos is the next slot to read/overwrite.
     scanbufpos = 0;
     for(i = 0; i < nelem(scanbuf); i++)
     	scanbuf[i] = nil;
    
     ptrbitp = nil;
    
     // ptrmask can have 2 possible values:
     // 1. nil - obtain pointer mask from GC bitmap.
     // 2. pointer to a compact mask (for stacks and data).
     if(b != nil)
     	goto scanobj;
     for(;;) {
     	if(nobj == 0) {
     		// Out of work in workbuf.
     		// First, see if there is any work in scanbuf.
     		for(i = 0; i < nelem(scanbuf); i++) {
     			b = scanbuf[scanbufpos];
     			scanbuf[scanbufpos++] = nil;
     			scanbufpos %= nelem(scanbuf);
     			if(b != nil) {
     				n = arena_used - b; // scan until bitBoundary or BitsDead
     				ptrmask = nil; // use GC bitmap for pointer info
     				goto scanobj;
     			}
     		}
     		// Both the work buffer and scanbuf are empty here.
     		if(!keepworking) {
     			putempty(wbuf);
     			return;
     		}
     		// Refill workbuf from global queue.
     		wbuf = getfull(wbuf);
     		if(wbuf == nil)
     			return;
     		nobj = wbuf->nobj;
     		wp = &wbuf->obj[nobj];
     	}
    
     	// If another proc wants a pointer, give it some.
     	if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
     		// Publish the local count before handing the buffer over.
     		wbuf->nobj = nobj;
     		wbuf = handoff(wbuf);
     		nobj = wbuf->nobj;
     		wp = &wbuf->obj[nobj];
     	}
    
     	// Pop the most recently queued object from the work buffer.
     	wp--;
     	nobj--;
     	b = *wp;
     	n = arena_used - b; // scan until next bitBoundary or BitsDead
     	ptrmask = nil; // use GC bitmap for pointer info
    
     scanobj:
     	if(DebugPtrs)
     		runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
     	// Find bits of the beginning of the object.
     	if(ptrmask == nil) {
     		// off is the word index of b within the arena; the bitmap byte
     		// for it is addressed downward from arena_start.
     		off = (uintptr*)b - (uintptr*)arena_start;
     		ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
     	}
     	// Walk the block one pointer-sized word at a time.
     	for(i = 0; i < n; i += PtrSize) {
     		obj = nil;
     		// Find bits for this word.
             ......
    
     		if(bits <= BitsScalar) // BitsScalar || BitsDead
     			continue;
     		if(bits == BitsPointer) {
     			// obj0 keeps the raw pointer value for diagnostics; obj may
     			// later be adjusted to the object's base.
     			obj = *(byte**)(b+i);
     			obj0 = obj;
     			goto markobj;
     		}
    
     		// With those three out of the way, must be multi-word.
     		if(Debug && bits != BitsMultiWord)
     			runtime·throw("unexpected garbage collection bits");
     		// Find the next pair of bits.
     		if(ptrmask == nil) {
     			bits = *ptrbitp;
     			j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
     			ptrbitp -= j;
     			bits >>= gcBits*j;
     			bits = (bits>>2)&BitsMask;
     		} else
     			bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;
    
     		if(Debug && bits != BitsIface && bits != BitsEface)
     			runtime·throw("unexpected garbage collection bits");
    
     		// For both interface kinds the data word is followed unless the
     		// dynamic type is flagged both KindDirectIface and KindNoPointers.
     		if(bits == BitsIface) {
     			iface = (Iface*)(b+i);
     			if(iface->tab != nil) {
     				typ = iface->tab->type;
     				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
     					obj = iface->data;
     			}
     		} else {
     			eface = (Eface*)(b+i);
     			typ = eface->type;
     			if(typ != nil) {
     				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
     					obj = eface->data;
     			}
     		}
    
     		// The interface value occupies two words; skip the second one.
     		i += PtrSize;
    
     		obj0 = obj;
     	markobj:
     		// At this point we have extracted the next potential pointer.
     		// Check if it points into heap.
     		if(obj == nil)
     			continue;
     		if(obj < arena_start || obj >= arena_used) {
     			// Outside the arena: ignored, except that a non-nil value
     			// below PhysPageSize is reported when runtime·invalidptr is set.
     			if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
     				s = nil;
     				goto badobj;
     			}
     			continue;
     		}
     		// Mark the object.
     		// Round down to a word boundary, then locate the object's bits in
     		// the GC bitmap.
     		obj = (byte*)((uintptr)obj & ~(PtrSize-1));
     		off = (uintptr*)obj - (uintptr*)arena_start;
     		bitp = arena_start - off/wordsPerBitmapByte - 1;
     		shift = (off % wordsPerBitmapByte) * gcBits;
     		xbits = *bitp;
     		bits = (xbits >> shift) & bitMask;
     		if((bits&bitBoundary) == 0) {
     			// Not a beginning of a block, consult span table to find the block beginning.
                 ......
                 
     			// Retry with the base address found above.
     			obj = p;
     			goto markobj;
     		}
     		if(DebugPtrs)
     			runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);
    
     		if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
     			// Running garbage collection again because
     			// we want to find the path from a root to a bad pointer.
     			// Found possible next step; extend or finish path.
     			for(j=0; j<nbadblock; j++)
     				if(badblock[j] == (uintptr)b)
     					goto AlreadyBad;
     			runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
     			if(ptrmask != nil)
     				runtime·throw("bad pointer");
     			if(nbadblock >= nelem(badblock))
     				runtime·throw("badblock trace too long");
     			badblock[nbadblock++] = (uintptr)b;
     		AlreadyBad:;
     		}
    
     		// Now we have bits, bitp, and shift correct for
     		// obj pointing at the base of the object.
     		// Only care about not marked objects.
     		if((bits&bitMarked) != 0)
     			continue;
     		// If obj size is greater than 8, then each byte of GC bitmap
     		// contains info for at most one object. In such case we use
     		// non-atomic byte store to mark the object. This can lead
     		// to double enqueue of the object for scanning, but scanning
     		// is an idempotent operation, so it is OK. This cannot lead
     		// to bitmap corruption because the single marked bit is the
     		// only thing that can change in the byte.
     		// For 8-byte objects we use non-atomic store, if the other
     		// quadruple is already marked. Otherwise we resort to CAS
     		// loop for marking.
     		if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
     			runtime·work.nproc == 1)
     			*bitp = xbits | (bitMarked<<shift);
     		else
     			runtime·atomicor8(bitp, bitMarked<<shift);
    
     		// The object is marked now; if its first word is typed BitsDead
     		// it contains nothing to scan, so do not queue it.
     		if(((xbits>>(shift+2))&BitsMask) == BitsDead)
     			continue;  // noscan object
    
     		// Queue the obj for scanning.
     		// obj takes a slot in the scanbuf ring; whatever previously
     		// occupied that slot (p) is moved to the work buffer below.
     		PREFETCH(obj);
     		p = scanbuf[scanbufpos];
     		scanbuf[scanbufpos++] = obj;
     		scanbufpos %= nelem(scanbuf);
     		if(p == nil)
     			continue;
    
     		// If workbuf is full, obtain an empty one.
     		if(nobj >= nelem(wbuf->obj)) {
     			wbuf->nobj = nobj;
     			wbuf = getempty(wbuf);
     			nobj = wbuf->nobj;
     			wp = &wbuf->obj[nobj];
     		}
     		*wp = p;
     		wp++;
     		nobj++;
     	}
         ............
     }
    

    }

    // markroot scans the single GC root selected by index i.
    // The fixed indices (RootData, RootBss, RootFinalizers, RootSpans,
    // RootFlushCaches) select global roots; any other index is treated as
    // RootCount plus an index into runtime·allg and scans that goroutine's stack.
    // desc is not used by the body.
    //
    // NOTE: this listing is abridged; the body of the RootSpans case is elided.
    static void
    markroot(ParFor *desc, uint32 i)
    {
    FinBlock *fb;
    MSpan *s;
    uint32 spanidx, sg;
    G *gp;
    void *p;
    uint32 status;
    bool restart;

     USED(&desc);
     // Note: if you add a case here, please also update heapdump.c:dumproots.
     switch(i) {
     case RootData:
     	// Scan [runtime·data, runtime·edata) using its compact pointer mask.
     	scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
     	break;
    
     case RootBss:
     	// Scan [runtime·bss, runtime·ebss) using its compact pointer mask.
     	scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
     	break;
    
     case RootFinalizers:
     	// Scan the first cnt entries of every block on the runtime·allfin list.
     	for(fb=runtime·allfin; fb; fb=fb->alllink)
     		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
     	break;
    
     case RootSpans:
     	// mark MSpan.specials
         ......
     	break;
    
     case RootFlushCaches:
     	flushallmcaches();
     	break;
    
     default:
     	// the rest is scanning goroutine stacks
     	if(i - RootCount >= runtime·allglen)
     		runtime·throw("markroot: bad index");
     	gp = runtime·allg[i - RootCount];
     	// remember when we've first observed the G blocked
     	// needed only to output in traceback
     	status = runtime·readgstatus(gp);
     	if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
     		gp->waitsince = runtime·work.tstart;
     	// Shrink a stack if not much of it is being used.
     	runtime·shrinkstack(gp);
     	// gcworkdone starts out true only for dead goroutines.
     	if(runtime·readgstatus(gp) == Gdead) 
     		gp->gcworkdone = true;
     	else 
     		gp->gcworkdone = false; 
     	// Stop the goroutine for the duration of the stack scan and restart
     	// it afterwards only if stopg reported that a restart is needed.
     	restart = runtime·stopg(gp);
     	scanstack(gp);
     	if(restart)
     		runtime·restartg(gp);
     	break;
     }
    

    }

sweep过程

sweep 逐个检查 span 中的每个对象是否被 mark:未被 mark 的小对象被串成链表,经 MCentral_FreeSpan 放回 span 的 freelist;大对象(sizeclass 为 0)所在的 span 则直接释放回 heap。
如果 span 中的所有对象都进入了 freelist,那么会将 span 的内存释放到 heap 中。

// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
    MSpan *s;
    uint32 idx, sg;
    uintptr npages;
 
    // increment locks to ensure that the goroutine is not preempted
    // in the middle of sweep thus leaving the span in an inconsistent state for next GC
    g->m->locks++;
    sg = runtime·mheap.sweepgen;
    for(;;) {
    	idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
    	if(idx >= runtime·work.nspan) {
    		runtime·mheap.sweepdone = true;
    		g->m->locks--;
    		return -1;
    	}
    	s = runtime·work.spans[idx];
    	if(s->state != MSpanInUse) {
    		s->sweepgen = sg;
    		continue;
    	}
    	if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
    		continue;
    	npages = s->npages;
    	if(!runtime·MSpan_Sweep(s, false))
    		npages = 0;
    	g->m->locks--;
    	return npages;
    }
}
 
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
//
// Parameters:
//   s        - span to sweep; must be MSpanInUse with sweepgen equal to
//              mheap.sweepgen-1, otherwise the function throws.
//   preserve - see above; not permitted for a large span (sizeclass 0).
//
// The caller must have preemption disabled (m->locks or m->mallocing
// non-zero, or be running on g0); this is checked on entry.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
    int32 cl, n, npages, nfree;
    uintptr size, off, step;
    uint32 sweepgen;
    byte *p, *bitp, shift, xbits, bits;
    MCache *c;
    byte *arena_start;
    MLink head, *end, *link;
    Special *special, **specialp, *y;
    bool res, sweepgenset;
 
    // It's critical that we enter this function with preemption disabled,
    // GC must not start while we are in the middle of this function.
    if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
    	runtime·throw("MSpan_Sweep: m is not locked");
    sweepgen = runtime·mheap.sweepgen;
    if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    	runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    		s->state, s->sweepgen, sweepgen);
    	runtime·throw("MSpan_Sweep: bad span state");
    }
    arena_start = runtime·mheap.arena_start;
    cl = s->sizeclass;
    size = s->elemsize;
    // n is the number of objects in the span: one for size class 0 (a large
    // span), otherwise as many as fit in the class's page allocation.
    if(cl == 0) {
    	n = 1;
    } else {
    	// Chunk full of small blocks.
    	npages = runtime·class_to_allocnpages[cl];
    	n = (npages << PageShift) / size;
    }
    res = false;
    nfree = 0;
    // head is a dummy node; freed small objects are appended after it via end.
    end = &head;
    c = g->m->mcache;
    sweepgenset = false;
 
    // Mark any free objects in this span so we don't collect them.
    for(link = s->freelist; link != nil; link = link->next) {
    	off = (uintptr*)link - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	*bitp |= bitMarked<<shift;
    }
 
    // Unlink & free special records for any objects we're about to free.
    // specialp always points at the link that refers to 'special', so a
    // record can be spliced out by overwriting *specialp.
    specialp = &s->specials;
    special = *specialp;
    while(special != nil) {
    	// A finalizer can be set for an inner byte of an object, find object beginning.
    	p = (byte*)(s->start << PageShift) + special->offset/size*size;
    	off = (uintptr*)p - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	bits = (*bitp>>shift) & bitMask;
    	if((bits&bitMarked) == 0) {
    		// Find the exact byte for which the special was setup
    		// (as opposed to object beginning).
    		p = (byte*)(s->start << PageShift) + special->offset;
    		// about to free object: splice out special record
    		y = special;
    		special = special->next;
    		*specialp = special;
    		if(!runtime·freespecial(y, p, size, false)) {
    			// stop freeing of object if it has a finalizer
    			*bitp |= bitMarked << shift;
    		}
    	} else {
    		// object is still live: keep special record
    		specialp = &special->next;
    		special = *specialp;
    	}
    }
 
    // Sweep through n objects of given size starting at p.
    // This thread owns the span now, so it can manipulate
    // the block bitmap without atomic operations.
    p = (byte*)(s->start << PageShift);
    // Find bits for the beginning of the span.
    off = (uintptr*)p - (uintptr*)arena_start;
    bitp = arena_start - off/wordsPerBitmapByte - 1;
    shift = 0;
    // step is the number of bitmap bytes between consecutive objects; it is
    // zero when an object is smaller than the memory one bitmap byte covers.
    step = size/(PtrSize*wordsPerBitmapByte);
    // Rewind to the previous quadruple as we move to the next
    // in the beginning of the loop.
    bitp += step;
    if(step == 0) {
    	// 8-byte objects.
    	bitp++;
    	shift = gcBits;
    }
    for(; n > 0; n--, p += size) {
    	bitp -= step;
    	if(step == 0) {
    		// Alternate between the two halves of a bitmap byte, moving to
    		// the next byte every second object.
    		if(shift != 0)
    			bitp--;
    		shift = gcBits - shift;
    	}
 
    	xbits = *bitp;
    	bits = (xbits>>shift) & bitMask;
 
    	// Allocated and marked object, reset bits to allocated.
    	if((bits&bitMarked) != 0) {
    		*bitp &= ~(bitMarked<<shift);
    		continue;
    	}
    	// At this point we know that we are looking at garbage object
    	// that needs to be collected.
    	if(runtime·debug.allocfreetrace)
    		runtime·tracefree(p, size);
    	// Reset to allocated+noscan.
    	// Clears the mark bit and the type bits, then stores BitsDead as the type.
    	*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
    	if(cl == 0) {
    		// Free large span.
    		if(preserve)
    			runtime·throw("can't preserve large span");
    		runtime·unmarkspan(p, s->npages<<PageShift);
    		s->needzero = 1;
    		// important to set sweepgen before returning it to heap
    		runtime·atomicstore(&s->sweepgen, sweepgen);
    		sweepgenset = true;
    		// NOTE(rsc,dvyukov): The original implementation of efence
    		// in CL 22060046 used SysFree instead of SysFault, so that
    		// the operating system would eventually give the memory
    		// back to us again, so that an efence program could run
    		// longer without running out of memory. Unfortunately,
    		// calling SysFree here without any kind of adjustment of the
    		// heap data structures means that when the memory does
    		// come back to us, we have the wrong metadata for it, either in
    		// the MSpan structures or in the garbage collection bitmap.
    		// Using SysFault here means that the program will run out of
    		// memory fairly quickly in efence mode, but at least it won't
    		// have mysterious crashes due to confused memory reuse.
    		// It should be possible to switch back to SysFree if we also
    		// implement and then call some kind of MHeap_DeleteSpan.
    		if(runtime·debug.efence) {
    			s->limit = nil;	// prevent mlookup from finding this span
    			runtime·SysFault(p, size);
    		} else
    			runtime·MHeap_Free(&runtime·mheap, s, 1);
    		c->local_nlargefree++;
    		c->local_largefree += size;
    		// Lower the next-GC threshold by the freed size scaled by
    		// (gcpercent+100)/100.
    		runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
    		res = true;
    	} else {
    		// Free small object.
    		if(size > 2*sizeof(uintptr))
    			((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
    		else if(size > sizeof(uintptr))
    			((uintptr*)p)[1] = 0;
 
    		// Append the object to the list of freed objects.
    		end->next = (MLink*)p;
    		end = (MLink*)p;
    		nfree++;
    	}
    }
 
    // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
    // because of the potential for a concurrent free/SetFinalizer.
    // But we need to set it before we make the span available for allocation
    // (return it to heap or mcentral), because allocation code assumes that a
    // span is already swept if available for allocation.
 
    // Nothing was freed and sweepgen has not been published yet: publish it
    // here. The other cases publish it in the large-span branch above or
    // leave it to MCentral_FreeSpan below.
    if(!sweepgenset && nfree == 0) {
    	// The span must be in our exclusive ownership until we update sweepgen,
    	// check for potential races.
    	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    		runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    			s->state, s->sweepgen, sweepgen);
    		runtime·throw("MSpan_Sweep: bad span state after sweep");
    	}
    	runtime·atomicstore(&s->sweepgen, sweepgen);
    }
    if(nfree > 0) {
    	// Account for the freed small objects, then hand the list
    	// (head.next .. end) to the central list for this size class.
    	c->local_nsmallfree[cl] += nfree;
    	c->local_cachealloc -= nfree * size;
    	runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
    	res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
    	// MCentral_FreeSpan updates sweepgen
    }
    return res;
}

posted on 2015-05-17 14:28  richmonkey  阅读(603)  评论(0)  编辑  收藏  举报

导航