go语言的GC

Go的GC采用mark&sweep（标记-清除）算法。运行时保证至少每2分钟进行一次GC；此外，当堆内存相对于上次GC结束后存活的内存增长超过一定比例（由GOGC控制，默认100%）时，也会触发一次GC
进行mark的过程中，会停止一切G的运行（stop the world），mark的工作由多个线程并行完成
sweep的过程是分散的，每次只清扫一个span（见下文的sweepone）

mark过程

mark的根（root）内存块包括 .data段、.bss段、每个G的stack，以及SpecialFinalizer
每段内存都有其相应的bitmap，用来记录每个word（64位平台上为8字节）的标志位，每个word需要4bit的标志位
mark的过程就是依据bitmap递归遍历这些内存块的过程
word标志位有如下3种类型:

标量
指针
多字（multi-word）：连续两个word表示一个iface或eface

// scanblock scans a block of n bytes starting at pointer b for references
// to other objects, scanning any it finds recursively until there are no
// unscanned objects left. Instead of using an explicit recursion, it keeps
// a work list in the Workbuf* structures and loops in the main function
// body. Keeping an explicit work list is easier on the stack allocator and
// more efficient.
//
// Parameters:
//   b       - start of the block to scan. If nil, there is no initial block:
//             the function works off full work buffers obtained from getfull
//             and returns once getfull yields nil.
//   n       - number of bytes to scan starting at b.
//   ptrmask - compact pointer mask describing the block, or nil to take the
//             pointer info from the GC bitmap.
//
// NOTE: this listing is an excerpt; the lines made only of dots stand for
// code that was left out (for example the badobj label targeted below).
static void
scanblock(byte *b, uintptr n, byte *ptrmask)
{
byte *obj, *obj0, *p, *arena_start, *arena_used, **wp, *scanbuf[8], *ptrbitp, *bitp;
uintptr i, j, nobj, size, idx, x, off, scanbufpos, bits, xbits, shift;
Workbuf *wbuf;
Iface *iface;
Eface *eface;
Type *typ;
MSpan *s;
pageID k;
bool keepworking;

 // Cache memory arena parameters in local vars.
 arena_start = runtime·mheap.arena_start;
 arena_used = runtime·mheap.arena_used;

 wbuf = getempty(nil);
 nobj = wbuf->nobj;
 wp = &wbuf->obj[nobj];
 // Without an initial block, keep refilling from the global queue once
 // the local work runs out; with one, return as soon as it is drained.
 keepworking = b == nil;
 scanbufpos = 0;
 for(i = 0; i < nelem(scanbuf); i++)
 	scanbuf[i] = nil;

 ptrbitp = nil;

 // ptrmask can have 2 possible values:
 // 1. nil - obtain pointer mask from GC bitmap.
 // 2. pointer to a compact mask (for stacks and data).
 if(b != nil)
 	goto scanobj;
 for(;;) {
 	if(nobj == 0) {
 		// Out of work in workbuf.
 		// First, see if there is any work in scanbuf.
 		for(i = 0; i < nelem(scanbuf); i++) {
 			b = scanbuf[scanbufpos];
 			scanbuf[scanbufpos++] = nil;
 			scanbufpos %= nelem(scanbuf);
 			if(b != nil) {
 				n = arena_used - b; // scan until bitBoundary or BitsDead
 				ptrmask = nil; // use GC bitmap for pointer info
 				goto scanobj;
 			}
 		}
 		if(!keepworking) {
 			putempty(wbuf);
 			return;
 		}
 		// Refill workbuf from global queue.
 		wbuf = getfull(wbuf);
 		if(wbuf == nil)
 			return;
 		nobj = wbuf->nobj;
 		wp = &wbuf->obj[nobj];
 	}

 	// If another proc wants a pointer, give it some.
 	if(runtime·work.nwait > 0 && nobj > 4 && runtime·work.full == 0) {
 		wbuf->nobj = nobj;
 		wbuf = handoff(wbuf);
 		nobj = wbuf->nobj;
 		wp = &wbuf->obj[nobj];
 	}

 	// Pop the most recently queued object off the work buffer.
 	wp--;
 	nobj--;
 	b = *wp;
 	n = arena_used - b; // scan until next bitBoundary or BitsDead
 	ptrmask = nil; // use GC bitmap for pointer info

 scanobj:
 	if(DebugPtrs)
 		runtime·printf("scanblock %p +%p %p\n", b, n, ptrmask);
 	// Find bits of the beginning of the object.
 	if(ptrmask == nil) {
 		off = (uintptr*)b - (uintptr*)arena_start;
 		ptrbitp = arena_start - off/wordsPerBitmapByte - 1;
 	}
 	// Walk the block one pointer-sized word at a time.
 	for(i = 0; i < n; i += PtrSize) {
 		obj = nil;
 		// Find bits for this word.
         ......

 		if(bits <= BitsScalar) // BitsScalar || BitsDead
 			continue;
 		if(bits == BitsPointer) {
 			obj = *(byte**)(b+i);
 			obj0 = obj;
 			goto markobj;
 		}

 		// With those three out of the way, must be multi-word.
 		if(Debug && bits != BitsMultiWord)
 			runtime·throw("unexpected garbage collection bits");
 		// Find the next pair of bits.
 		if(ptrmask == nil) {
 			bits = *ptrbitp;
 			j = ((uintptr)b+i+PtrSize)/PtrSize & 1;
 			ptrbitp -= j;
 			bits >>= gcBits*j;
 			bits = (bits>>2)&BitsMask;
 		} else
 			bits = (ptrmask[((i+PtrSize)/PtrSize)/4]>>((((i+PtrSize)/PtrSize)%4)*BitsPerPointer))&BitsMask;

 		if(Debug && bits != BitsIface && bits != BitsEface)
 			runtime·throw("unexpected garbage collection bits");

 		// Take the data word of the interface value as the candidate
 		// pointer, unless the dynamic type is stored directly in the
 		// interface and has no pointers.
 		if(bits == BitsIface) {
 			iface = (Iface*)(b+i);
 			if(iface->tab != nil) {
 				typ = iface->tab->type;
 				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
 					obj = iface->data;
 			}
 		} else {
 			eface = (Eface*)(b+i);
 			typ = eface->type;
 			if(typ != nil) {
 				if(!(typ->kind&KindDirectIface) || !(typ->kind&KindNoPointers))
 					obj = eface->data;
 			}
 		}

 		// The multi-word value covers two words; skip the second one.
 		i += PtrSize;

 		obj0 = obj;
 	markobj:
 		// At this point we have extracted the next potential pointer.
 		// Check if it points into heap.
 		if(obj == nil)
 			continue;
 		if(obj < arena_start || obj >= arena_used) {
 			if((uintptr)obj < PhysPageSize && runtime·invalidptr) {
 				s = nil;
 				goto badobj;
 			}
 			continue;
 		}
 		// Mark the object.
 		// Round down to a word boundary, then locate the bitmap byte and
 		// the shift of this word's bits inside that byte.
 		obj = (byte*)((uintptr)obj & ~(PtrSize-1));
 		off = (uintptr*)obj - (uintptr*)arena_start;
 		bitp = arena_start - off/wordsPerBitmapByte - 1;
 		shift = (off % wordsPerBitmapByte) * gcBits;
 		xbits = *bitp;
 		bits = (xbits >> shift) & bitMask;
 		if((bits&bitBoundary) == 0) {
 			// Not a beginning of a block, consult span table to find the block beginning.
             ......
             
 			obj = p;
 			goto markobj;
 		}
 		if(DebugPtrs)
 			runtime·printf("scan *%p = %p => base %p\n", b+i, obj0, obj);

 		if(nbadblock > 0 && (uintptr)obj == badblock[nbadblock-1]) {
 			// Running garbage collection again because
 			// we want to find the path from a root to a bad pointer.
 			// Found possible next step; extend or finish path.
 			for(j=0; j<nbadblock; j++)
 				if(badblock[j] == (uintptr)b)
 					goto AlreadyBad;
 			runtime·printf("runtime: found *(%p+%p) = %p+%p\n", b, i, obj0, (uintptr)(obj-obj0));
 			if(ptrmask != nil)
 				runtime·throw("bad pointer");
 			if(nbadblock >= nelem(badblock))
 				runtime·throw("badblock trace too long");
 			badblock[nbadblock++] = (uintptr)b;
 		AlreadyBad:;
 		}

 		// Now we have bits, bitp, and shift correct for
 		// obj pointing at the base of the object.
 		// Only care about not marked objects.
 		if((bits&bitMarked) != 0)
 			continue;
 		// If obj size is greater than 8, then each byte of GC bitmap
 		// contains info for at most one object. In such case we use
 		// non-atomic byte store to mark the object. This can lead
 		// to double enqueue of the object for scanning, but scanning
 		// is an idempotent operation, so it is OK. This cannot lead
 		// to bitmap corruption because the single marked bit is the
 		// only thing that can change in the byte.
 		// For 8-byte objects we use non-atomic store, if the other
 		// quadruple is already marked. Otherwise we resort to CAS
 		// loop for marking.
 		if((xbits&(bitMask|(bitMask<<gcBits))) != (bitBoundary|(bitBoundary<<gcBits)) ||
 			runtime·work.nproc == 1)
 			*bitp = xbits | (bitMarked<<shift);
 		else
 			runtime·atomicor8(bitp, bitMarked<<shift);

 		if(((xbits>>(shift+2))&BitsMask) == BitsDead)
 			continue;  // noscan object

 		// Queue the obj for scanning.
 		// scanbuf is used as a small ring: obj takes the current slot and
 		// the previous occupant, if any, moves on to the work buffer.
 		PREFETCH(obj);
 		p = scanbuf[scanbufpos];
 		scanbuf[scanbufpos++] = obj;
 		scanbufpos %= nelem(scanbuf);
 		if(p == nil)
 			continue;

 		// If workbuf is full, obtain an empty one.
 		if(nobj >= nelem(wbuf->obj)) {
 			wbuf->nobj = nobj;
 			wbuf = getempty(wbuf);
 			nobj = wbuf->nobj;
 			wp = &wbuf->obj[nobj];
 		}
 		*wp = p;
 		wp++;
 		nobj++;
 	}
     ............
 }

}

// markroot scans the GC root selected by index i. The fixed indices cover
// the data segment, the bss segment, the finalizer blocks and the span
// specials, plus a step that flushes all mcaches; any other index selects
// the goroutine runtime·allg[i - RootCount], whose stack is scanned.
// desc is not used by the body (see USED below).
//
// NOTE: this listing is an excerpt; the RootSpans case is elided (dotted
// line), which is presumably where the locals s, spanidx, sg and p are used.
static void
markroot(ParFor *desc, uint32 i)
{
FinBlock *fb;
MSpan *s;
uint32 spanidx, sg;
G *gp;
void *p;
uint32 status;
bool restart;

 USED(&desc);
 // Note: if you add a case here, please also update heapdump.c:dumproots.
 switch(i) {
 case RootData:
 	// Data segment, described by its compact pointer mask.
 	scanblock(runtime·data, runtime·edata - runtime·data, runtime·gcdatamask.bytedata);
 	break;

 case RootBss:
 	// Bss segment, described by its compact pointer mask.
 	scanblock(runtime·bss, runtime·ebss - runtime·bss, runtime·gcbssmask.bytedata);
 	break;

 case RootFinalizers:
 	// Scan the used entries (fb->cnt) of every finalizer block on the allfin list.
 	for(fb=runtime·allfin; fb; fb=fb->alllink)
 		scanblock((byte*)fb->fin, fb->cnt*sizeof(fb->fin[0]), finptrmask);
 	break;

 case RootSpans:
 	// mark MSpan.specials
     ......
 	break;

 case RootFlushCaches:
 	flushallmcaches();
 	break;

 default:
 	// the rest is scanning goroutine stacks
 	if(i - RootCount >= runtime·allglen)
 		runtime·throw("markroot: bad index");
 	gp = runtime·allg[i - RootCount];
 	// remember when we've first observed the G blocked
 	// needed only to output in traceback
 	status = runtime·readgstatus(gp);
 	if((status == Gwaiting || status == Gsyscall) && gp->waitsince == 0)
 		gp->waitsince = runtime·work.tstart;
 	// Shrink a stack if not much of it is being used.
 	runtime·shrinkstack(gp);
 	// A G already in Gdead is flagged as done; any other G starts as not done.
 	if(runtime·readgstatus(gp) == Gdead) 
 		gp->gcworkdone = true;
 	else 
 		gp->gcworkdone = false; 
 	// Stop the G around the stack scan; restart it afterwards only if
 	// stopg reported that a restart is needed.
 	restart = runtime·stopg(gp);
 	scanstack(gp);
 	if(restart)
 		runtime·restartg(gp);
 	break;
 }

}

sweep过程

sweep逐个检查span中的每个对象是否被标记（marked），将未被标记的对象放入span的freelist中；
如果span中的所有对象都进入了freelist，那么会将整个span的内存归还给heap。

// sweeps one span
// returns number of pages returned to heap, or -1 if there is nothing to sweep
uintptr
runtime·sweepone(void)
{
    MSpan *s;
    uint32 idx, sg;
    uintptr npages;
 
    // increment locks to ensure that the goroutine is not preempted
    // in the middle of sweep thus leaving the span in an inconsistent state for next GC
    g->m->locks++;
    sg = runtime·mheap.sweepgen;
    for(;;) {
    	idx = runtime·xadd(&runtime·sweep.spanidx, 1) - 1;
    	if(idx >= runtime·work.nspan) {
    		runtime·mheap.sweepdone = true;
    		g->m->locks--;
    		return -1;
    	}
    	s = runtime·work.spans[idx];
    	if(s->state != MSpanInUse) {
    		s->sweepgen = sg;
    		continue;
    	}
    	if(s->sweepgen != sg-2 || !runtime·cas(&s->sweepgen, sg-2, sg-1))
    		continue;
    	npages = s->npages;
    	if(!runtime·MSpan_Sweep(s, false))
    		npages = 0;
    	g->m->locks--;
    	return npages;
    }
}
 
// Sweep frees or collects finalizers for blocks not marked in the mark phase.
// It clears the mark bits in preparation for the next GC round.
// Returns true if the span was returned to heap.
// If preserve=true, don't return it to heap nor relink in MCentral lists;
// caller takes care of it.
//
// Parameters:
//   s        - span to sweep; must be MSpanInUse with sweepgen equal to
//              mheap.sweepgen-1, otherwise the function throws.
//   preserve - see above; throws if set while a large (sizeclass 0) span
//              is being freed.
bool
runtime·MSpan_Sweep(MSpan *s, bool preserve)
{
    int32 cl, n, npages, nfree;
    uintptr size, off, step;
    uint32 sweepgen;
    byte *p, *bitp, shift, xbits, bits;
    MCache *c;
    byte *arena_start;
    MLink head, *end, *link;
    Special *special, **specialp, *y;
    bool res, sweepgenset;
 
    // It's critical that we enter this function with preemption disabled,
    // GC must not start while we are in the middle of this function.
    if(g->m->locks == 0 && g->m->mallocing == 0 && g != g->m->g0)
    	runtime·throw("MSpan_Sweep: m is not locked");
    sweepgen = runtime·mheap.sweepgen;
    if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    	runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    		s->state, s->sweepgen, sweepgen);
    	runtime·throw("MSpan_Sweep: bad span state");
    }
    arena_start = runtime·mheap.arena_start;
    cl = s->sizeclass;
    size = s->elemsize;
    // n is the number of objects in the span.
    if(cl == 0) {
    	// Size class 0 is handled as one object (the "large span" case below).
    	n = 1;
    } else {
    	// Chunk full of small blocks.
    	npages = runtime·class_to_allocnpages[cl];
    	n = (npages << PageShift) / size;
    }
    res = false;
    nfree = 0;
    // head is a dummy node; end tracks the tail of the list of freed small objects.
    end = &head;
    c = g->m->mcache;
    sweepgenset = false;
 
    // Mark any free objects in this span so we don't collect them.
    for(link = s->freelist; link != nil; link = link->next) {
    	off = (uintptr*)link - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	*bitp |= bitMarked<<shift;
    }
 
    // Unlink & free special records for any objects we're about to free.
    specialp = &s->specials;
    special = *specialp;
    while(special != nil) {
    	// A finalizer can be set for an inner byte of an object, find object beginning.
    	p = (byte*)(s->start << PageShift) + special->offset/size*size;
    	off = (uintptr*)p - (uintptr*)arena_start;
    	bitp = arena_start - off/wordsPerBitmapByte - 1;
    	shift = (off % wordsPerBitmapByte) * gcBits;
    	bits = (*bitp>>shift) & bitMask;
    	if((bits&bitMarked) == 0) {
    		// Find the exact byte for which the special was setup
    		// (as opposed to object beginning).
    		p = (byte*)(s->start << PageShift) + special->offset;
    		// about to free object: splice out special record
    		y = special;
    		special = special->next;
    		*specialp = special;
    		if(!runtime·freespecial(y, p, size, false)) {
    			// stop freeing of object if it has a finalizer
    			*bitp |= bitMarked << shift;
    		}
    	} else {
    		// object is still live: keep special record
    		specialp = &special->next;
    		special = *specialp;
    	}
    }
 
    // Sweep through n objects of given size starting at p.
    // This thread owns the span now, so it can manipulate
    // the block bitmap without atomic operations.
    p = (byte*)(s->start << PageShift);
    // Find bits for the beginning of the span.
    off = (uintptr*)p - (uintptr*)arena_start;
    bitp = arena_start - off/wordsPerBitmapByte - 1;
    shift = 0;
    // step is the number of bitmap bytes covered by one object
    // (0 when an object is smaller than one bitmap byte's worth of words).
    step = size/(PtrSize*wordsPerBitmapByte);
    // Rewind to the previous quadruple as we move to the next
    // in the beginning of the loop.
    bitp += step;
    if(step == 0) {
    	// 8-byte objects.
    	bitp++;
    	shift = gcBits;
    }
    for(; n > 0; n--, p += size) {
    	bitp -= step;
    	if(step == 0) {
    		// Objects alternate between the two halves of a bitmap byte;
    		// move to the next byte only after the upper half was used.
    		if(shift != 0)
    			bitp--;
    		shift = gcBits - shift;
    	}
 
    	xbits = *bitp;
    	bits = (xbits>>shift) & bitMask;
 
    	// Allocated and marked object, reset bits to allocated.
    	if((bits&bitMarked) != 0) {
    		*bitp &= ~(bitMarked<<shift);
    		continue;
    	}
    	// At this point we know that we are looking at garbage object
    	// that needs to be collected.
    	if(runtime·debug.allocfreetrace)
    		runtime·tracefree(p, size);
    	// Reset to allocated+noscan.
    	*bitp = (xbits & ~((bitMarked|(BitsMask<<2))<<shift)) | ((uintptr)BitsDead<<(shift+2));
    	if(cl == 0) {
    		// Free large span.
    		if(preserve)
    			runtime·throw("can't preserve large span");
    		runtime·unmarkspan(p, s->npages<<PageShift);
    		s->needzero = 1;
    		// important to set sweepgen before returning it to heap
    		runtime·atomicstore(&s->sweepgen, sweepgen);
    		sweepgenset = true;
    		// NOTE(rsc,dvyukov): The original implementation of efence
    		// in CL 22060046 used SysFree instead of SysFault, so that
    		// the operating system would eventually give the memory
    		// back to us again, so that an efence program could run
    		// longer without running out of memory. Unfortunately,
    		// calling SysFree here without any kind of adjustment of the
    		// heap data structures means that when the memory does
    		// come back to us, we have the wrong metadata for it, either in
    		// the MSpan structures or in the garbage collection bitmap.
    		// Using SysFault here means that the program will run out of
    		// memory fairly quickly in efence mode, but at least it won't
    		// have mysterious crashes due to confused memory reuse.
    		// It should be possible to switch back to SysFree if we also
    		// implement and then call some kind of MHeap_DeleteSpan.
    		if(runtime·debug.efence) {
    			s->limit = nil;	// prevent mlookup from finding this span
    			runtime·SysFault(p, size);
    		} else
    			runtime·MHeap_Free(&runtime·mheap, s, 1);
    		c->local_nlargefree++;
    		c->local_largefree += size;
    		// Lower next_gc by the freed size scaled by (gcpercent+100)/100.
    		runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100));
    		res = true;
    	} else {
    		// Free small object.
    		if(size > 2*sizeof(uintptr))
    			((uintptr*)p)[1] = (uintptr)0xdeaddeaddeaddeadll;	// mark as "needs to be zeroed"
    		else if(size > sizeof(uintptr))
    			((uintptr*)p)[1] = 0;
 
    		// Append the object to the list of freed objects.
    		end->next = (MLink*)p;
    		end = (MLink*)p;
    		nfree++;
    	}
    }
 
    // We need to set s->sweepgen = h->sweepgen only when all blocks are swept,
    // because of the potential for a concurrent free/SetFinalizer.
    // But we need to set it before we make the span available for allocation
    // (return it to heap or mcentral), because allocation code assumes that a
    // span is already swept if available for allocation.
 
    if(!sweepgenset && nfree == 0) {
    	// The span must be in our exclusive ownership until we update sweepgen,
    	// check for potential races.
    	if(s->state != MSpanInUse || s->sweepgen != sweepgen-1) {
    		runtime·printf("MSpan_Sweep: state=%d sweepgen=%d mheap.sweepgen=%d\n",
    			s->state, s->sweepgen, sweepgen);
    		runtime·throw("MSpan_Sweep: bad span state after sweep");
    	}
    	runtime·atomicstore(&s->sweepgen, sweepgen);
    }
    if(nfree > 0) {
    	// Account for the freed small objects, then hand the list
    	// (head.next .. end) over to the size class's MCentral.
    	c->local_nsmallfree[cl] += nfree;
    	c->local_cachealloc -= nfree * size;
    	runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100));
    	res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl].mcentral, s, nfree, head.next, end, preserve);
    	// MCentral_FreeSpan updates sweepgen
    }
    return res;
}

posted on 2015-05-17 14:28 richmonkey 阅读(603) 评论(0) 编辑收藏举报

刷新页面返回顶部

richmonkey

go语言的GC

mark过程

sweep过程

导航

公告