C协程实现的效率对比
前段时间实现的C协程依赖栈传递参数,在开启优化时会导致错误,于是实现了一个ucontext的版本,但ucontext的切换效率太差了,
在我的机器上执行4000W次切换需要11秒左右,这达不到我的要求,所以重新设计了实现,使得在开启优化时也能得到正确的结果.
并且效率也令人满意,4000W次切换仅需要730ms左右,足足比ucontext的实现快了近15倍。
下面贴出实现:
#include "uthread.h"
#include <stdlib.h>
#include <ucontext.h>
#include <pthread.h>
#include "link_list.h"

/*
 * Per-coroutine state for a hand-rolled context switch written with GCC
 * inline assembly for 32-bit x86 (movl / esp / ebp / regparm).
 */
struct uthread
{
    /* Saved registers, indexed 0:esp 1:ebp 2:eax 3:ebx 4:ecx 5:edx 6:edi 7:esi. */
    int32_t reg[8];
    /* Value handed over by the most recent uthread_switch() targeting this coroutine. */
    void *para;
    /* Coroutine that is switched to when main_fun returns; NULL means exit(0). */
    uthread_t parent;
    /* Entry function; called with para on the first switch to this coroutine. */
    void*(*main_fun)(void*);
    /* Caller-supplied stack memory; uthread_destroy() does not free it. */
    void *stack;
    /* Size of the stack in bytes as given to uthread_create(). */
    int32_t ssize;
    /* 1 until the coroutine has been entered for the first time. */
    int8_t first_run;
};

#ifdef _DEBUG
/*
 * Debug-build trampoline: first code executed on a new coroutine stack.
 *
 * The coroutine pointer is not a C parameter here; the debug uthread_switch()
 * loads it into eax immediately before calling this function, and the asm
 * below copies eax into `arg`.
 *
 * Runs main_fun(para); afterwards switches to the parent with main_fun's
 * return value, or terminates the process with exit(0) when there is no parent.
 *
 * NOTE(review): `arg` is written by the asm but is listed as an input operand
 * ("m"); GCC's extended-asm rules expect an output ("=m") for that. It also
 * relies on eax surviving the function prologue - confirm before building
 * with any optimisation.
 */
void uthread_main_function()
{
    int32_t arg;
    __asm__ volatile(
        "movl %%eax,%0\t\n"
        :
        :"m"(arg)
    );
    uthread_t u = (uthread_t)arg;
    void *ret = u->main_fun(u->para);
    if(u->parent)
        uthread_switch(u,u->parent,ret);
    else
        exit(0);
}
#else
/*
 * Release-build trampoline: first code executed on a new coroutine stack.
 *
 * Declared regparm(1), so `arg` (the coroutine being started) is passed in a
 * register instead of on the freshly installed stack.
 *
 * Runs main_fun(para); afterwards switches to the parent with main_fun's
 * return value, or terminates the process with exit(0) when there is no parent.
 */
void __attribute__((regparm(1))) uthread_main_function(void *arg)
{
    uthread_t u = (uthread_t)arg;
    void *ret = u->main_fun(u->para);
    if(u->parent)
        uthread_switch(u,u->parent,ret);
    else
        exit(0);
}
#endif

/*
 * Allocate and initialise a coroutine descriptor.
 *
 * parent      coroutine to switch to when `fun` returns (may be NULL).
 * stack       caller-owned stack memory, or NULL for a descriptor that only
 *             represents the already-running context.
 * stack_size  size of `stack` in bytes.
 * fun         entry function, or NULL when there is nothing to start.
 *
 * Returns the new descriptor; release it with uthread_destroy().
 *
 * NOTE(review): the calloc() result is dereferenced without a NULL check.
 * NOTE(review): the pointer is cast to int32_t, which only holds on a 32-bit
 * target.
 */
uthread_t uthread_create(uthread_t parent,void*stack,uint32_t stack_size,void*(*fun)(void*))
{
    uthread_t u = (uthread_t)calloc(1,sizeof(*u));
    u->parent = parent;
    u->main_fun = fun;
    u->stack = stack;
    u->ssize = stack_size;
    if(stack)
    {
        /* Initial esp and ebp both point 4 bytes below the end of the stack buffer. */
        u->reg[0] = (int32_t)stack+stack_size-4;
        u->reg[1] = (int32_t)stack+stack_size-4;
    }
    /* Only a coroutine with an entry function needs the first-run path. */
    if(u->main_fun)
        u->first_run = 1;
    return u;
}

/*
 * Free a descriptor created by uthread_create() and reset the caller's handle
 * to NULL. The stack buffer passed to uthread_create() is not freed here.
 */
void uthread_destroy(uthread_t *u)
{
    free(*u);
    *u = NULL;
}

#ifdef _DEBUG
/*
 * Debug-build context switch from `from` to `to`.
 *
 * Stores `para` in to->para, saves the current registers into from->reg and
 * then either starts `to` (first run) or restores its saved registers.
 * When some other coroutine later switches back to `from`, execution resumes
 * here and the function returns from->para, i.e. the value that coroutine
 * passed as `para`. Returns NULL immediately when `from` is NULL.
 *
 * NOTE(review): `to` is dereferenced without a NULL check.
 * NOTE(review): the save asm writes its operands although they are declared
 * as inputs, and no asm statement here has a clobber list; GCC's extended-asm
 * documentation expects "=m" outputs and clobbers for this.
 */
void* __attribute__((regparm(3))) uthread_switch(uthread_t from,uthread_t to,void *para)
{
    if(!from)
        return NULL;
    to->para = para;
    int32_t esp,ebp,eax,ebx,ecx,edx,edi,esi;
    /*
     * Save the current registers into locals.
     * The order is important: ebp and esp are stored last.
     */
    __asm__ volatile(
        "movl %%eax,%2\t\n"
        "movl %%ebx,%3\t\n"
        "movl %%ecx,%4\t\n"
        "movl %%edx,%5\t\n"
        "movl %%edi,%6\t\n"
        "movl %%esi,%7\t\n"
        "movl %%ebp,%1\t\n"
        "movl %%esp,%0\t\n"
        :
        :"m"(esp),"m"(ebp),"m"(eax),"m"(ebx),"m"(ecx),"m"(edx),"m"(edi),"m"(esi)
    );
    from->reg[0] = esp;
    from->reg[1] = ebp;
    from->reg[2] = eax;
    from->reg[3] = ebx;
    from->reg[4] = ecx;
    from->reg[5] = edx;
    from->reg[6] = edi;
    from->reg[7] = esi;
    if(to->first_run)
    {
        to->first_run = 0;
        esp = to->reg[0];
        /* eax carries the coroutine pointer to the debug uthread_main_function(). */
        eax = (int32_t)to;
        /* Load the argument first, then move both ebp and esp to the top of the new stack. */
        __asm__ volatile (
            "movl %1,%%eax\t\n"
            "movl %0,%%ebp\t\n"
            "movl %%ebp,%%esp\t\n"
            :
            :"m"(esp),"m"(eax)
        );
        uthread_main_function();
    }
    else
    {
        esp = to->reg[0];
        ebp = to->reg[1];
        eax = to->reg[2];
        ebx = to->reg[3];
        ecx = to->reg[4];
        edx = to->reg[5];
        edi = to->reg[6];
        esi = to->reg[7];
        /*
         * Restore the target's registers.
         * The order is important: ebp and esp are loaded last.
         * NOTE(review): presumably the locals are addressed relative to ebp,
         * so the final load of esp happens after ebp already points at the
         * target's frame - confirm against the generated assembly.
         */
        __asm__ volatile (
            "movl %2,%%eax\t\n"
            "movl %3,%%ebx\t\n"
            "movl %4,%%ecx\t\n"
            "movl %5,%%edx\t\n"
            "movl %6,%%edi\t\n"
            "movl %7,%%esi\t\n"
            "movl %1,%%ebp\t\n"
            "movl %0,%%esp\t\n"
            :
            :"m"(esp),"m"(ebp),"m"(eax),"m"(ebx),"m"(ecx),"m"(edx),"m"(edi),"m"(esi)
        );
    }
    return from->para;
}
#else
/*
 * Release-build context switch from `from` to `to`.
 *
 * Same contract as the debug build: stores `para` in to->para, saves the
 * current registers, then starts or resumes `to`. When `from` is resumed
 * later the function returns from->para. Returns NULL immediately when
 * `from` is NULL.
 *
 * Differences from the debug build: registers are saved straight into
 * from->reg, and the coroutine pointer reaches uthread_main_function() as a
 * regparm argument instead of through eax by hand.
 *
 * NOTE(review): `to` is dereferenced without a NULL check.
 * NOTE(review): the save asm writes from->reg[] although those operands are
 * declared as inputs, and no asm statement here has a clobber list; GCC's
 * extended-asm documentation expects "=m" outputs and clobbers for this.
 */
void* __attribute__((regparm(3))) uthread_switch(uthread_t from,uthread_t to,void *para)
{
    if(!from)
        return NULL;
    to->para = para;
    int32_t esp,ebp,edi,esi;
    /*
     * Save the current registers directly into from->reg.
     * The order is important: ebp and esp are stored last.
     */
    __asm__ volatile(
        "movl %%eax,%2\t\n"
        "movl %%ebx,%3\t\n"
        "movl %%ecx,%4\t\n"
        "movl %%edx,%5\t\n"
        "movl %%edi,%6\t\n"
        "movl %%esi,%7\t\n"
        "movl %%ebp,%1\t\n"
        "movl %%esp,%0\t\n"
        :
        :"m"(from->reg[0]),"m"(from->reg[1]),"m"(from->reg[2]),"m"(from->reg[3])
        ,"m"(from->reg[4]),"m"(from->reg[5]),"m"(from->reg[6]),"m"(from->reg[7])
    );
    if(to->first_run)
    {
        to->first_run = 0;
        /*
         * Change stack: point ebp, then esp, at the top of the new stack.
         * The order is important.
         */
        __asm__ volatile (
            "movl %0,%%ebp\t\n"
            "movl %%ebp,%%esp\t\n"
            :
            :"m"(to->reg[0])
        );
        uthread_main_function((void*)to);
    }
    else
    {
        /* Four of the saved values are copied into locals before the restore asm. */
        esp = to->reg[0];
        ebp = to->reg[1];
        edi = to->reg[6];
        esi = to->reg[7];
        /*
         * Restore the target's registers.
         * The order is important: ebp and esp are loaded last.
         * NOTE(review): the to->reg[] operands are presumably addressed via a
         * register that this same asm overwrites part-way through - verify
         * the generated assembly for the compiler and flags in use.
         */
        __asm__ volatile (
            "movl %2,%%eax\t\n"
            "movl %3,%%ebx\t\n"
            "movl %4,%%ecx\t\n"
            "movl %5,%%edx\t\n"
            "movl %6,%%edi\t\n"
            "movl %7,%%esi\t\n"
            "movl %1,%%ebp\t\n"
            "movl %0,%%esp\t\n"
            :
            :"m"(esp),"m"(ebp),"m"(to->reg[2]),"m"(to->reg[3])
            ,"m"(to->reg[4]),"m"(to->reg[5]),"m"(edi),"m"(esi)
        );
    }
    return from->para;
}
#endif
test.c
#include <stdio.h> #include "uthread.h" #include "SysTime.h" #include <stdlib.h> void* ufun2(void *arg) { printf("ufun2\n"); char **tmp = (char**)arg; uthread_t self = (uthread_t)tmp[0]; uthread_t parent = (uthread_t)tmp[1]; volatile void *ptr = self; while(ptr) { ptr = uthread_switch(self,parent,NULL); } return NULL; } char *stack1; char *stack2; void* ufun1(void *arg) { uthread_t self = (uthread_t)arg; uthread_t u = uthread_create(self,stack2,4096,ufun2); char* _arg[2]; _arg[0] = (char*)u; _arg[1] = (char*)self; int i = 0; uint32_t tick = GetSystemMs(); for( ; i < 20000000; ++i) { uthread_switch(self,u,&_arg[0]); } printf("%d\n",GetSystemMs()-tick); uthread_switch(self,u,NULL); return arg; } int main() { stack1 = (char*)malloc(4096); stack2 = (char*)malloc(4096); /* * if use ucontext version char dummy_stack[4096]; uthread_t p = uthread_create(NULL,dummy_stack,0,NULL); */ uthread_t p = uthread_create(NULL,NULL,0,NULL); uthread_t u = uthread_create(p,stack1,4096,ufun1); uthread_switch(p,u,u); printf("main end\n"); return 0; };