Libev源码分析02:Libev中的IO监视器
一:代码流程
在Libev中,启动一个IO监视器,等待该监视器上的事件触发,然后调用该监视器的回调函数。整个的流程是这样的:
首先调用ev_default_loop初始化struct ev_loop结构;
然后调用ev_io_init初始化监视器中的属性,该宏主要就是调用ev_init和ev_io_set;
然后调用ev_io_start启动该监视器,该函数主要是将监视器添加到loop->anfds结构中,将监视的描述符添加到((loop)->fdchanges)中;
调用ev_run开始等待事件的触发,该函数中:
首先会调用fd_reify函数,该函数根据((loop)->fdchanges)中记录的描述符,将该描述符上的事件添加到backend所使用的数据结构中,比如select中的fd_set中;
然后调用time_update更新当前的时间,如果日历时间被人为调整的话,则相应的调整超时事件和周期事件;
调用backend_poll开始等待事件的发生,如果事件在规定时间内触发的话,则会调用fd_event将触发的监视器记录到loop->pendings中;
backend的监听函数(select,poll,epoll_wait)返回之后,首先再次调用time_update更新当前的时间,然后调用ev_invoke_pending,依次处理loop->pendings中的监视器,调用该监视器的回调函数。
以上就是Libev中IO监视器的工作流程,下面详细分析各个函数:
1:ev_default_loop函数
/*
 * Return the default event loop, initialising it on the first call.
 *
 * With EV_MULTIPLICITY the result is a pointer to default_loop_struct;
 * without it the result is an int flag. In both cases the value is 0 when
 * ev_backend (loop) reports that no backend is in use after loop_init.
 */
#if EV_MULTIPLICITY
struct ev_loop *
#else
int
#endif
ev_default_loop (unsigned int flags)
{
  /* only the first call does any work; later calls return the cached value */
  if (!ev_default_loop_ptr)
    {
#if EV_MULTIPLICITY
      struct ev_loop *loop = ev_default_loop_ptr = &default_loop_struct;
#else
      ev_default_loop_ptr = 1;
#endif

      loop_init (loop, flags);

      if (ev_backend (loop))
        {
#if EV_CHILD_ENABLE
          /* register and start the SIGCHLD watcher at the highest priority */
          ev_signal_init (&childev, childcb, SIGCHLD);
          ev_set_priority (&childev, EV_MAXPRI);
          ev_signal_start (EV_A_ &childev);
          ev_unref (loop); /* child watcher should not keep loop alive */
#endif
        }
      else
        ev_default_loop_ptr = 0; /* no usable backend: report failure */
    }

  return ev_default_loop_ptr;
}
EV_MULTIPLICITY宏用来决定是否支持多个loop。系统提供了默认的loop结构default_loop_struct,和指向其的指针ev_default_loop_ptr。
如果支持多个loop,则default_loop_struct就是一个静态的struct ev_loop类型的结构体,其中包含了各种成员,比如ev_tstamp ev_rt_now; int pendingpri;等等。
ev_default_loop_ptr就是指向struct ev_loop 类型的指针。
如果不支持多个loop,则上述的struct ev_loop结构就不复存在,其成员都是以静态变量的形式进行定义,而ev_default_loop_ptr也只是一个int变量,用来表明"loop"是否已经初始化成功。
下面的描述,均以支持多个loop为准。
在ev_default_loop中,首先是调用loop_init初始化loop中的各种成员:
/*
 * Initialise the members of a loop and pick a polling backend.
 *
 * Does nothing when `backend` is already non-zero. `flags` is stored in
 * origflags, may be replaced by the LIBEV_FLAGS environment variable, and
 * selects which backends are tried.
 */
static void
loop_init (struct ev_loop *loop, unsigned int flags)
{
  if (!backend)
    {
      origflags = flags;

#if EV_USE_REALTIME
      /* probe once whether clock_gettime (CLOCK_REALTIME) succeeds */
      if (!have_realtime)
        {
          struct timespec ts;

          if (!clock_gettime (CLOCK_REALTIME, &ts))
            have_realtime = 1;
        }
#endif

#if EV_USE_MONOTONIC
      /* probe once whether clock_gettime (CLOCK_MONOTONIC) succeeds */
      if (!have_monotonic)
        {
          struct timespec ts;

          if (!clock_gettime (CLOCK_MONOTONIC, &ts))
            have_monotonic = 1;
        }
#endif

      /* pid check not overridable via env */
#ifndef _WIN32
      if (flags & EVFLAG_FORKCHECK)
        curpid = getpid ();
#endif

      /* LIBEV_FLAGS replaces flags unless EVFLAG_NOENV is set or enable_secure () is true */
      if (!(flags & EVFLAG_NOENV)
          && !enable_secure ()
          && getenv ("LIBEV_FLAGS"))
        flags = atoi (getenv ("LIBEV_FLAGS"));

      /* take initial time readings and remember their difference */
      ev_rt_now          = ev_time ();
      mn_now             = get_clock ();
      now_floor          = mn_now;
      rtmn_diff          = ev_rt_now - mn_now;
#if EV_FEATURE_API
      invoke_cb          = ev_invoke_pending;
#endif

      /* reset the remaining loop state */
      io_blocktime       = 0.;
      timeout_blocktime  = 0.;
      backend            = 0;
      backend_fd         = -1;
      sig_pending        = 0;
#if EV_ASYNC_ENABLE
      async_pending      = 0;
#endif
      pipe_write_skipped = 0;
      pipe_write_wanted  = 0;
      evpipe [0]         = -1;
      evpipe [1]         = -1;
#if EV_USE_INOTIFY
      fs_fd              = flags & EVFLAG_NOINOTIFY ? -1 : -2;
#endif
#if EV_USE_SIGNALFD
      sigfd              = flags & EVFLAG_SIGNALFD  ? -2 : -1;
#endif

      /* no backend requested explicitly: fall back to the recommended set */
      if (!(flags & EVBACKEND_MASK))
        flags |= ev_recommended_backends ();

      /* try the compiled-in backends in this fixed order; the first init that returns non-zero wins */
#if EV_USE_IOCP
      if (!backend && (flags & EVBACKEND_IOCP  )) backend = iocp_init   (EV_A_ flags);
#endif
#if EV_USE_PORT
      if (!backend && (flags & EVBACKEND_PORT  )) backend = port_init   (EV_A_ flags);
#endif
#if EV_USE_KQUEUE
      if (!backend && (flags & EVBACKEND_KQUEUE)) backend = kqueue_init (EV_A_ flags);
#endif
#if EV_USE_EPOLL
      if (!backend && (flags & EVBACKEND_EPOLL )) backend = epoll_init  (EV_A_ flags);
#endif
#if EV_USE_POLL
      if (!backend && (flags & EVBACKEND_POLL  )) backend = poll_init   (EV_A_ flags);
#endif
#if EV_USE_SELECT
      if (!backend && (flags & EVBACKEND_SELECT)) backend = select_init (EV_A_ flags);
#endif

      /* initialise (but do not start) the loop-internal watchers */
      ev_prepare_init (&pending_w, pendingcb);

#if EV_SIGNAL_ENABLE || EV_ASYNC_ENABLE
      ev_init (&pipe_w, pipecb);
      ev_set_priority (&pipe_w, EV_MAXPRI);
#endif
    }
}
调用ev_time初始化ev_rt_now,得到当前的日历时间,也就是自1970-01-01 00:00:00(UTC)以来的秒数,该值通过CLOCK_REALTIME或者gettimeofday得到;
调用get_clock初始化mn_now,该变量要么是CLOCK_MONOTONIC(系统启动时间),要么就是ev_time的值(日历时间);
然后就是:
/* excerpt from loop_init: remember the initial clock readings ... */
now_floor          = mn_now;
rtmn_diff          = ev_rt_now - mn_now;
#if EV_FEATURE_API
invoke_cb          = ev_invoke_pending;
#endif

/* ... and reset the remaining loop state */
io_blocktime       = 0.;
timeout_blocktime  = 0.;
backend            = 0;
backend_fd         = -1;
sig_pending        = 0;
#if EV_ASYNC_ENABLE
async_pending      = 0;
#endif
pipe_write_skipped = 0;
pipe_write_wanted  = 0;
evpipe [0]         = -1;
evpipe [1]         = -1;
#if EV_USE_INOTIFY
fs_fd              = flags & EVFLAG_NOINOTIFY ? -1 : -2;
#endif
#if EV_USE_SIGNALFD
sigfd              = flags & EVFLAG_SIGNALFD  ? -2 : -1;
#endif
之后,如果flags中没有指定任何backend(即flags & EVBACKEND_MASK为0),则调用ev_recommended_backends得到当前系统推荐使用的backend类型(比如select,poll,epoll等),并将其加入到flags中。
接下来就是根据系统支持的backend,按照一定的优先顺序,去初始化backend:
/* excerpt from loop_init: try each compiled-in backend in this fixed order; */
/* once `backend` is non-zero the remaining candidates are skipped */
#if EV_USE_IOCP
if (!backend && (flags & EVBACKEND_IOCP  )) backend = iocp_init   (EV_A_ flags);
#endif
#if EV_USE_PORT
if (!backend && (flags & EVBACKEND_PORT  )) backend = port_init   (EV_A_ flags);
#endif
#if EV_USE_KQUEUE
if (!backend && (flags & EVBACKEND_KQUEUE)) backend = kqueue_init (EV_A_ flags);
#endif
#if EV_USE_EPOLL
if (!backend && (flags & EVBACKEND_EPOLL )) backend = epoll_init  (EV_A_ flags);
#endif
#if EV_USE_POLL
if (!backend && (flags & EVBACKEND_POLL  )) backend = poll_init   (EV_A_ flags);
#endif
#if EV_USE_SELECT
if (!backend && (flags & EVBACKEND_SELECT)) backend = select_init (EV_A_ flags);
#endif
接下来,初始化loop中的ev_prepare监视器pending_w;如果启用了EV_SIGNAL_ENABLE或者EV_ASYNC_ENABLE,还会初始化ev_io监视器pipe_w,并将其优先级设置为EV_MAXPRI。
loop_init返回后,backend已经初始化完成,接着,初始化并启动信号监视器ev_signal childev。暂不深入。
至此,初始化默认loop的工作就完成了。
2:ev_init
该函数以宏的形式存在,主要用来设置监视器的公共成员active、pending、priority、cb等。代码如下:
/* Initialise the generic part of any watcher: clear `active` and `pending`, */
/* set the priority to 0 and install cb_ as the callback. */
#define ev_init(ev,cb_) do {                    \
  ((ev_watcher *)(void *)(ev))->active  =       \
  ((ev_watcher *)(void *)(ev))->pending = 0;    \
  ev_set_priority ((ev), 0);                    \
  ev_set_cb ((ev), cb_);                        \
} while (0)
3:ev_io_set
该宏主要是设置IO监视器ev_io的特有成员:要监听的描述符fd和其上的事件events。其中设置events时会附加上标志位EV__IOFDSET,其代码如下:
/* Set the descriptor and event mask of an ev_io watcher; EV__IOFDSET is */
/* always or'ed into the stored mask. */
#define ev_io_set(ev,fd_,events_) do { (ev)->fd = (fd_); (ev)->events = (events_) | EV__IOFDSET; } while (0)
4:ev_io_start
/*
 * Start an IO watcher: link it into the watcher list of its descriptor and
 * queue the descriptor via fd_change. Returns immediately when the watcher
 * is already active.
 */
void
ev_io_start (struct ev_loop *loop, ev_io *w) EV_THROW
{
  int fd = w->fd;

  if (expect_false (ev_is_active (w)))
    return;

  assert (("libev: ev_io_start called with negative fd", fd >= 0));
  assert (("libev: ev_io_start called with illegal event mask", !(w->events & ~(EV__IOFDSET | EV_READ | EV_WRITE))));

  ev_start (loop, (W)w, 1);
  /* make sure anfds has a slot for index fd, then prepend/attach w to that slot's list */
  array_needsize (ANFD, anfds, anfdmax, fd + 1, array_init_zero);
  wlist_add (&anfds[fd].head, (WL)w);

  /* common bug, apparently */
  assert (("libev: ev_io_start called with corrupted watcher", ((WL)w)->next != (WL)w));

  /* pass on EV__IOFDSET (if set in w->events) together with EV_ANFD_REIFY, */
  /* then clear EV__IOFDSET from the watcher's own mask */
  fd_change (loop, fd, w->events & EV__IOFDSET | EV_ANFD_REIFY);
  w->events &= ~EV__IOFDSET;
}
首先对监视器w做检查:
ev_is_active(w)为假,即w->active == 0: 监视器现在的状态应是未启动的;如果监视器已经处于启动状态,则函数直接返回,不做任何操作;
fd>=0;
(!(w->events& ~(EV__IOFDSET | EV_READ | EV_WRITE))): IO监视器只能监控EV__IOFDSET,EV_READ,EV_WRITE中的事件,其他事件一律不能关心。
调用ev_start矫正ev的优先级;置ev->active=1表明状态为启动状态;++(loop->activecnt)
根据情况调整((loop)->anfds)数组的大小,然后将监视器ev加入到(loop->anfds)[fd].head的链表中。
loop->anfds是ANFD结构类型的数组,ANFD结构体定义如下:
/* per-descriptor record; the anfds array is indexed by the descriptor value */
typedef struct
{
  WL head;              /* head of the list of watchers attached to this descriptor */
  unsigned char events; /* the events watched for */
  unsigned char reify;  /* flag set when this ANFD needs reification (EV_ANFD_REIFY, EV__IOFDSET) */
  unsigned char emask;  /* the epoll backend stores the actual kernel mask in here */
  unsigned char unused;
#if EV_USE_EPOLL
  unsigned int egen;    /* generation counter to counter epoll bugs */
#endif
#if EV_SELECT_IS_WINSOCKET || EV_USE_IOCP
  SOCKET handle;
#endif
#if EV_USE_IOCP
  OVERLAPPED or, ow;
#endif
} ANFD;
每一个描述符对应着一个ANFD结构,描述符的值就是((loop)->anfds)的下标。每个描述符上可以有若干监视器,同一个描述符上的监视器以链表的形式组织,这里ANFD结构中的head就是链表头指针。
((loop)->anfds)数组是动态变化的,初始为空。(loop)->anfdmax就是该数组的当前大小。
调用fd_change(loop, fd, w->events & EV__IOFDSET |EV_ANFD_REIFY):
/*
 * Or `flags` into anfds [fd].reify. If reify was zero before, also append
 * fd to the fdchanges array (growing it as needed).
 */
void
fd_change (struct ev_loop *loop, int fd, int flags)
{
  unsigned char reify = anfds [fd].reify; /* value before this call */
  anfds [fd].reify |= flags;

  /* a non-zero previous value means fd was already appended and not yet reset */
  if (expect_true (!reify))
    {
      ++fdchangecnt;
      array_needsize (int, fdchanges, fdchangemax, fdchangecnt, EMPTY2);
      fdchanges [fdchangecnt - 1] = fd;
    }
}
查看(loop->anfds)[fd].reify的原值,如果原值为0,表明自上一次fd_reify处理之后,该描述符还没有被记录到((loop)->fdchanges)数组中,此时将其追加到该数组中(原值不为0则说明已经记录过,无需重复添加)。((loop)->fdchanges)数组记录的是发生了变更、等待fd_reify处理的描述符,((loop)->fdchangemax)记录该数组当前已分配的容量,((loop)->fdchangecnt)记录该数组当前已使用的元素个数。
将w->events & EV__IOFDSET | EV_ANFD_REIFY添加到(loop->anfds)[fd].reify中。
最后,将w->events中的EV__IOFDSET掩码消除:
w->events &= ~EV__IOFDSET;
5:fd_reify
/*
 * Process every descriptor queued in fdchanges: recompute its combined event
 * mask from the attached watchers and call backend_modify when the mask
 * changed or EV__IOFDSET was set in its reify flags. Empties the queue.
 */
void
fd_reify (struct ev_loop *loop)
{
  int i;

  for (i = 0; i < fdchangecnt; ++i)
    {
      int fd = fdchanges [i];
      ANFD *anfd = anfds + fd;
      ev_io *w;

      /* remember the previous state before it is overwritten below */
      unsigned char o_events = anfd->events;
      unsigned char o_reify  = anfd->reify;

      anfd->reify  = 0;

      /*if (expect_true (o_reify & EV_ANFD_REIFY)) probably a deoptimisation */
        {
          anfd->events = 0;

          /* the new mask is the union of the masks of all watchers on this fd */
          for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next)
            anfd->events |= (unsigned char)w->events;

          if (o_events != anfd->events)
            o_reify = EV__IOFDSET; /* actually |= */
        }

      if (o_reify & EV__IOFDSET)
        backend_modify (loop, fd, o_events, anfd->events);
    }

  fdchangecnt = 0;
}
轮询数组((loop)->fdchanges),从0到((loop)->fdchangecnt-1)之间的所有元素,每个元素代表了一个描述符,根据取得的描述符值fd,找到相应的ANFD结构anfd。
记录原anfd->events和anfd->reify的值,然后:
/* excerpt from fd_reify: clear the flags, then rebuild the event mask as the */
/* union of the masks of all watchers attached to this descriptor */
anfd->reify = 0;
anfd->events = 0;

for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next)
  anfd->events |= (unsigned char)w->events;
然后,如果该描述符上的事件集合发生了变化(o_events != anfd->events),或者原reify值中带有EV__IOFDSET标志,则调用backend_modify函数,让backend按照新的事件集合对fd进行监控。以backend为select为例,就是根据anfd->events中的事件,将fd添加到相应的fd_set中去。
最后,置((loop)->fdchangecnt)为0。
6:time_update函数
该函数重新获得mn_now、ev_rt_now等的值,并且如果发现时间被人为调整的话,则在代码中也作出相应的调整。ev_rt_now表示日历时间,mn_now要么表示系统启动时间,要么表示日历时间。
/* fetch new monotonic and realtime times from the kernel */
/* also detect if there was a timejump, and act accordingly */
/* max_block is only used in the non-monotonic branch, as slack when testing for a jump */
void
time_update (struct ev_loop *loop, ev_tstamp max_block)
{
#if EV_USE_MONOTONIC
  if (expect_true (have_monotonic))
    {
      int i;
      ev_tstamp odiff = rtmn_diff; /* realtime/monotonic offset before this update */

      mn_now = get_clock ();

      /* only fetch the realtime clock every 0.5*MIN_TIMEJUMP seconds */
      /* interpolate in the meantime */
      if (expect_true (mn_now - now_floor < MIN_TIMEJUMP * .5))
        {
          ev_rt_now = rtmn_diff + mn_now;
          return;
        }

      now_floor = mn_now;
      ev_rt_now = ev_time ();

      /* loop a few times, before making important decisions.
       * on the choice of "4": one iteration isn't enough,
       * in case we get preempted during the calls to
       * ev_time and get_clock. a second call is almost guaranteed
       * to succeed in that case, though. and looping a few more times
       * doesn't hurt either as we only do this on time-jumps or
       * in the unlikely event of having been preempted here.
       */
      /* note: with i starting at 4 and the test being --i, the body runs three times */
      for (i = 4; --i; )
        {
          ev_tstamp diff;
          rtmn_diff = ev_rt_now - mn_now;

          diff = odiff - rtmn_diff;

          /* |diff| below MIN_TIMEJUMP: the offset is considered unchanged */
          if (expect_true ((diff < 0. ? -diff : diff) < MIN_TIMEJUMP))
            return; /* all is well */

          ev_rt_now = ev_time ();
          mn_now    = get_clock ();
          now_floor = mn_now;
        }

      /* no timer adjustment, as the monotonic clock doesn't jump */
      /* timers_reschedule (EV_A_ rtmn_diff - odiff) */
# if EV_PERIODIC_ENABLE
      periodics_reschedule (EV_A);
# endif
    }
  else
#endif
    {
      ev_rt_now = ev_time ();

      /* a jump is assumed when time went backwards or advanced further than */
      /* max_block + MIN_TIMEJUMP past the previous reading */
      if (expect_false (mn_now > ev_rt_now || ev_rt_now > mn_now + max_block + MIN_TIMEJUMP))
        {
          /* adjust timers. this is easy, as the offset is the same for all of them */
          timers_reschedule (EV_A_ ev_rt_now - mn_now);
#if EV_PERIODIC_ENABLE
          periodics_reschedule (EV_A);
#endif
        }

      mn_now = ev_rt_now;
    }
}
如果宏定义EV_USE_MONOTONIC为1,并且have_monotonic为1(sys_clock_gettime支持CLOCK_MONOTONIC)的话,mn_now就表示系统启动时间,它不会被人为的调整。
这种情况下,更新系统启动时间mn_now的值,如果该值与now_floor(上一次读取日历时间时记录的mn_now)之差小于MIN_TIMEJUMP的一半(0.5s),表示刚刚读取过日历时间,则用rtmn_diff + mn_now插值得到ev_rt_now之后,直接返回。
否则,先将now_floor更新为mn_now,并重新读取ev_rt_now的值,然后根据ev_rt_now - mn_now之差的变化,判断时间是否被人调整。如果ev_rt_now - mn_now之差的浮动小于1s,则说明时间没有调整,直接退出。如果浮动大于1s,则重新更新mn_now和ev_rt_now,再次判断时间差的浮动,如果判断了3次,浮动始终大于1s,说明时间被人为调整了,则需要更新周期事件,这种情况下不调整超时事件(超时事件都是根据mn_now设置的,在have_monotonic为1的情况下,mn_now表示系统启动时间,不会被调整)。
如果宏定义EV_USE_MONOTONIC为0,或者have_monotonic为0(sys_clock_gettime不支持CLOCK_MONOTONIC)的话,mn_now与ev_rt_now一样,也是日历时间。
这种情况下,更新ev_rt_now的值,将该值与之前的日历时间比较,如果时间被人调整了,则需要调整超时事件和周期事件。
7:fd_event
在backend_poll函数中,如果有些监视器的事件触发了,就会调用fd_event函数,将触发的监视器和事件events记录到loop->pendings数组中。
/*
 * Report events `revents` on descriptor `fd`. The events are forwarded to
 * fd_event_nocheck only while the descriptor's reify flags are zero;
 * otherwise the call does nothing.
 */
void
fd_event (struct ev_loop *loop, int fd, int revents)
{
  ANFD *anfd = anfds + fd;

  if (expect_true (!anfd->reify))
    fd_event_nocheck (EV_A_ fd, revents);
}
已经触发而还没有处理的事件状态称为PENDING状态。在fd_event函数中,根据fd找到相应的ANFD结构。然后就是下面的判断:只有当anfd->reify为0,也就是该描述符上没有等待fd_reify处理的变更时,才会调用fd_event_nocheck处理触发的事件,否则本次触发的事件被忽略:
if (expect_true (!anfd->reify)) fd_event_nocheck (EV_A_ fd, revents);
fd_event_nocheck的代码如下,根据fd找到相应的ANFD结构,轮询其中的监视器链表,如果某监视器上的事件触发了(w->events & revents不为0),则调用ev_feed_event函数处理:
/*
 * Walk the watcher list of descriptor `fd` and feed each watcher the subset
 * of `revents` that it is registered for; watchers with an empty
 * intersection are skipped.
 * NOTE(review): the return type is not part of the quoted snippet.
 */
fd_event_nocheck (EV_P_ int fd, int revents)
{
  ANFD *anfd = anfds + fd;
  ev_io *w;

  for (w = (ev_io *)anfd->head; w; w = (ev_io *)((WL)w)->next)
    {
      int ev = w->events & revents; /* events both requested and reported */

      if (ev)
        ev_feed_event (EV_A_ (W)w, ev);
    }
}
ev_feed_event代码如下:
/*
 * Mark watcher `w` as pending with events `revents`. A watcher that is
 * already pending only gets the new events or'ed into its existing entry;
 * otherwise a new entry is appended to pendings [pri].
 */
void
ev_feed_event (struct ev_loop *loop, void *w, int revents)
{
  W w_ = (W)w;
  int pri = ABSPRI (w_);

  if (expect_false (w_->pending))
    pendings [pri][w_->pending - 1].events |= revents;
  else
    {
      /* w_->pending stores the 1-based position of the entry in pendings [pri] */
      w_->pending = ++pendingcnt [pri];
      array_needsize (ANPENDING, pendings [pri], pendingmax [pri], w_->pending, EMPTY2);
      pendings [pri][w_->pending - 1].w      = w_;
      pendings [pri][w_->pending - 1].events = revents;
    }

  /* NOTE(review): presumably resets the priority scan position used by ev_invoke_pending - confirm */
  pendingpri = NUMPRI - 1;
}
(loop->pendingcnt)是一个一维整型数组,(loop->pendingcnt)[i]表示当前处于PENDING状态的优先级为i的监视器的个数。
(loop->pendings )是个二维数组,每个元素类型为ANPENDING,该结构的定义如下:
/* stores the pending event set for a given watcher */
typedef struct
{
  ev_watcher *w; /* the pending watcher */
  int events;    /* events accumulated for it so far */
} ANPENDING;
ANPENDING结构记录了处于PENDING状态的监视器以及触发的事件。(loop->pendings)数组,以优先级为第一维,以ANPENDING为第二维。
在函数ev_feed_event中,判断w_->pending的值,该值为0表示该监视器当前不处于PENDING状态,不为0表示的是该监视器已经处于PENDING状态,而其具体的值,代表该监视器在pendings [pri]中的位置(从1开始),也就是该监视器被加入时(loop->pendingcnt) [pri]自增之后的值。
该值不为0,说明该监视器已经处于PENDING状态了,因此只需要:
pendings [pri][w_->pending - 1].events |= revents;
如果该值为0,则
/* excerpt from ev_feed_event: append a new entry to pendings [pri]; */
/* w_->pending becomes its 1-based position */
w_->pending = ++pendingcnt [pri];
array_needsize (ANPENDING, pendings [pri], pendingmax [pri], w_->pending, EMPTY2);
pendings [pri][w_->pending - 1].w      = w_;
pendings [pri][w_->pending - 1].events = revents;
8:ev_invoke_pending
/*
 * Invoke the callbacks of all pending watchers, scanning priority slots
 * from NUMPRI - 1 down to 0 and, within a slot, from the last entry to the
 * first. Each watcher's pending field is cleared before its callback runs.
 */
void
ev_invoke_pending (struct ev_loop *loop )
{
  pendingpri = NUMPRI;

  while (pendingpri) /* pendingpri possibly gets modified in the inner loop */
    {
      --pendingpri;

      while (pendingcnt [pendingpri])
        {
          /* take the last entry of this priority slot */
          ANPENDING *p = pendings [pendingpri] + --pendingcnt [pendingpri];

          p->w->pending = 0;
          EV_CB_INVOKE (p->w, p->events);
        }
    }
}
该函数主要是,调用所有当前处于PENDING状态的监视器的回调函数。根据优先级pendingpri从高到低,(loop->pendingcnt) [pendingpri]表示PENDING状态的,优先级为pendingpri的监视器个数。从后向前轮询(loop->pendings)[pendingpri]数组,先置w->pending = 0,然后调用每个监视器的回调函数。
二:总结
三:例子
ev_io io_w; void io_action(struct ev_loop *main_loop, ev_io *io_w, int e) { int rst; char buf[1024] = {'\0'}; rst = read(io_w->fd, buf, sizeof(buf)); if(rst <= 0) { close(io_w->fd); printf("client over\n"); ev_io_stop(main_loop,io_w); return; } buf[1023] = '\0'; printf("Read in a string: %s \n",buf); write(io_w->fd, buf, strlen(buf)); } int socketfd() { int listenfd = socket(AF_INET, SOCK_STREAM, 0); if (listenfd < 0) { perror("socket error"); return -1; } struct sockaddr_in serveraddr; struct sockaddr_in clientaddr; int addrlen = sizeof(struct sockaddr_in); serveraddr.sin_family = AF_INET; serveraddr.sin_addr.s_addr = htonl(INADDR_ANY); serveraddr.sin_port = htons(8898); if(bind(listenfd, (struct sockaddr *)&serveraddr, sizeof(struct sockaddr_in)) < 0) { perror("bind error"); return -1; } if(listen(listenfd, 5) < 0) { perror("listen error"); return -1; } int connectfd = 0; connectfd = accept(listenfd, (struct sockaddr *)&clientaddr, (socklen_t *)&addrlen); if(connectfd < 0) { perror("accept error"); return -1; } return connectfd; } int main() { int fd = socketfd(); if(fd < 0) return; struct ev_loop *main_loop = ev_default_loop(0); ev_init(&io_w,io_action); ev_io_set(&io_w,fd,EV_READ); ev_io_start(main_loop,&io_w); ev_run(main_loop,0); return; }