兼容90%标准C的词法分析器
暂不支持八进制数,以及带前缀/后缀的数字字面量的分词
拿这个词法分析器跑了一遍整个Nginx源码,基本都能正确地分出结果,后面有测试例子。
1 #ifndef _STATES_H_ 2 #define _STATES_H_ 3 4 #include "log_config.h" 5 6 __LOG_BEGIN_NAMESPACE 7 8 enum _states 9 { 10 IN_NONE, 11 IN_NUM, 12 IN_0XNUM, 13 IN_INT, 14 IN_FLOAT, 15 IN_E, 16 IN_ALPHA, 17 IN_MINUS, 18 IN_PLUS, 19 IN_MULTIPLE, 20 IN_EXCLAMATION_POINT, 21 IN_AND, 22 IN_DIVIDE, 23 IN_PERCENT, 24 IN_LEFT_NARROW, 25 IN_RIGHT_NARROW, 26 IN_EQUAL, 27 IN_XOR, 28 IN_OR, 29 IN_DOT, 30 IN_C_COMMENT, 31 IN_CPP_COMMENT, 32 IN_FIRST_DOUBLE_QUOTATION_MARKS, 33 IN_SINGLE_QUOTATION_MARKS, 34 IN_COLON 35 }; 36 37 __LOG_END_NAMESPACE 38 39 #endif
1 #ifndef _TOKEN_H_ 2 #define _TOKEN_H_ 3 4 #include <string> 5 6 #include "log_config.h" 7 8 __LOG_BEGIN_NAMESPACE 9 enum token_attr 10 { 11 NONE,//表示空属性 12 KEYWORD,//关键字 (if, else, ...) 13 INT, //整数 (1, 100, ...) 14 FLOAT,//浮点数 (0.1, 3.14, ...) 15 SYMBOL,//符号 (=, <, *, &, (, )...) 16 VARIABLE,//变量 (变量名, 函数名, 结构体名...) 17 STRING 18 //....... 19 }; 20 struct token 21 { 22 token() :name(""), line_no(0), attr(NONE) {} 23 24 std::string name; 25 long line_no; 26 enum token_attr attr; 27 28 void set_all(const std::string& _name, 29 const long _line, enum token_attr _attr) 30 { 31 set_name(_name); 32 set_line(_line); 33 set_attr(_attr); 34 } 35 void set_name(const std::string& str){ name = str; } 36 void set_line(const long l){ line_no = l; } 37 void set_attr(enum token_attr a){ attr = a; } 38 }; 39 __LOG_END_NAMESPACE 40 #endif
1 #ifndef _SCANNER_H_ 2 #define _SCANNER_H_ 3 4 #include <algorithm> 5 #include <string> 6 #include <vector> 7 8 #include "log_config.h" 9 #include "token.h" 10 11 __LOG_BEGIN_NAMESPACE 12 //struct token; 13 class scanner 14 { 15 public: 16 explicit scanner(const std::string& _code) :code(_code), index(0), line_no(0) {} 17 //explicit scanner(const std::string& file_name, std::ifstream::openmode opm); 18 std::vector<token> get_all_tokens(); 19 inline bool is_num(const char& ch);//ch是数字 20 inline bool is_alpha(const char& ch);//ch是字母 21 inline bool is_symbol(const char& ch);//ch是符号(=,<,*,&,(,)...) 22 inline bool is_variable(const char& ch);//ch是变量 23 inline char get_next_char(); 24 inline void put_char(); 25 private: 26 std::string code; 27 std::string::size_type index; 28 long line_no; 29 30 std::vector<token> tok_vec; 31 }; 32 __LOG_END_NAMESPACE 33 #endif
1 #include <cctype> 2 #include <fstream> 3 #include <iostream> 4 #include <set> 5 6 #include "keywords.h" 7 #include "log_config.h" 8 #include "scanner.h" 9 #include "states.h" 10 11 __LOG_BEGIN_NAMESPACE 12 13 std::vector<token> scanner::get_all_tokens(){ 14 token tok; 15 std::string str; 16 long line = 1; 17 enum _states state = IN_NONE; 18 char ch = get_next_char(); 19 //跳过前导空白符 20 while (isblank(ch) || isspace(ch) || ch == '\n') 21 { 22 if (ch == '\n') 23 ++line; 24 ch = get_next_char(); 25 } 26 put_char(); 27 //开始分词 28 while (1){ 29 ch = get_next_char(); 30 if (ch == '\n' && state != IN_CPP_COMMENT) 31 { 32 ++line; 33 continue; 34 } 35 if (ch == EOF || index == code.size()) 36 break; 37 switch (state) 38 { 39 //------------------------------------- 40 case IN_NONE://初始状态 41 if (is_num(ch)) 42 { 43 str = ch; 44 state = IN_NUM; 45 } 46 else if (is_variable(ch)) 47 { 48 str = ch; 49 state = IN_ALPHA; 50 } 51 else if (ch == '(' || ch == ')' || 52 ch == '[' || ch == ']' || 53 ch == ',' || ch == '~' || 54 ch == '?' 
|| //ch == ':' || 55 ch == ';' || ch == '{' || 56 ch == '}' || ch == '#') 57 { 58 str = ch; 59 tok.set_all(str, line, SYMBOL); 60 this->tok_vec.push_back(tok); 61 str.clear(); 62 state = IN_NONE; 63 } 64 else if (ch == '-') 65 { 66 str = ch; 67 state = IN_MINUS; 68 } 69 else if (ch == '!') 70 { 71 str = ch; 72 state = IN_EXCLAMATION_POINT; 73 } 74 else if (ch == '+') 75 { 76 str = ch; 77 state = IN_PLUS; 78 } 79 else if (ch == '*') 80 { 81 str = ch; 82 state = IN_MULTIPLE; 83 } 84 else if (ch == '&') 85 { 86 str = ch; 87 state = IN_AND; 88 } 89 else if (ch == '/') 90 { 91 str = ch; 92 state = IN_DIVIDE; 93 } 94 else if (ch == '%') 95 { 96 str = ch; 97 state = IN_PERCENT; 98 } 99 else if (ch == '<') 100 { 101 str = ch; 102 state = IN_LEFT_NARROW; 103 } 104 else if (ch == '>') 105 { 106 str = ch; 107 state = IN_RIGHT_NARROW; 108 } 109 else if (ch == '=') 110 { 111 str = ch; 112 state = IN_EQUAL; 113 } 114 else if (ch == '^') 115 { 116 str = ch; 117 state = IN_XOR; 118 } 119 else if (ch == '|') 120 { 121 str = ch; 122 state = IN_OR; 123 } 124 else if (ch == '.') 125 { 126 str = ch; 127 state = IN_DOT; 128 } 129 else if (ch == '"') 130 { 131 str = ch; 132 state = IN_FIRST_DOUBLE_QUOTATION_MARKS; 133 } 134 else if (ch == ':') 135 { 136 str = ch; 137 state = IN_COLON; 138 } 139 else if (ch == '\'') 140 { 141 str = ch; 142 state = IN_SINGLE_QUOTATION_MARKS; 143 } 144 break; 145 //------------------------------------- 146 case IN_NUM: 147 if (is_num(ch)) 148 { 149 str += ch; 150 state = IN_NUM; 151 } 152 else if (ch == 'e' || ch == 'E') 153 { 154 str += ch; 155 state = IN_E; 156 } 157 else if (ch == '.') 158 { 159 str += ch; 160 state = IN_DOT; 161 } 162 else 163 { 164 if (str == "0" && (ch == 'X' || ch == 'x')) 165 { 166 str += ch; 167 state = IN_0XNUM; 168 } 169 else 170 { 171 put_char(); 172 tok.set_all(str, line, INT); 173 this->tok_vec.push_back(tok); 174 state = IN_NONE; 175 } 176 } 177 break; 178 //------------------------------------- 179 case IN_0XNUM: 180 if 
(is_num(ch) || 181 (ch >= 'A' && ch <= 'F') || 182 (ch >= 'a' && ch <= 'f')) 183 { 184 str += ch; 185 state = IN_0XNUM; 186 } 187 else 188 { 189 put_char(); 190 tok.set_all(str, line, INT); 191 this->tok_vec.push_back(tok); 192 str.clear(); 193 state = IN_NONE; 194 } 195 break; 196 //------------------------------------- 197 case IN_E: 198 if (ch == '-') 199 { 200 str += ch; 201 state = IN_MINUS; 202 } 203 else if (ch == '+') 204 { 205 str += ch; 206 state = IN_PLUS; 207 } 208 else if (is_num(ch)) 209 { 210 str += ch; 211 state = IN_NUM; 212 } 213 break; 214 //------------------------------------- 215 case IN_ALPHA: 216 if (is_alpha(ch) || is_num(ch) || ch == '_') 217 { 218 str += ch; 219 state = IN_ALPHA; 220 } 221 else 222 { 223 put_char(); 224 if (keywords.find(str) == keywords.end()) 225 { 226 tok.set_all(str, line, VARIABLE); 227 } 228 else 229 { 230 tok.set_all(str, line, KEYWORD); 231 } 232 this->tok_vec.push_back(tok); 233 str.clear(); 234 state = IN_NONE; 235 } 236 break; 237 //------------------------------------- 238 case IN_MINUS: 239 if (ch == '>') 240 {// -> 241 str += ch; 242 tok.set_all(str, line, SYMBOL); 243 this->tok_vec.push_back(tok); 244 str.clear(); 245 state = IN_NONE; 246 } 247 else if (ch == '-') 248 {// -- 249 str += ch; 250 tok.set_all(str, line, SYMBOL); 251 this->tok_vec.push_back(tok); 252 str.clear(); 253 state = IN_NONE; 254 } 255 else if (ch == '=') 256 {// -= 257 str += ch; 258 tok.set_all(str, line, SYMBOL); 259 this->tok_vec.push_back(tok); 260 str.clear(); 261 state = IN_NONE; 262 } 263 else if (is_num(ch)) 264 {//负数 265 str += ch; 266 state = IN_NUM; 267 } 268 else 269 {//'-' 270 put_char(); 271 tok.set_all(str, line, SYMBOL); 272 this->tok_vec.push_back(tok); 273 str.clear(); 274 state = IN_NONE; 275 } 276 break; 277 //------------------------------------- 278 case IN_EXCLAMATION_POINT: 279 if (ch == '=') 280 { // != 281 str += ch; 282 tok.set_all(str, line, SYMBOL); 283 this->tok_vec.push_back(tok); 284 str.clear(); 285 
state = IN_NONE; 286 } 287 else 288 {//'!' 289 put_char(); 290 tok.set_all(str, line, SYMBOL); 291 this->tok_vec.push_back(tok); 292 str.clear(); 293 state = IN_NONE; 294 } 295 break; 296 //------------------------------ 297 case IN_PLUS: 298 if (ch == '+') 299 {// ++ 300 str += ch; 301 tok.set_all(str, line, SYMBOL); 302 this->tok_vec.push_back(tok); 303 str.clear(); 304 state = IN_NONE; 305 } 306 else if (ch == '=') 307 {// += 308 str += ch; 309 tok.set_all(str, line, SYMBOL); 310 this->tok_vec.push_back(tok); 311 str.clear(); 312 state = IN_NONE; 313 } 314 else if (is_num(ch)) 315 {//正数 316 str += ch; 317 state = IN_NUM; 318 } 319 else 320 {//'+' 321 put_char(); 322 tok.set_all(str, line, SYMBOL); 323 this->tok_vec.push_back(tok); 324 str.clear(); 325 state = IN_NONE; 326 } 327 break; 328 //------------------------- 329 case IN_MULTIPLE: 330 if (ch == '=') 331 {// *= 332 str += ch; 333 tok.set_all(str, line, SYMBOL); 334 this->tok_vec.push_back(tok); 335 str.clear(); 336 state = IN_NONE; 337 } 338 else 339 {//'*' 340 put_char(); 341 tok.set_all(str, line, SYMBOL); 342 this->tok_vec.push_back(tok); 343 str.clear(); 344 state = IN_NONE; 345 } 346 break; 347 //--------------------- 348 case IN_AND: 349 if (ch == '&') 350 {// && 351 str += ch; 352 tok.set_all(str, line, SYMBOL); 353 this->tok_vec.push_back(tok); 354 str.clear(); 355 state = IN_NONE; 356 } 357 else if (ch == '=') 358 {// &= 359 str += ch; 360 tok.set_all(str, line, SYMBOL); 361 this->tok_vec.push_back(tok); 362 str.clear(); 363 state = IN_NONE; 364 } 365 else 366 {// & 367 put_char(); 368 tok.set_all(str, line, SYMBOL); 369 this->tok_vec.push_back(tok); 370 str.clear(); 371 state = IN_NONE; 372 } 373 break; 374 //--------------------- 375 case IN_DIVIDE: 376 if (ch == '=') 377 {// /= 378 str += ch; 379 tok.set_all(str, line, SYMBOL); 380 this->tok_vec.push_back(tok); 381 str.clear(); 382 state = IN_NONE; 383 } 384 else if (ch == '/') 385 {// // 386 str.clear(); 387 state = IN_CPP_COMMENT; 388 } 389 
else if (ch == '*') 390 {// /* 391 str.clear(); 392 state = IN_C_COMMENT; 393 } 394 else 395 {// / 396 put_char(); 397 tok.set_all(str, line, SYMBOL); 398 this->tok_vec.push_back(tok); 399 str.clear(); 400 state = IN_NONE; 401 } 402 break; 403 //---------------------------- 404 case IN_C_COMMENT: 405 if (ch == '*') 406 { 407 ch = get_next_char(); 408 if (ch == '/') 409 { 410 state = IN_NONE; 411 continue; 412 } 413 else 414 { 415 state = IN_C_COMMENT; 416 } 417 } 418 else 419 { 420 state = IN_C_COMMENT; 421 } 422 break; 423 //---------------------------- 424 case IN_CPP_COMMENT: 425 if (ch == '\n') 426 { 427 put_char(); 428 state = IN_NONE; 429 } 430 else 431 { 432 state = IN_CPP_COMMENT; 433 } 434 break; 435 //---------------------------- 436 case IN_PERCENT: 437 if (ch == '=') 438 {// %= 439 str += ch; 440 tok.set_all(str, line, SYMBOL); 441 this->tok_vec.push_back(tok); 442 str.clear(); 443 state = IN_NONE; 444 } 445 else 446 {// % 447 put_char(); 448 tok.set_all(str, line, SYMBOL); 449 this->tok_vec.push_back(tok); 450 str.clear(); 451 state = IN_NONE; 452 } 453 break; 454 //------------------------ 455 case IN_LEFT_NARROW: 456 if (ch == '<') 457 {// << 458 str += ch; 459 tok.set_all(str, line, SYMBOL); 460 this->tok_vec.push_back(tok); 461 str.clear(); 462 state = IN_NONE; 463 } 464 else if (ch == '=') 465 {// <= 466 str += ch; 467 tok.set_all(str, line, SYMBOL); 468 this->tok_vec.push_back(tok); 469 str.clear(); 470 state = IN_NONE; 471 } 472 else 473 {// < 474 put_char(); 475 tok.set_all(str, line, SYMBOL); 476 this->tok_vec.push_back(tok); 477 str.clear(); 478 state = IN_NONE; 479 } 480 break; 481 //------------------------ 482 case IN_RIGHT_NARROW: 483 if (ch == '>') 484 {// >> 485 str += ch; 486 tok.set_all(str, line, SYMBOL); 487 this->tok_vec.push_back(tok); 488 str.clear(); 489 state = IN_NONE; 490 } 491 else if (ch == '=') 492 {// >= 493 str += ch; 494 tok.set_all(str, line, SYMBOL); 495 this->tok_vec.push_back(tok); 496 str.clear(); 497 state = 
IN_NONE; 498 } 499 else 500 {// > 501 put_char(); 502 tok.set_all(str, line, SYMBOL); 503 this->tok_vec.push_back(tok); 504 str.clear(); 505 state = IN_NONE; 506 } 507 break; 508 //---------------------------- 509 case IN_EQUAL: 510 if (ch == '=') 511 {// == 512 str += ch; 513 tok.set_all(str, line, SYMBOL); 514 this->tok_vec.push_back(tok); 515 str.clear(); 516 state = IN_NONE; 517 } 518 else 519 {// = 520 put_char(); 521 tok.set_all(str, line, SYMBOL); 522 this->tok_vec.push_back(tok); 523 str.clear(); 524 state = IN_NONE; 525 } 526 break; 527 //------------------------- 528 case IN_XOR: 529 if (ch == '=') 530 {// ^= 531 str += ch; 532 tok.set_all(str, line, SYMBOL); 533 this->tok_vec.push_back(tok); 534 str.clear(); 535 state = IN_NONE; 536 } 537 else 538 {// ^ 539 put_char(); 540 tok.set_all(str, line, SYMBOL); 541 this->tok_vec.push_back(tok); 542 str.clear(); 543 state = IN_NONE; 544 } 545 break; 546 //--------------------------- 547 case IN_OR: 548 if (ch == '|') 549 {// || 550 str += ch; 551 tok.set_all(str, line, SYMBOL); 552 this->tok_vec.push_back(tok); 553 str.clear(); 554 state = IN_NONE; 555 } 556 else if (ch == '=') 557 {// |= 558 str += ch; 559 tok.set_all(str, line, SYMBOL); 560 this->tok_vec.push_back(tok); 561 str.clear(); 562 state = IN_NONE; 563 } 564 else 565 {// | 566 put_char(); 567 tok.set_all(str, line, SYMBOL); 568 this->tok_vec.push_back(tok); 569 str.clear(); 570 state = IN_NONE; 571 } 572 break; 573 //--------------------- 574 case IN_DOT: 575 if (is_num(ch)) 576 { 577 str += ch; 578 state = IN_NUM; 579 } 580 else 581 { 582 put_char(); 583 tok.set_all(str, line, SYMBOL); 584 this->tok_vec.push_back(tok); 585 str.clear(); 586 state = IN_NONE; 587 } 588 break; 589 //------------------- 590 case IN_FIRST_DOUBLE_QUOTATION_MARKS: 591 if (ch == '\\') 592 {//遇到转义符 593 str += ch; 594 ch = get_next_char(); 595 if (ch == '"') 596 { 597 str += ch; 598 } 599 else 600 { 601 put_char(); 602 } 603 state = IN_FIRST_DOUBLE_QUOTATION_MARKS; 604 } 605 
else if (ch == '"') 606 { 607 str += ch; 608 tok.set_all(str, line, STRING); 609 this->tok_vec.push_back(tok); 610 str.clear(); 611 state = IN_NONE; 612 } 613 else 614 { 615 str += ch; 616 state = IN_FIRST_DOUBLE_QUOTATION_MARKS; 617 } 618 break; 619 //--------------------------- 620 case IN_COLON: 621 if (ch == ':') 622 { 623 str += ch; 624 tok.set_all(str, line, SYMBOL); 625 this->tok_vec.push_back(tok); 626 str.clear(); 627 state = IN_NONE; 628 } 629 else 630 { 631 put_char(); 632 tok.set_all(str, line, SYMBOL); 633 this->tok_vec.push_back(tok); 634 str.clear(); 635 state = IN_NONE; 636 } 637 break; 638 //-------------------------- 639 case IN_SINGLE_QUOTATION_MARKS: 640 if (ch != '\'') 641 { 642 str += ch; 643 state = IN_SINGLE_QUOTATION_MARKS; 644 } 645 else 646 { 647 str += ch; 648 tok.set_all(str, line, STRING); 649 this->tok_vec.push_back(tok); 650 str.clear(); 651 state = IN_NONE; 652 } 653 break; 654 //-------------------------- 655 default: 656 break; 657 }// switch 658 }// while 659 return std::move(tok_vec); 660 } 661 662 bool scanner::is_alpha(const char& ch) 663 { 664 if ((ch >= 'A' && ch <= 'Z') || 665 (ch >= 'a' && ch <= 'z')) 666 return true; 667 return false; 668 } 669 bool scanner::is_num(const char& ch) 670 { 671 if (ch >= '0' && ch <= '9') 672 return true; 673 return false; 674 } 675 bool scanner::is_variable(const char& ch) 676 { 677 if (ch == '_' || is_alpha(ch)) 678 return true; 679 return false; 680 } 681 char scanner::get_next_char() 682 { 683 return code[index++]; 684 } 685 void scanner::put_char() 686 { 687 --index; 688 } 689 //scanner::scanner(const std::string& file_name, std::ifstream::openmode opm) 690 //{ 691 // std::ifstream ifs; 692 // ifs.open(file_name, opm); 693 // if (ifs) 694 // { 695 // ifs.seekg(0, ifs.end); 696 // std::size_t len = ifs.tellg(); 697 // ifs.seekg(0, ifs.beg); 698 // code.resize(len + 1); 699 // ifs.read((char*)&*(code.begin()), len); 700 // code[len ] = '\0'; 701 // 702 // 703 // std::cout << code << 
std::endl; 704 // } 705 // ifs.close(); 706 //} 707 __LOG_END_NAMESPACE
测试如下:
1 #include <iostream> 2 #include <vector> 3 #include <fstream> 4 5 #include "scanner.h" 6 7 using namespace std; 8 int main() 9 { 10 string file_name("C:\\Users\\zxh\\Desktop\\test1.c"); 11 ofstream ofs("C:\\Users\\zxh\\Desktop\\result.c"); 12 std::ifstream ifs; 13 ifs.open(file_name, ifstream::binary); 14 if (ifs) 15 { 16 ifs.seekg(0, ifs.end); 17 std::size_t len = ifs.tellg(); 18 ifs.seekg(0, ifs.beg); 19 string _code; 20 _code.resize(len + 1); 21 ifs.read((char*)&*(_code.begin()), len); 22 _code[len + 1] = '\0'; 23 ifs.close(); 24 scanner s(_code); 25 vector<token> v = s.get_all_tokens(); 26 for (const auto s : v) 27 { 28 ofs << s.name << " " << s.line_no << " "; 29 30 } 31 } 32 system("pause"); 33 return 0; 34 }
分词的代码选取为Nginx源码下的一个函数
1 static ngx_int_t 2 ngx_epoll_process_events(ngx_cycle_t *cycle, ngx_msec_t timer, ngx_uint_t flags) 3 { 4 int events; 5 uint32_t revents; 6 ngx_int_t instance, i; 7 ngx_uint_t level; 8 ngx_err_t err; 9 ngx_event_t *rev, *wev, **queue; 10 ngx_connection_t *c; 11 12 int i = 0XFFFFFFFF, 0X1234defc, 0x3333; 13 14 /* NGX_TIMER_INFINITE == INFTIM */ 15 16 ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, 17 "epoll timer: %M", timer); 18 19 events = epoll_wait(ep, event_list, (int) nevents, timer); 20 21 err = (events == -1) ? ngx_errno : 0; 22 23 if (flags & NGX_UPDATE_TIME || ngx_event_timer_alarm) { 24 ngx_time_update(); 25 } 26 27 if (err) { 28 if (err == NGX_EINTR) { 29 30 if (ngx_event_timer_alarm) { 31 ngx_event_timer_alarm = 0; 32 return NGX_OK; 33 } 34 35 level = NGX_LOG_INFO; 36 37 } else { 38 level = NGX_LOG_ALERT; 39 } 40 41 ngx_log_error(level, cycle->log, err, "epoll_wait() failed"); 42 return NGX_ERROR; 43 } 44 45 if (events == 0) { 46 if (timer != NGX_TIMER_INFINITE) { 47 return NGX_OK; 48 } 49 50 ngx_log_error(NGX_LOG_ALERT, cycle->log, 0, 51 "epoll_wait() returned no events without timeout"); 52 return NGX_ERROR; 53 } 54 55 ngx_mutex_lock(ngx_posted_events_mutex); 56 57 for (i = 0; i < events; i++) { 58 c = event_list[i].data.ptr; 59 60 instance = (uintptr_t) c & 1; 61 c = (ngx_connection_t *) ((uintptr_t) c & (uintptr_t) ~1); 62 63 rev = c->read; 64 65 if (c->fd == -1 || rev->instance != instance) { 66 67 /* 68 * the stale event from a file descriptor 69 * that was just closed in this iteration 70 */ 71 72 ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, 73 "epoll: stale event %p", c); 74 continue; 75 } 76 77 revents = event_list[i].events; 78 79 ngx_log_debug3(NGX_LOG_DEBUG_EVENT, cycle->log, 0, 80 "epoll: fd:%d ev:%04XD d:%p", 81 c->fd, revents, event_list[i].data.ptr); 82 83 if (revents & (EPOLLERR|EPOLLHUP)) { 84 ngx_log_debug2(NGX_LOG_DEBUG_EVENT, cycle->log, 0, 85 "epoll_wait() error on fd:%d ev:%04XD", 86 c->fd, revents); 87 } 88 89 #if 0 90 
if (revents & ~(EPOLLIN|EPOLLOUT|EPOLLERR|EPOLLHUP)) { 91 ngx_log_error(NGX_LOG_ALERT, cycle->log, 0, 92 "strange epoll_wait() events fd:%d ev:%04XD", 93 c->fd, revents); 94 } 95 #endif 96 97 if ((revents & (EPOLLERR|EPOLLHUP)) 98 && (revents & (EPOLLIN|EPOLLOUT)) == 0) 99 { 100 /* 101 * if the error events were returned without EPOLLIN or EPOLLOUT, 102 * then add these flags to handle the events at least in one 103 * active handler 104 */ 105 106 revents |= EPOLLIN|EPOLLOUT; 107 } 108 109 if ((revents & EPOLLIN) && rev->active) { 110 111 #if (NGX_HAVE_EPOLLRDHUP) 112 if (revents & EPOLLRDHUP) { 113 rev->pending_eof = 1; 114 } 115 #endif 116 117 if ((flags & NGX_POST_THREAD_EVENTS) && !rev->accept) { 118 rev->posted_ready = 1; 119 120 } else { 121 rev->ready = 1; 122 } 123 124 if (flags & NGX_POST_EVENTS) { 125 queue = (ngx_event_t **) (rev->accept ? 126 &ngx_posted_accept_events : &ngx_posted_events); 127 128 ngx_locked_post_event(rev, queue); 129 130 } else { 131 rev->handler(rev); 132 } 133 } 134 135 wev = c->write; 136 137 if ((revents & EPOLLOUT) && wev->active) { 138 139 if (c->fd == -1 || wev->instance != instance) { 140 141 /* 142 * the stale event from a file descriptor 143 * that was just closed in this iteration 144 */ 145 146 ngx_log_debug1(NGX_LOG_DEBUG_EVENT, cycle->log, 0, 147 "epoll: stale event %p", c); 148 continue; 149 } 150 151 if (flags & NGX_POST_THREAD_EVENTS) { 152 wev->posted_ready = 1; 153 154 } else { 155 wev->ready = 1; 156 } 157 158 if (flags & NGX_POST_EVENTS) { 159 ngx_locked_post_event(wev, &ngx_posted_events); 160 161 } else { 162 wev->handler(wev); 163 } 164 } 165 } 166 167 ngx_mutex_unlock(ngx_posted_events_mutex); 168 169 return NGX_OK; 170 }
分词结果如下
1 static 1 2 ngx_int_t 1 3 ngx_epoll_process_events 2 4 ( 2 5 ngx_cycle_t 2 6 * 2 7 cycle 2 8 , 2 9 ngx_msec_t 2 10 timer 2 11 , 2 12 ngx_uint_t 2 13 flags 2 14 ) 2 15 { 3 16 int 4 17 events 4 18 ; 4 19 uint32_t 5 20 revents 5 21 ; 5 22 ngx_int_t 6 23 instance 6 24 , 6 25 i 6 26 ; 6 27 ngx_uint_t 7 28 level 7 29 ; 7 30 ngx_err_t 8 31 err 8 32 ; 8 33 ngx_event_t 9 34 * 9 35 rev 9 36 , 9 37 * 9 38 wev 9 39 , 9 40 * 9 41 * 9 42 queue 9 43 ; 9 44 ngx_connection_t 10 45 * 10 46 c 10 47 ; 10 48 int 12 49 i 12 50 = 12 51 0XFFFFFFFF 12 52 , 12 53 0X1234defc 12 54 , 12 55 0x3333 12 56 ; 12 57 ngx_log_debug1 16 58 ( 16 59 NGX_LOG_DEBUG_EVENT 16 60 , 16 61 cycle 16 62 -> 16 63 log 16 64 , 16 65 0 16 66 , 16 67 "epoll timer: %M" 17 68 , 17 69 timer 17 70 ) 17 71 ; 17 72 events 19 73 = 19 74 epoll_wait 19 75 ( 19 76 ep 19 77 , 19 78 event_list 19 79 , 19 80 ( 19 81 int 19 82 ) 19 83 nevents 19 84 , 19 85 timer 19 86 ) 19 87 ; 19 88 err 21 89 = 21 90 ( 21 91 events 21 92 == 21 93 -1 21 94 ) 21 95 ? 21 96 ngx_errno 21 97 : 21 98 0 21 99 ; 21 100 if 23 101 ( 23 102 flags 23 103 & 23 104 NGX_UPDATE_TIME 23 105 || 23 106 ngx_event_timer_alarm 23 107 ) 23 108 { 23 109 ngx_time_update 24 110 ( 24 111 ) 24 112 ; 24 113 } 25 114 if 27 115 ( 27 116 err 27 117 ) 27 118 { 27 119 if 28 120 ( 28 121 err 28 122 == 28 123 NGX_EINTR 28 124 ) 28 125 { 28 126 if 30 127 ( 30 128 ngx_event_timer_alarm 30 129 ) 30 130 { 30 131 ngx_event_timer_alarm 31 132 = 31 133 0 31 134 ; 31 135 return 32 136 NGX_OK 32 137 ; 32 138 } 33 139 level 35 140 = 35 141 NGX_LOG_INFO 35 142 ; 35 143 } 37 144 else 37 145 { 37 146 level 38 147 = 38 148 NGX_LOG_ALERT 38 149 ; 38 150 } 39 151 ngx_log_error 41 152 ( 41 153 level 41 154 , 41 155 cycle 41 156 -> 41 157 log 41 158 , 41 159 err 41 160 , 41 161 "epoll_wait() failed" 41 162 ) 41 163 ; 41 164 return 42 165 NGX_ERROR 42 166 ; 42 167 } 43 168 if 45 169 ( 45 170 events 45 171 == 45 172 0 45 173 ) 45 174 { 45 175 if 46 176 ( 46 177 timer 46 178 != 46 179 
NGX_TIMER_INFINITE 46 180 ) 46 181 { 46 182 return 47 183 NGX_OK 47 184 ; 47 185 } 48 186 ngx_log_error 50 187 ( 50 188 NGX_LOG_ALERT 50 189 , 50 190 cycle 50 191 -> 50 192 log 50 193 , 50 194 0 50 195 , 50 196 "epoll_wait() returned no events without timeout" 51 197 ) 51 198 ; 51 199 return 52 200 NGX_ERROR 52 201 ; 52 202 } 53 203 ngx_mutex_lock 55 204 ( 55 205 ngx_posted_events_mutex 55 206 ) 55 207 ; 55 208 for 57 209 ( 57 210 i 57 211 = 57 212 0 57 213 ; 57 214 i 57 215 < 57 216 events 57 217 ; 57 218 i 57 219 ++ 57 220 ) 57 221 { 57 222 c 58 223 = 58 224 event_list 58 225 [ 58 226 i 58 227 ] 58 228 . 58 229 data 58 230 . 58 231 ptr 58 232 ; 58 233 instance 60 234 = 60 235 ( 60 236 uintptr_t 60 237 ) 60 238 c 60 239 & 60 240 1 60 241 ; 60 242 c 61 243 = 61 244 ( 61 245 ngx_connection_t 61 246 * 61 247 ) 61 248 ( 61 249 ( 61 250 uintptr_t 61 251 ) 61 252 c 61 253 & 61 254 ( 61 255 uintptr_t 61 256 ) 61 257 ~ 61 258 1 61 259 ) 61 260 ; 61 261 rev 63 262 = 63 263 c 63 264 -> 63 265 read 63 266 ; 63 267 if 65 268 ( 65 269 c 65 270 -> 65 271 fd 65 272 == 65 273 -1 65 274 || 65 275 rev 65 276 -> 65 277 instance 65 278 != 65 279 instance 65 280 ) 65 281 { 65 282 ngx_log_debug1 72 283 ( 72 284 NGX_LOG_DEBUG_EVENT 72 285 , 72 286 cycle 72 287 -> 72 288 log 72 289 , 72 290 0 72 291 , 72 292 "epoll: stale event %p" 73 293 , 73 294 c 73 295 ) 73 296 ; 73 297 continue 74 298 ; 74 299 } 75 300 revents 77 301 = 77 302 event_list 77 303 [ 77 304 i 77 305 ] 77 306 . 77 307 events 77 308 ; 77 309 ngx_log_debug3 79 310 ( 79 311 NGX_LOG_DEBUG_EVENT 79 312 , 79 313 cycle 79 314 -> 79 315 log 79 316 , 79 317 0 79 318 , 79 319 "epoll: fd:%d ev:%04XD d:%p" 80 320 , 80 321 c 81 322 -> 81 323 fd 81 324 , 81 325 revents 81 326 , 81 327 event_list 81 328 [ 81 329 i 81 330 ] 81 331 . 81 332 data 81 333 . 
81 334 ptr 81 335 ) 81 336 ; 81 337 if 83 338 ( 83 339 revents 83 340 & 83 341 ( 83 342 EPOLLERR 83 343 | 83 344 EPOLLHUP 83 345 ) 83 346 ) 83 347 { 83 348 ngx_log_debug2 84 349 ( 84 350 NGX_LOG_DEBUG_EVENT 84 351 , 84 352 cycle 84 353 -> 84 354 log 84 355 , 84 356 0 84 357 , 84 358 "epoll_wait() error on fd:%d ev:%04XD" 85 359 , 85 360 c 86 361 -> 86 362 fd 86 363 , 86 364 revents 86 365 ) 86 366 ; 86 367 } 87 368 # 89 369 if 89 370 0 89 371 if 90 372 ( 90 373 revents 90 374 & 90 375 ~ 90 376 ( 90 377 EPOLLIN 90 378 | 90 379 EPOLLOUT 90 380 | 90 381 EPOLLERR 90 382 | 90 383 EPOLLHUP 90 384 ) 90 385 ) 90 386 { 90 387 ngx_log_error 91 388 ( 91 389 NGX_LOG_ALERT 91 390 , 91 391 cycle 91 392 -> 91 393 log 91 394 , 91 395 0 91 396 , 91 397 "strange epoll_wait() events fd:%d ev:%04XD" 92 398 , 92 399 c 93 400 -> 93 401 fd 93 402 , 93 403 revents 93 404 ) 93 405 ; 93 406 } 94 407 # 95 408 endif 95 409 if 97 410 ( 97 411 ( 97 412 revents 97 413 & 97 414 ( 97 415 EPOLLERR 97 416 | 97 417 EPOLLHUP 97 418 ) 97 419 ) 97 420 && 98 421 ( 98 422 revents 98 423 & 98 424 ( 98 425 EPOLLIN 98 426 | 98 427 EPOLLOUT 98 428 ) 98 429 ) 98 430 == 98 431 0 98 432 ) 98 433 { 99 434 revents 106 435 |= 106 436 EPOLLIN 106 437 | 106 438 EPOLLOUT 106 439 ; 106 440 } 107 441 if 109 442 ( 109 443 ( 109 444 revents 109 445 & 109 446 EPOLLIN 109 447 ) 109 448 && 109 449 rev 109 450 -> 109 451 active 109 452 ) 109 453 { 109 454 # 111 455 if 111 456 ( 111 457 NGX_HAVE_EPOLLRDHUP 111 458 ) 111 459 if 112 460 ( 112 461 revents 112 462 & 112 463 EPOLLRDHUP 112 464 ) 112 465 { 112 466 rev 113 467 -> 113 468 pending_eof 113 469 = 113 470 1 113 471 ; 113 472 } 114 473 # 115 474 endif 115 475 if 117 476 ( 117 477 ( 117 478 flags 117 479 & 117 480 NGX_POST_THREAD_EVENTS 117 481 ) 117 482 && 117 483 ! 
117 484 rev 117 485 -> 117 486 accept 117 487 ) 117 488 { 117 489 rev 118 490 -> 118 491 posted_ready 118 492 = 118 493 1 118 494 ; 118 495 } 120 496 else 120 497 { 120 498 rev 121 499 -> 121 500 ready 121 501 = 121 502 1 121 503 ; 121 504 } 122 505 if 124 506 ( 124 507 flags 124 508 & 124 509 NGX_POST_EVENTS 124 510 ) 124 511 { 124 512 queue 125 513 = 125 514 ( 125 515 ngx_event_t 125 516 * 125 517 * 125 518 ) 125 519 ( 125 520 rev 125 521 -> 125 522 accept 125 523 ? 125 524 & 126 525 ngx_posted_accept_events 126 526 : 126 527 & 126 528 ngx_posted_events 126 529 ) 126 530 ; 126 531 ngx_locked_post_event 128 532 ( 128 533 rev 128 534 , 128 535 queue 128 536 ) 128 537 ; 128 538 } 130 539 else 130 540 { 130 541 rev 131 542 -> 131 543 handler 131 544 ( 131 545 rev 131 546 ) 131 547 ; 131 548 } 132 549 } 133 550 wev 135 551 = 135 552 c 135 553 -> 135 554 write 135 555 ; 135 556 if 137 557 ( 137 558 ( 137 559 revents 137 560 & 137 561 EPOLLOUT 137 562 ) 137 563 && 137 564 wev 137 565 -> 137 566 active 137 567 ) 137 568 { 137 569 if 139 570 ( 139 571 c 139 572 -> 139 573 fd 139 574 == 139 575 -1 139 576 || 139 577 wev 139 578 -> 139 579 instance 139 580 != 139 581 instance 139 582 ) 139 583 { 139 584 ngx_log_debug1 146 585 ( 146 586 NGX_LOG_DEBUG_EVENT 146 587 , 146 588 cycle 146 589 -> 146 590 log 146 591 , 146 592 0 146 593 , 146 594 "epoll: stale event %p" 147 595 , 147 596 c 147 597 ) 147 598 ; 147 599 continue 148 600 ; 148 601 } 149 602 if 151 603 ( 151 604 flags 151 605 & 151 606 NGX_POST_THREAD_EVENTS 151 607 ) 151 608 { 151 609 wev 152 610 -> 152 611 posted_ready 152 612 = 152 613 1 152 614 ; 152 615 } 154 616 else 154 617 { 154 618 wev 155 619 -> 155 620 ready 155 621 = 155 622 1 155 623 ; 155 624 } 156 625 if 158 626 ( 158 627 flags 158 628 & 158 629 NGX_POST_EVENTS 158 630 ) 158 631 { 158 632 ngx_locked_post_event 159 633 ( 159 634 wev 159 635 , 159 636 & 159 637 ngx_posted_events 159 638 ) 159 639 ; 159 640 } 161 641 else 161 642 { 161 643 wev 162 644 -> 162 
645 handler 162 646 ( 162 647 wev 162 648 ) 162 649 ; 162 650 } 163 651 } 164 652 } 165 653 ngx_mutex_unlock 167 654 ( 167 655 ngx_posted_events_mutex 167 656 ) 167 657 ; 167 658 return 169 659 NGX_OK 169 660 ; 169 661 } 170