glibc-2.19 之 strlen 实现
前几天遇到一个有意思的问题:实现 strlen(不考虑线程安全)。
下面是我的实现:
/* Return the number of characters in S that precede its terminating
   NUL byte.  S must point to a NUL-terminated string.  */
size_t strlen(const char *s)
{
    const char *p = s;

    /* The post-increment runs even on the iteration that reads the
       terminator, so P ends up one past the NUL when the loop exits.  */
    while (*p++)
        ;

    /* Step back over the terminator before measuring the distance.  */
    return (size_t)(p - 1 - s);
}
Glibc 2.19 的实现:
针对此实现,函数头部分一开始没太明白:size_t strlen (str) const char *str; 这其实是旧式(K&R)函数定义写法,参数类型写在参数列表之后,等价于 size_t strlen (const char *str),详细情况参见 Glibc 2.19。下面的实现还是比较经典的,兼顾了性能和体系结构。
/* Return the length of the null-terminated string STR.  Scan for
   the null terminator quickly by testing one longword (four or eight
   bytes) at a time.

   glibc 2.19 spells this header in old-style (K&R) form,
   "strlen (str) const char *str;".  The prototype form used here
   declares the same function; old-style definitions are obsolescent
   and were removed from the language in C23.  */
size_t
strlen (const char *str)
{
  const char *char_ptr;
  const unsigned long int *longword_ptr;
  unsigned long int longword, himagic, lomagic;

  /* Handle the first few characters by reading one character at a time.
     Do this until CHAR_PTR is aligned on a longword boundary.  */
  for (char_ptr = str; ((unsigned long int) char_ptr
                        & (sizeof (longword) - 1)) != 0;
       ++char_ptr)
    if (*char_ptr == '\0')
      return char_ptr - str;

  /* All these elucidatory comments refer to 4-byte longwords,
     but the theory applies equally well to 8-byte longwords.  */

  /* CHAR_PTR is now longword-aligned, so it can be read a longword at a
     time.  Keep the const qualifier: the string is only ever read.  */
  longword_ptr = (const unsigned long int *) char_ptr;

  /* HIMAGIC has the top bit of every byte set, LOMAGIC the bottom bit:

       himagic: 10000000 10000000 10000000 10000000
       lomagic: 00000001 00000001 00000001 00000001

     In (LONGWORD - LOMAGIC), a byte that was zero borrows and ends up
     with its top bit set.  "& ~LONGWORD" discards bytes whose top bit
     was already set in the input, and "& HIMAGIC" keeps only the top
     bits.  The result is nonzero when some byte of LONGWORD is zero.  */
  himagic = 0x80808080L;
  lomagic = 0x01010101L;
  if (sizeof (longword) > 4)
    {
      /* 64-bit version of the magic.  */
      /* Do the shift in two steps to avoid a warning if long has 32 bits.  */
      himagic = ((himagic << 16) << 16) | himagic;
      lomagic = ((lomagic << 16) << 16) | lomagic;
    }
  /* The byte-by-byte checks below only cover longwords of up to 8 bytes.  */
  if (sizeof (longword) > 8)
    abort ();

  /* Instead of the traditional loop which tests each character,
     we will test a longword at a time.  The tricky part is testing
     if *any of the four* bytes in the longword in question are zero.
     Note that a whole longword is loaded each time, so bytes located
     after the terminator (within the same aligned longword) are read
     as well.  */
  for (;;)
    {
      longword = *longword_ptr++;

      if (((longword - lomagic) & ~longword & himagic) != 0)
        {
          /* Which of the bytes was the zero?  The borrow can also flag
             bytes above the zero one, so inspect them in memory order.
             If none of them were zero, it was a misfire; continue the
             search.  */

          /* LONGWORD_PTR was already advanced; step back to the longword
             that was just tested.  */
          const char *cp = (const char *) (longword_ptr - 1);

          if (cp[0] == 0)
            return cp - str;
          if (cp[1] == 0)
            return cp - str + 1;
          if (cp[2] == 0)
            return cp - str + 2;
          if (cp[3] == 0)
            return cp - str + 3;
          if (sizeof (longword) > 4)
            {
              if (cp[4] == 0)
                return cp - str + 4;
              if (cp[5] == 0)
                return cp - str + 5;
              if (cp[6] == 0)
                return cp - str + 6;
              if (cp[7] == 0)
                return cp - str + 7;
            }
        }
    }
}