实现一些字符串操作标准库函数、解决一些字符串问题
一、实现字符串操作标准库函数
(1)、strcpy、strncpy、memmove、memcpy、memset、strlen、strncat 的实现
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
// The strcpy() function copies the string pointed to by src, including the terminating null byte ('\0'), to the buffer
// pointed to by dest. The strings may not overlap, and the destination string dest must be large enough to receive thecopy. char *strcpy(char *dest, const char *src) { assert((src != NULL) && (dest != NULL)); size_t i; for (i = 0; src[i] != '\0'; i++) dest[i] = src[i]; dest[i] = '\0'; return dest; } // The strncpy() function is similar, except that at most n bytes of src are copied. Warning: If there is no null byte among the first n bytes of src, // the string placed in dest will not be null-terminated. If the length of src is less than n, strncpy() pads the remainder of dest with null bytes. char *strncpy(char *dest, const char *src, size_t n) { assert((src != NULL) && (dest != NULL)); size_t i; for (i = 0; i < n && src[i] != '\0'; i++) dest[i] = src[i]; for (; i < n; i++) dest[i] = '\0'; return dest; }
SQL Code
/* 在32位的x86平台上,每次拷贝1个字节需要一条指令,每次拷贝4个字节也只需要一条指 * 令,memcpy函数的实现尽可能4个字节4个字节地拷贝 */ void *memcpy(void *dest, const void *src, size_t n) { assert((src != NULL) && (dest != NULL)); char *d = dest; const char *s = src; int *di; const int *si; int r = n % 4; while (r--) *d++ = *s++; di = (int *)d; si = (const int *)s; n /= 4; while (n--) *di++ = *si++; return dest; } |
C++ Code
1
2 3 4 5 6 7 |
size_t strlen(const char *p)
{ assert(p != NULL);
size_t size = 0; while (*p++ != '\0') ++size; return size; } |
C++ Code
1
2 3 4 5 6 7 8 9 10 11 |
char *strncat(char *dest, const char *src, size_t n)
{ size_t dest_len = strlen(dest); size_t i; for (i = 0 ; i < n && src[i] != '\0' ; i++) dest[dest_len + i] = src[i]; dest[dest_len + i] = '\0'; return dest; } |
不用临时空间的memmove实现:
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
//src可以不保留
void *memmove(void *dst, const void *src, size_t count) { byte *pbTo = (byte *)dst; byte *pbFrom = (byte *)src; assert(dst != NULL && src != NULL);//不能存在空指针 if (dst <= src || pbTo >= pbFrom + count)// { while (count-- > 0) { *pbTo++ = *pbFrom++; //按递增拷贝 } } else // { pbTo = pbTo + count - 1; //overlap的情况,从高位地址向低位拷贝 pbFrom = pbFrom + count - 1; while (count-- > 0) { *pbTo-- = *pbFrom--; //按递减拷贝 } } return dst; } |
memset的实现:
C++ Code
1
2 3 4 5 6 7 8 |
void *memset(void *buffer, int c, int count)
{ char *buffer_p = (char *)buffer; assert(buffer != NULL); while(count-- > 0) *buffer_p++ = (char)c; return buffer; } |
二、解决字符串问题
(1)、将单词之间出现一个或多个连续的空白字符都压缩为1个。
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
//编一个函数,输入一个字符串,要求做一个新字符串,把其中所有的一个或多个连续的空白字符都压缩为一个空格。这里所说的空白包括空格、'\t'、'\n'、'\r'。
char *shrink_space(char *dest, const char *src, size_t n) { assert((src != NULL) && (dest != NULL)); size_t i, j; dest[0] = src[0]; for (i = 1, j = 1; src[i] != '\0'; i++, j++) { if (src[i] == '\t' || src[i] == '\n' || src[i] == '\r' || src[i] == ' ') if (src[i - 1] != '\t' && src[i - 1] != '\n' && src[i - 1] != '\r' && src[i - 1] != ' ') dest[j] = ' '; else j--; else dest[j] = src[i]; } dest[j] = '\0'; return dest; } |
(2)、解析URL 中的路径和查询字符串。?
号后面是查询字符串,由 “key=value”形式的键值对组成,以&隔开。
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
/*************************************************************************
> File Name: find_url_token.c > Author: Simba > Mail: dameng34@163.com > Created Time: Sat 26 Jan 2013 04:05:32 PM CST ************************************************************************/ #include<stdio.h> #include<string.h> #include<stdlib.h> #define N 10 typedef struct { char *tokens[N]; int count; } unit_t; void find_url_token(char str[], const char tok[], unit_t *ptr) { int i; char *token = NULL; char *saveptr = NULL; ptr->count = 0; const char *needle = "://"; if (strstr(str, needle)) { for (i = 0; ; str = NULL, i++) { token = strtok_r(str, tok, &saveptr); if (token == NULL) break; else { ptr->tokens[i] = token; ptr->count++; } } } } int main(void) { /* 不能定义为char *url = "..."; 因为此时是定义一个指向字符串字面值(位于.rodata段)的指针,而 调用strtok_r函数会修改这个字符串,运行时会产生段错误 */ char url[] = "http://www.google.cn/search?complete=1&hl=zh-CN&ie=GB2312&q=linux&meta="; /* 给url初始化用的这个字符串并没有分配在.rodata段,而是直接写在指令里了, * 运行程序时通过movl 指令把字符串写到栈上,这就是url的存储空间*/ unit_t *ptr = malloc(sizeof(unit_t)); find_url_token(url, "?&", ptr); int i; for (i = 0; i < ptr->count; i++) printf("%s\n", ptr->tokens[i]); free(ptr); return 0; } |
(3)、去除\r\n,去除左右空白字符
C++ Code
1
2 3 4 5 6 7 |
void str_trim_crlf(char *str)
{ char *p = &str[strlen(str) - 1]; while (*p == '\r' || *p == '\n') *p-- = '\0'; } |
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
void AllTrim( char *str )
{ char *head, *tail; if ( str == NULL ) return; for( head = str; *head == ' ' || *head == '\t'; head ++ ); for( tail = str + strlen(str) - 1; (*tail == ' ' || *tail == '\t' ) && tail >= head; tail -- ); while( head <= tail ) *str ++ = *head ++; *str = 0; } |
(4)、判断字符串是否为回文
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
// 判断字符串是否为回文
bool isSysmmetry(const char *src) { assert(src != NULL); int len = strlen(src); assert(len != 0); const char *tmp = src + len - 1; int i; for (i = 0; i < len / 2; i++) { if (*src++ != *tmp--) break; } if (i == len / 2) return true; else return false; } |
(5)、google笔试:编码实现求给定字符串(全为小写英文字母)的最小后继,如 “abc” 的最小后继为“abd”, "dhz" 的最小后继为“di"。
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
int MinNextStr(const char *src, char *&minnext)
{ int srclen = strlen(src); minnext = (char *)malloc((srclen + 1) * sizeof(char)); if (minnext == NULL) return -1; strcpy(minnext, src); int i = srclen - 1; while (i >= 0) { minnext[i]++; if (minnext[i] <= 'z') break; i--; } if (i < 0) return 0; else { minnext[++i] = '\0'; return 1; } } |
如果把给定字符串全为小写英文字母改为大小写英文字母,则只要把 第13行改为:
if (minnext[i] <= 'z' && minnext[i] >= 'a' || minnext[i] <= 'Z');
(6)、中兴:编码实现字符串右移n位,如“diopheg” 右移2位为“egdioph”。
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
bool RightMoveStr(char *src, int n)
{ int len = strlen(src); int mov = n % len; char *rstr = (char *)malloc((mov + 1) * sizeof(char)); if (rstr == NULL) return false; int i = 0; while (i < mov) { rstr[i] = src[len - mov + i]; i++; } rstr[i] = '\0'; i = len - mov - 1; while (i >= 0) { src[i + mov] = src[i]; i--; } i = 0; while (i < mov) { src[i] = rstr[i]; i++; } free(rstr); return true; } bool RightMove(char *src, char *&ssrc, int n) { int len = strlen(src); ssrc = (char *)malloc(sizeof(char) * (len + 1)); if (ssrc == NULL) return false; n = n % len; char *tmp = src + len - n; strcpy(ssrc, tmp); strncat(ssrc, src, len - n); return true; } |
更巧妙的方法:
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
void reverse(char* str, int left, int right)
{ char tmp; while (left < right) { tmp = str[left]; str[left++] = str[right]; str[right--] = tmp; } } void rightmove(char* str, int k) { n = strlen(str); k = k % n; reverse(str, 0, n-k-1); reverse(str, n-k, n-1); reverse(str, 0, n-1); } |
(7)、新邮通:字符串反转:给定字符串“we;tonight;you;”,编码实现输出”ew;thginot;uoy;“
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
void ReverseStr(char *src)
{ int len = strlen(src); int i = 0; int first = 0; int end = 0; while (i < len) { if (src[i] == ';') { end = i - 1; while (first < end) { char tmp = src[first]; src[first] = src[end]; src[end] = tmp; first++; end--; } first = i + 1; } i++; } } |
如果给定字符串末尾没有';',只需要修改9,10,11行
C++ Code
1
2 3 4 5 6 7 8 |
if (src[i] == ';' || i == len - 1)
{ if (src[i] == ';') end = i - 1; else end = i; while... } |
(8)、不使用局部变量实现strlen、两数交换
C++ Code
1
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
#define swap(a,b) \
{ assert(sizeof(a)==sizeof(b)); char tempBuf[sizeof(a)]; memcpy(tempBuf,&a,sizeof(a)); memcpy(&a,&b,sizeof(b)); memcpy(&b,tempBuf,sizeof(b)); } #define swap(a, b) \ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) // typeof 是gcc支持,iso c支持__typeof__ int mstrlen(char *p) { return ToEnd(p)-p; } char * ToEnd(char * p) { while(*p != '\0') p++; return p; } |