支持快速查询的字符串数组
大致思路就是从string中得到一个hash码,然后比较hash码。
这个思路是从Python的string源码中发现的;查找方式是线性搜索,时间复杂度大概是O(n)。
这是Python的部分源码
/*
 * String hash routine, quoted (per the surrounding text) from CPython's
 * string object implementation.
 *
 * Returns the hash of the bytes in a->ob_sval, using a->ob_size as the
 * byte count. The result is stored in a->ob_shash, and a stored value
 * other than -1 is returned directly on later calls without rehashing.
 */
static long string_hash(PyStringObject *a)
{
    register int len;
    register unsigned char *p;
    register long x;

    /* Any value other than -1 is treated as an already computed hash. */
    if (a->ob_shash != -1) {
        return a->ob_shash;
    }
    len = a->ob_size;
    p = (unsigned char *)a->ob_sval;
    x = *p << 7;                    /* seed from the first byte */
    while (--len >= 0)
        x = (1000003*x) ^ *p++;     /* multiply, then XOR in the next byte */
    x ^= a->ob_size;                /* fold the length into the result */
    /* -1 is the "not cached" marker tested above, so it is never stored. */
    if (x == -1)
        x = -2;
    a->ob_shash = x;
    return x;
}
hash码多次打乱可以理解,但是具体为什么就不知道了。如果有dalao求告知|・ω・`)
下面是我的代码,仅做记录用。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Default geometry used by fstrs_init(). */
enum {
    FSTRS_SLOT_LEN = 100, /* bytes per slot, including the terminating '\0' */
    FSTRS_CAPACITY = 10   /* maximum number of stored strings */
};

/*
 * Fixed-capacity array of strings with one cached hash per entry.
 * Lookups compare the cached hash first and only fall back to strcmp()
 * when the hashes are equal.
 */
typedef struct FastStrings {
    int len;      // size of one slot in bytes, including the '\0'
    int num;      // capacity: maximum number of strings
    int cur_num;  // number of strings currently stored
    char *string; // num slots of len bytes each, stored back to back
    long *hash;   // hash[i] is _get_hash() of the string in slot i
} *fstrs;

/*
 * Allocate and initialise an empty container.
 *
 * ff: out parameter that receives the new container.
 *
 * Returns 0 on success, -1 if ff is NULL or the container itself cannot be
 * allocated, -2 if its buffers cannot be allocated. On failure nothing is
 * leaked and *ff (when ff is not NULL) is set to NULL.
 */
int fstrs_init(fstrs *ff)
{
    if (!ff)
        return -1;
    *ff = NULL;

    fstrs f = malloc(sizeof *f);
    if (!f)
        return -1;

    f->cur_num = 0;
    f->len = FSTRS_SLOT_LEN;
    f->num = FSTRS_CAPACITY;
    /* calloc zero-fills, so every slot starts out as an empty string. */
    f->string = calloc((size_t)f->num, (size_t)f->len);
    f->hash = calloc((size_t)f->num, sizeof *f->hash);
    if (!f->string || !f->hash) {
        free(f->string);
        free(f->hash);
        free(f);
        return -2;
    }

    *ff = f;
    return 0;
}

/*
 * Release a container created by fstrs_init(). Passing NULL is a no-op.
 */
void fstrs_free(fstrs f)
{
    if (!f)
        return;
    free(f->string);
    free(f->hash);
    free(f);
}

/*
 * Hash a NUL-terminated string: start from 1, multiply by 1000003 and XOR
 * in each byte, then XOR in the length.
 *
 * The arithmetic is done on unsigned long so that overflow wraps instead of
 * being undefined, and bytes are read as unsigned char so the result does
 * not depend on whether plain char is signed.
 *
 * str must not be NULL.
 */
long _get_hash(const char *str)
{
    const unsigned char *p = (const unsigned char *)str;
    size_t len = strlen(str);
    unsigned long x = 1;

    for (size_t i = 0; i < len; i++)
        x = (1000003UL * x) ^ p[i];
    x ^= (unsigned long)len;
    return (long)x;
}

/*
 * Append a copy of str to the container.
 *
 * Returns 0 on success, -1 if f or str is NULL, the container is full, or
 * str (plus its terminator) does not fit into one slot.
 */
int fstrs_insert(fstrs f, const char *str)
{
    if (!f || !str)
        return -1;

    size_t len = strlen(str);
    if (f->cur_num >= f->num || f->len <= 0 || len >= (size_t)f->len)
        return -1;

    char *slot = f->string + (size_t)f->cur_num * (size_t)f->len;
    memset(slot, 0, (size_t)f->len); /* also supplies the terminator */
    memcpy(slot, str, len);
    f->hash[f->cur_num] = _get_hash(str);
    f->cur_num++;
    return 0;
}

/*
 * Linear search for str, starting at index begin (negative values are
 * treated as 0).
 *
 * Returns the index of the first matching entry at or after begin, or -1
 * if there is none or if f or str is NULL.
 */
int fstrs_find(fstrs f, const char *str, int begin)
{
    if (!f || !str)
        return -1;
    if (begin < 0)
        begin = 0;

    long hash = _get_hash(str);
    for (int i = begin; i < f->cur_num; i++) {
        /* Equal hashes can still be a collision, so confirm with strcmp
         * against the slot being examined. */
        if (f->hash[i] == hash &&
            strcmp(f->string + (size_t)i * (size_t)f->len, str) == 0)
            return i;
    }
    return -1;
}

/*
 * Demo driver. Define FSTRS_NO_MAIN to compile the container without it,
 * e.g. when embedding this file in a test harness.
 */
#ifndef FSTRS_NO_MAIN
int main(void)
{
    static const char *const samples[] = { "asfsa", "aassf", "assdff", "asf" };
    fstrs f = NULL;
    int res = fstrs_init(&f);

    if (res < 0) {
        printf("f error:%d\n", res);
        return -1;
    }
    for (size_t i = 0; i < sizeof samples / sizeof samples[0]; i++) {
        res = fstrs_insert(f, samples[i]);
        if (res < 0) {
            printf("f error:%d\n", res);
            fstrs_free(f);
            return -1;
        }
    }
    res = fstrs_find(f, "asf", 0);
    printf("res = %d\n", res);
    fstrs_free(f);
    return 0;
}
#endif
有时间再和字典树方法做一下比较。