支持快速查询的字符串数组

  大致思路是:先为每个string计算一个hash码,查找时先比较hash码,hash码相同时再比较字符串本身。

  这个思路是从Python的string源码中发现的;查找采用线性搜索,时间复杂度大概是O(n)。

  这是Python的部分源码

/*
 * Excerpt quoted from the Python string implementation: returns the hash of
 * a string object, computing it on first use and caching it in ob_shash.
 *
 * A cached value of -1 is treated as "not computed yet", which is why a
 * freshly computed hash of -1 is replaced by -2 before it is stored.
 */
static long string_hash(PyStringObject *a) {
    register int len;
    register unsigned char *p;
    register long x;

    /* Fast path: reuse the hash cached on the object. */
    if (a->ob_shash != -1) {
        return a->ob_shash;
    }
    len = a->ob_size;
    p = (unsigned char *)a->ob_sval;
    /* Seed the hash from the first byte. */
    x = *p << 7;
    /* Fold in every byte: multiply by 1000003, then XOR the byte. */
    while (--len >= 0) x = (1000003*x) ^ *p++;
    /* Mix the length into the result. */
    x ^= a->ob_size;
    /* Keep -1 free to mean "no cached hash". */
    if (x == -1) x = -2;
    a->ob_shash = x;
    return x;
}

  对hash码进行多次打乱这一点可以理解,但具体为什么要这样计算就不清楚了。如果有dalao知道,求告知|・ω・`) 

  下面是我的代码,仅做记录用。

  

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/*
 * Fixed-capacity table of strings with one hash value stored per string.
 *
 * All strings live in a single contiguous buffer divided into `num` slots
 * of `len` bytes each; entry k starts at string + k * len and its hash is
 * hash[k].
 *
 * `FastStrings` names the struct itself and `fstrs` is a pointer to it.
 * The struct typedef is required in C so that `FastStrings` can be used as
 * a type name (e.g. in sizeof); without it only `struct FastStrings` exists.
 */
typedef struct FastStrings {
    int len;        /* slot width in bytes (max length of a string) */
    int num;        /* max number of strings */
    int cur_num;    /* number of slots currently in use */
    char *string;   /* backing buffer holding all slots */
    long *hash;     /* per-slot hash values */
} FastStrings, *fstrs;

/*
 * Allocate and initialise an empty string table with 10 slots of 100 bytes
 * each (the slot width includes the terminating '\0').
 *
 * ff: out-parameter that receives the new table. On any failure *ff is set
 *     to NULL and nothing is left allocated.
 *
 * Returns 0 on success, -1 if ff is NULL or the table header cannot be
 * allocated, -2 if the string or hash storage cannot be allocated.
 */
int fstrs_init(fstrs *ff) {
    if (!ff) return -1;

    /* sizeof **ff sizes the pointed-to struct without naming its type. */
    fstrs f = malloc(sizeof **ff);
    if (!f) {
        *ff = NULL;
        return -1;
    }

    f->cur_num = 0;
    f->len = 100;    // contains '\0'
    f->num = 10;

    /* calloc zero-fills the storage and checks the size multiplication. */
    f->string = calloc((size_t)f->num * (size_t)f->len, sizeof *f->string);
    f->hash = calloc((size_t)f->num, sizeof *f->hash);
    if (!f->string || !f->hash) {
        /* Release whatever was obtained so a failed init leaks nothing. */
        free(f->string);
        free(f->hash);
        free(f);
        *ff = NULL;
        return -2;
    }

    *ff = f;
    return 0;
}
/*
 * Compute a hash of the NUL-terminated string `str`.
 *
 * Starting from 1, each byte is folded in as x = (1000003 * x) ^ byte, and
 * the string length is XORed into the result at the end.
 *
 * The arithmetic is done in unsigned long so that overflow wraps instead of
 * being undefined, and bytes are read as unsigned char so the result does
 * not depend on whether plain char is signed.
 *
 * str: string to hash; must not be NULL.
 * Returns the hash value; the empty string hashes to 1.
 */
long _get_hash(const char *str) {
    const unsigned char *bytes = (const unsigned char *)str;
    size_t len = strlen(str);
    unsigned long x = 1;

    for (size_t k = 0; k < len; k++) {
        x = (1000003UL * x) ^ bytes[k];
    }
    x ^= (unsigned long)len;
    return (long)x;
}
/*
 * Append `str` to the table `f`, recording its hash next to it.
 *
 * The string is copied into the next free slot, which is zero-filled first
 * so the stored copy is always NUL-terminated.
 *
 * Returns 0 on success, or -1 when the table has no free slot or the string
 * does not fit in a slot together with its terminator.
 */
int fstrs_insert(fstrs f, char *str) {
    long remaining = strlen(str);

    if (f->cur_num >= f->num) {
        return -1;
    }
    if (remaining >= f->len) {
        return -1;
    }

    int slot_index = f->cur_num;
    char *slot = f->string + slot_index * f->len;

    f->hash[slot_index] = _get_hash(str);
    memset(slot, 0, f->len);

    /* Copy the characters only; the zero fill above supplies the '\0'. */
    char *dst = slot;
    while (remaining-- > 0) {
        *dst++ = *str++;
    }

    f->cur_num = slot_index + 1;
    return 0;
}
/*
 * Linear search for `str` in table `f`, starting at index `begin`.
 *
 * Each candidate's stored hash is compared first; the strings themselves are
 * compared only when the hashes match, so most mismatches cost a single
 * integer comparison.
 *
 * f:     table to search.
 * str:   NUL-terminated string to look for.
 * begin: index of the first slot to examine; negative values are treated
 *        as 0.
 *
 * Returns the index of the first matching entry at or after `begin`, or -1
 * if there is none.
 */
int fstrs_find(fstrs f, char *str, int begin) {
    long hash = _get_hash(str);

    if (begin < 0) {
        begin = 0;
    }
    for (int i = begin; i < f->cur_num; i++) {
        /* Compare against slot i; strcmp returns 0 when the strings match. */
        if (f->hash[i] == hash && strcmp(f->string + i * f->len, str) == 0) {
            return i;
        }
    }
    return -1;
}
int main() {
    int res;
    fstrs f = NULL;
    res = fstrs_init(&f);
    if (res < 0) {printf("f error:%d\n", res);return -1;}
    res = fstrs_insert(f, "asfsa");
    res = fstrs_insert(f, "aassf");
    res = fstrs_insert(f, "assdff");
    res = fstrs_insert(f, "asf");
    res = fstrs_find(f, "asf", 0);
    printf("res = %d\n", res);
    return 0;
}
View Code

 

  有时间再和字典树方法做一下比较。

posted @ 2018-04-02 23:55  backinfile  阅读(193)  评论(0编辑  收藏  举报