// Karp-Rabin hash, used for substring search.
#include <stdint.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

/// Computes the Karp-Rabin hash of the first `len` bytes of `str`.
///
/// Each byte is read as an unsigned char and folded in as
/// hash = hash * 2 + byte, so the first byte carries weight 2^(len-1) and
/// the last byte weight 1. All arithmetic is unsigned and wraps modulo 2^32.
///
/// @param str  Buffer to hash; must be readable for `len` bytes.
/// @param len  Number of bytes to hash; 0 yields 0.
/// @return     The 32-bit hash of the window.
inline
uint32_t kr_hash(char const *str, size_t len)
{
    uint32_t hash = 0;
    unsigned char const *p = reinterpret_cast<unsigned char const *>(str);
    for (; len; --len) {
        hash = (hash << 1) + *p++;
    }
    return hash;
}

/// Slides a window hash one byte to the right.
///
/// Removes the contribution of the outgoing byte `a` (weighted by `times`),
/// doubles the remainder and appends the incoming byte `b`.
///
/// @param a      Byte leaving the window (its first byte).
/// @param b      Byte entering the window (the one just past its end).
/// @param h      Hash of the current window.
/// @param times  Weight of the first byte of the window, i.e. 2^(window_len-1).
/// @return       Hash of the window shifted by one byte.
inline
uint32_t kr_rehash(char a, char b, uint32_t h, int times)
{
    // Work in uint32_t throughout: a byte value of up to 255 multiplied by a
    // weight of 2^24 does not fit in a signed int.
    uint32_t const outgoing = static_cast<unsigned char>(a);
    uint32_t const incoming = static_cast<unsigned char>(b);
    uint32_t const weight = static_cast<uint32_t>(times);
    return ((h - outgoing * weight) << 1) + incoming;
}
/// Returns the weight of the first byte of a window of `len` bytes,
/// i.e. 2^(len-1), for use as the `times` argument of the rolling hash.
///
/// @param len  Window (pattern) length in bytes.
/// @return     1 for len <= 1; 2^(len-1) otherwise, reduced modulo 2^32 and
///             converted to int (so the result is 0 once len exceeds 32).
inline
int kr_times(size_t len)
{
    if (len <= 1) {
        return 1;
    }
    size_t const shift = len - 1;
    if (shift >= 32) {
        // 2^shift is a multiple of 2^32, so the weight vanishes in 32-bit
        // modular arithmetic; shifting by >= 32 directly would be undefined.
        return 0;
    }
    // Shift in unsigned arithmetic to avoid signed overflow.
    return static_cast<int>(static_cast<uint32_t>(1) << shift);
}
/// Finds the first occurrence of `pattern` inside `src` using a rolling hash.
///
/// @param src          Buffer to search; must be readable for `src_len` bytes.
/// @param src_len      Number of bytes in `src`.
/// @param pattern      Bytes to look for.
/// @param pattern_len  Length of `pattern`; asserted to be at most 25.
/// @param hash         kr_hash(pattern, pattern_len).
/// @param times        Weight of the first window byte, 2^(pattern_len-1).
/// @return             Pointer to the first match within `src`, or NULL.
inline
char const* kr_strstr_start(char const *src, size_t src_len,
        char const * pattern, size_t pattern_len, uint32_t hash,
        uint32_t times)
{
    assert(pattern_len <= 25); // pattern must be at most 25 bytes long
    if (src_len < pattern_len) return NULL;

    size_t const last = src_len - pattern_len; // last valid window start
    uint32_t window = kr_hash(src, pattern_len);
    for (size_t j = 0; ; ++j) {
        // The hash only filters candidates; memcmp confirms a real match.
        if (window == hash && memcmp(pattern, src + j, pattern_len) == 0) {
            return src + j;
        }
        // Stop before rolling past the final window: the next incoming byte
        // would be src[src_len], which lies outside the buffer.
        if (j == last) break;
        window = kr_rehash(src[j], src[j + pattern_len], window,
                           static_cast<int>(times));
    }
    return NULL;
}

/// Finds the next occurrence of `pattern` strictly after offset 0 of `src`.
///
/// The rolling hash is seeded with `hash`, so the window at src[0] is taken
/// to hash to `hash` -- which holds when `src` points at a previous match.
/// Offset 0 itself is never reported.
///
/// @param src          Start of the previous match; readable for `src_len` bytes.
/// @param src_len      Number of bytes remaining from `src`.
/// @param pattern      Bytes to look for.
/// @param pattern_len  Length of `pattern`.
/// @param hash         kr_hash(pattern, pattern_len).
/// @param times        Weight of the first window byte, 2^(pattern_len-1).
/// @return             Pointer to the next match, or NULL if there is none.
inline
char const * kr_strstr_next(char const *src, size_t src_len,
        char const * pattern, size_t pattern_len,
        uint32_t hash, uint32_t times)
{
    // At least one byte beyond the window at offset 0 is needed to slide.
    if (src_len <= pattern_len) return NULL;

    size_t const last = src_len - pattern_len; // last valid window start (>= 1)
    uint32_t window = hash;
    for (size_t j = 1; ; ++j) {
        // Roll from window j-1 to window j; the incoming byte index
        // j-1+pattern_len never exceeds src_len-1.
        window = kr_rehash(src[j - 1], src[j - 1 + pattern_len], window,
                           static_cast<int>(times));
        if (window == hash && memcmp(pattern, src + j, pattern_len) == 0) {
            return src + j;
        }
        if (j == last) break;
    }
    return NULL;
}
#include <vector>
/// Collects every occurrence (overlapping ones included) of the
/// NUL-terminated `pattern` inside the NUL-terminated `src`.
///
/// @param src      NUL-terminated text to search.
/// @param pattern  NUL-terminated pattern; an empty pattern yields no matches.
/// @param result   Receives a pointer to the start of each match, in order.
/// @return         Number of matches appended to `result`.
inline
int strstr(char const *src, char const * pattern, std::vector<char const *> &result)
{
    if (*pattern == '\0') return 0;

    int n = 0;
    // Resume one byte past each hit so the search always makes progress;
    // p + 1 is valid because a match of a non-empty pattern is never at the
    // terminating NUL.
    for (char const *p = ::strstr(src, pattern); p != NULL;
         p = ::strstr(p + 1, pattern)) {
        result.push_back(p);
        ++n;
    }
    return n;
}

/// Collects every occurrence (overlapping ones included) of `pattern`
/// inside `src` using the Karp-Rabin rolling hash.
///
/// Patterns longer than 25 bytes are delegated to the strstr() overload
/// above, which ignores the length arguments and therefore requires both
/// `src` and `pattern` to be NUL-terminated in that case.
///
/// @param src          Text to search; readable for `src_len` bytes.
/// @param src_len      Number of bytes in `src`.
/// @param pattern      Bytes to look for.
/// @param pattern_len  Length of `pattern`; 0 yields no matches.
/// @param result       Receives a pointer to the start of each match, in order.
/// @return             Number of matches appended to `result`.
inline
int kr_strstr(char const *src, size_t src_len, char const * pattern, size_t pattern_len, std::vector<char const *> &result)
{
    if (pattern_len == 0) return 0;
    if (pattern_len > 25) {
        return strstr(src, pattern, result);
    }

    uint32_t const hash = kr_hash(pattern, pattern_len);
    // Weight of the first window byte: 2^(pattern_len-1). pattern_len is in
    // [1, 25] here, so the shift is well defined.
    uint32_t const times = static_cast<uint32_t>(1) << (pattern_len - 1);

    int n = 0;
    char const *p = kr_strstr_start(src, src_len, pattern, pattern_len, hash, times);
    while (p != NULL) {
        result.push_back(p);
        ++n;
        // p lies inside [src, src + src_len), so the offset fits in size_t.
        size_t const offset = static_cast<size_t>(p - src);
        p = kr_strstr_next(p, src_len - offset, pattern, pattern_len, hash, times);
    }
    return n;
}