Bloom Filter布隆过滤器原理和实现(2)
这一篇主要介绍参考 LevelDB 实现的简化版布隆过滤器。
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Marks an intentional switch fall-through (expands to a no-op statement).
#ifndef FALLTHROUGH_INTENDED
#define FALLTHROUGH_INTENDED \
  do {                       \
  } while (0)
#endif

// Returns 1 when the host stores the least significant byte first
// (little-endian) and 0 otherwise (big-endian).
int LittleEndian() {
  const int probe = 1;
  unsigned char first_byte = 0;
  std::memcpy(&first_byte, &probe, 1);
  return first_byte;
}

// Decodes four bytes at `ptr` as a little-endian 32-bit unsigned integer.
// `ptr` must point to at least four readable bytes; no alignment is required.
inline uint32_t DecodeFixed32(const char* ptr) {
  // The function must be *called*: testing the bare name `LittleEndian`
  // tests the function's address, which is always true.
  if (LittleEndian()) {
    // Host byte order already matches the encoding: copy the bytes directly.
    uint32_t result;
    std::memcpy(&result, ptr, sizeof(result));
    return result;
  }
  // Portable path: assemble the value byte by byte.
  const uint8_t* const buffer = reinterpret_cast<const uint8_t*>(ptr);
  return (static_cast<uint32_t>(buffer[0])) |
         (static_cast<uint32_t>(buffer[1]) << 8) |
         (static_cast<uint32_t>(buffer[2]) << 16) |
         (static_cast<uint32_t>(buffer[3]) << 24);
}

// Hashes the `n` bytes starting at `data`, mixed with `seed`.
// Similar to murmur hash. An empty input returns `seed` unchanged.
uint32_t Hash(const char* data, size_t n, uint32_t seed) {
  const uint32_t m = 0xc6a4a793;
  const uint32_t r = 24;
  const char* limit = data + n;
  // Only the low 32 bits of n * m take part in the hash.
  uint32_t h = seed ^ (static_cast<uint32_t>(n) * m);

  // Pick up four bytes at a time. Comparing the remaining length (instead of
  // `data + 4 <= limit`) never forms a pointer beyond one-past-the-end.
  while (limit - data >= 4) {
    const uint32_t w = DecodeFixed32(data);
    data += 4;
    h += w;
    h *= m;
    h ^= (h >> 16);
  }

  // Pick up the remaining 0-3 bytes.
  switch (limit - data) {
    case 3:
      h += static_cast<uint8_t>(data[2]) << 16;
      FALLTHROUGH_INTENDED;
    case 2:
      h += static_cast<uint8_t>(data[1]) << 8;
      FALLTHROUGH_INTENDED;
    case 1:
      h += static_cast<uint8_t>(data[0]);
      h *= m;
      h ^= (h >> r);
      break;
  }
  return h;
}

// Fixed-size bit array addressed by bit position.
class Bitmap {
 public:
  // Creates a bitmap with every bit cleared. One byte beyond size / 8 is
  // allocated because the integer division rounds down; as a result every
  // position in [0, size] is addressable.
  explicit Bitmap(size_t size) : bits((size >> 3) + 1, 0) {}

  // Sets the bit at position `val`. Positions beyond the allocated storage
  // are ignored instead of writing out of bounds.
  void bitmapSet(size_t val) {
    const size_t byte_index = val >> 3;  // >> 3 is a division by 8
    if (byte_index >= bits.size()) {
      return;
    }
    bits[byte_index] |= static_cast<uint8_t>(1u << (val % 8));
  }

  // Returns true when the bit at position `val` is set. Positions beyond the
  // allocated storage read as unset.
  bool bitmapGet(size_t val) const {
    const size_t byte_index = val >> 3;
    if (byte_index >= bits.size()) {
      return false;
    }
    return (bits[byte_index] & static_cast<uint8_t>(1u << (val % 8))) != 0;
  }

 private:
  std::vector<uint8_t> bits;  // packed bits, eight per byte
};

// Simplified Bloom filter modelled on LevelDB's: a key is hashed once and the
// remaining probe positions are derived by repeatedly adding a rotated copy
// of that hash (double hashing).
//
// Contain() never returns false for a key that was added; it may return true
// for a key that was never added (false positive).
class BloomFilter {
 private:
  static uint32_t BloomHash(const std::string& key) {
    return Hash(key.data(), key.size(), 0xbc9f1d34);
  }

  enum { defaultSize = 100000000 * 16 };  // 1.6 billion bits (~200 MB)

 public:
  // Creates a filter with the default size and 8 probes per key.
  BloomFilter() : k_(8), bit_count_(defaultSize), bitmap_(defaultSize) {}

  // Creates a filter with `bit_count` bits and `hash_count` probes per key.
  // A `bit_count` of 0 is raised to 1 so the modulo below is always defined.
  explicit BloomFilter(size_t bit_count, size_t hash_count = 8)
      : k_(hash_count),
        bit_count_(bit_count == 0 ? 1 : bit_count),
        bitmap_(bit_count_) {}

  // Records `s` in the filter.
  void Add(const std::string& s) {
    uint32_t h = BloomHash(s);
    const uint32_t delta = (h >> 17) | (h << 15);  // rotate right 17 bits
    for (size_t i = 0; i < k_; ++i) {
      bitmap_.bitmapSet(h % bit_count_);
      h += delta;
    }
  }

  // Returns true when `s` may have been added, false when it definitely was
  // not.
  bool Contain(const std::string& s) const {
    uint32_t h = BloomHash(s);
    const uint32_t delta = (h >> 17) | (h << 15);  // rotate right 17 bits
    for (size_t i = 0; i < k_; ++i) {
      if (!bitmap_.bitmapGet(h % bit_count_)) {
        return false;  // one unset bit proves the key was never added
      }
      h += delta;
    }
    return true;
  }

 private:
  // Declaration order matters: bitmap_ is sized from bit_count_.
  size_t k_;          // number of probes (hash functions) per key
  size_t bit_count_;  // number of addressable bits in the filter
  Bitmap bitmap_;     // held by value, so copy and destruction are automatic
};

// Small demonstration: add one key and query it plus a key never added.
void bloomFilterTest() {
  const std::string email = "1293173298@qq.com";
  BloomFilter bf;
  bf.Add(email);
  const bool ret1 = bf.Contain(email);       // true
  const bool ret2 = bf.Contain("even.com");  // false (barring a false positive)
  std::cout << std::boolalpha << ret1 << '\n' << ret2 << '\n';
}

// Define BLOOM_FILTER_NO_MAIN to reuse the definitions above inside another
// program (for example a test harness) that supplies its own main().
#ifndef BLOOM_FILTER_NO_MAIN
int main() {
  bloomFilterTest();
#ifdef _WIN32
  std::system("pause");  // keep the console window open on Windows
#endif
  return 0;
}
#endif