算法 - 布隆过滤器
要点:有 n 条数据,通过 k 个 hash 函数,将每条数据都转化为 k 个位置,并把长度为 m 的位数组(下标 0 到 m-1)中对应的位置为 1。查询时,先将数据转化为 k 个位置,只要其中有一位不为 1,就说明该数据一定不存在。但是,k 个位置全都为 1,并不能说明数据一定存在(可能是其他数据置位造成的误报)。
1 import redis.clients.jedis.Jedis; 2 import redis.clients.jedis.Pipeline; 3 4 import java.io.IOException; 5 6 public class BloomFilter { 7 8 /** 9 * m -> 【布隆过滤器长度】 10 * n -> 插入元素个数 11 * p -> 误报率 12 * m = - nlnp / [(ln2)*(ln2)] 13 * <p> 14 * k -> 【哈希函数个数】 15 * k = m * ln2 / n 16 */ 17 18 private long filterLen; 19 private int hashFuncNum; 20 private static Jedis jedis; 21 22 public BloomFilter(long elementNum, double errorRate) { 23 this.filterLen = this.getFilterLen(elementNum, errorRate); 24 this.hashFuncNum = this.getHashFuncNum(this.filterLen, elementNum); 25 } 26 27 static { 28 jedis = new Jedis("127.0.0.1", 6379); 29 jedis.auth("password"); 30 } 31 32 /** 33 * 计算哈希函数数量 34 * 35 * @param filterLen 过滤器长度 36 * @param elementNum 元素个数 37 * @return 哈希函数数量 38 */ 39 private int getHashFuncNum(long filterLen, long elementNum) { 40 return new Double(Math.floor(filterLen * Math.log(2) / elementNum)).intValue(); 41 } 42 43 /** 44 * 计算布隆过滤器长度 45 * 46 * @param elementNum 元素格式 47 * @param errorRate 误报率 48 * @return 布隆过滤器长度 49 */ 50 private long getFilterLen(long elementNum, double errorRate) { 51 double m = -1 * elementNum * Math.log(errorRate) / Math.pow(Math.log(2), 2); 52 return new Double(m).longValue(); 53 } 54 55 /** 56 * 添加元素 57 * 58 * @param sign 59 * @param key 60 */ 61 public void add(String sign, String key) { 62 long[] hashArr = hash(key); 63 Pipeline pipeline = jedis.pipelined(); 64 try { 65 for (long hash : hashArr) { 66 pipeline.setbit(sign, hash, true); 67 } 68 pipeline.sync(); 69 } finally { 70 try { 71 pipeline.close(); 72 } catch (IOException e) { 73 e.printStackTrace(); 74 } 75 } 76 } 77 78 public boolean notExist(String sign, String key) { 79 long[] hashArr = hash(key); 80 boolean result; 81 Pipeline pipeline = jedis.pipelined(); 82 try { 83 for (long hash : hashArr) { 84 pipeline.getbit(sign, hash); 85 } 86 result = pipeline.syncAndReturnAll().contains(false); 87 } finally { 88 try { 89 pipeline.close(); 90 } catch (IOException e) { 91 
e.printStackTrace(); 92 } 93 } 94 if (result) { 95 add(sign, key); 96 } 97 return result; 98 } 99 100 private long[] hash(String key) { 101 long hash1 = fnvHash(key); 102 long hash2 = hash1 >>> 16; 103 long[] result = new long[this.hashFuncNum]; 104 for (int i = 0; i < this.hashFuncNum; i++) { 105 long hash = hash1 + i * hash2; 106 if (hash < 0) { 107 hash = ~hash; 108 } 109 result[i] = hash % this.filterLen; 110 } 111 return result; 112 } 113 114 public static int fnvHash(String key) { 115 int hash = (int) 2166136261L; 116 for (int i = 0; i < key.length(); i++) { 117 hash += (hash * 16777619) ^ key.charAt(i); 118 } 119 return hash & Integer.MAX_VALUE; 120 } 121 122 public static void main(String[] args) { 123 String sign = "Bloom-Filter"; 124 BloomFilter bf = new BloomFilter(100000, 0.01); 125 bf.add(sign, "aaaaaa"); 126 bf.add(sign, "bbbbbb"); 127 System.out.println(bf.notExist(sign, "aaaaaa")); // false 128 System.out.println(bf.notExist(sign, "ffffff")); // true 如果不存在会写入,再执行为false 129 } 130 131 }
知止而后有定;定而后能静;静而后能安;安而后能虑;虑而后能得。