算法 - 布隆过滤器

要点:有 n 条数据,通过 k 个哈希函数把每条数据映射为 k 个位置,并将长度为 m 的位数组(下标 0 ~ m-1)中对应的位置为 1。查询时,同样先把数据映射为 k 个位置:只要其中任意一位为 0,说明该数据一定不存在;但即使 k 位全部为 1,也不能说明数据一定存在(这些位可能是由其他数据置为 1 的,即存在误判)。

  1 import redis.clients.jedis.Jedis;
  2 import redis.clients.jedis.Pipeline;
  3 
  4 import java.io.IOException;
  5 
  6 public class BloomFilter {
  7 
  8     /**
  9      * m -> 【布隆过滤器长度】
 10      * n -> 插入元素个数
 11      * p -> 误报率
 12      * m = - nlnp / [(ln2)*(ln2)]
 13      * <p>
 14      * k -> 【哈希函数个数】
 15      * k = m * ln2 / n
 16      */
 17 
 18     private long filterLen;
 19     private int hashFuncNum;
 20     private static Jedis jedis;
 21 
 22     public BloomFilter(long elementNum, double errorRate) {
 23         this.filterLen = this.getFilterLen(elementNum, errorRate);
 24         this.hashFuncNum = this.getHashFuncNum(this.filterLen, elementNum);
 25     }
 26 
 27     static {
 28         jedis = new Jedis("127.0.0.1", 6379);
 29         jedis.auth("password");
 30     }
 31 
 32     /**
 33      * 计算哈希函数数量
 34      *
 35      * @param filterLen  过滤器长度
 36      * @param elementNum 元素个数
 37      * @return 哈希函数数量
 38      */
 39     private int getHashFuncNum(long filterLen, long elementNum) {
 40         return new Double(Math.floor(filterLen * Math.log(2) / elementNum)).intValue();
 41     }
 42 
 43     /**
 44      * 计算布隆过滤器长度
 45      *
 46      * @param elementNum 元素格式
 47      * @param errorRate  误报率
 48      * @return 布隆过滤器长度
 49      */
 50     private long getFilterLen(long elementNum, double errorRate) {
 51         double m = -1 * elementNum * Math.log(errorRate) / Math.pow(Math.log(2), 2);
 52         return new Double(m).longValue();
 53     }
 54 
 55     /**
 56      * 添加元素
 57      *
 58      * @param sign
 59      * @param key
 60      */
 61     public void add(String sign, String key) {
 62         long[] hashArr = hash(key);
 63         Pipeline pipeline = jedis.pipelined();
 64         try {
 65             for (long hash : hashArr) {
 66                 pipeline.setbit(sign, hash, true);
 67             }
 68             pipeline.sync();
 69         } finally {
 70             try {
 71                 pipeline.close();
 72             } catch (IOException e) {
 73                 e.printStackTrace();
 74             }
 75         }
 76     }
 77 
 78     public boolean notExist(String sign, String key) {
 79         long[] hashArr = hash(key);
 80         boolean result;
 81         Pipeline pipeline = jedis.pipelined();
 82         try {
 83             for (long hash : hashArr) {
 84                 pipeline.getbit(sign, hash);
 85             }
 86             result = pipeline.syncAndReturnAll().contains(false);
 87         } finally {
 88             try {
 89                 pipeline.close();
 90             } catch (IOException e) {
 91                 e.printStackTrace();
 92             }
 93         }
 94         if (result) {
 95             add(sign, key);
 96         }
 97         return result;
 98     }
 99 
100     private long[] hash(String key) {
101         long hash1 = fnvHash(key);
102         long hash2 = hash1 >>> 16;
103         long[] result = new long[this.hashFuncNum];
104         for (int i = 0; i < this.hashFuncNum; i++) {
105             long hash = hash1 + i * hash2;
106             if (hash < 0) {
107                 hash = ~hash;
108             }
109             result[i] = hash % this.filterLen;
110         }
111         return result;
112     }
113 
114     public static int fnvHash(String key) {
115         int hash = (int) 2166136261L;
116         for (int i = 0; i < key.length(); i++) {
117             hash += (hash * 16777619) ^ key.charAt(i);
118         }
119         return hash & Integer.MAX_VALUE;
120     }
121 
122     public static void main(String[] args) {
123         String sign = "Bloom-Filter";
124         BloomFilter bf = new BloomFilter(100000, 0.01);
125         bf.add(sign, "aaaaaa");
126         bf.add(sign, "bbbbbb");
127         System.out.println(bf.notExist(sign, "aaaaaa"));    // false
128         System.out.println(bf.notExist(sign, "ffffff"));    // true 如果不存在会写入,再执行为false
129     }
130 
131 }

 

posted @ 2020-05-05 16:42  御简  阅读(154)  评论(0编辑  收藏  举报