RocketMQ-过滤器

布隆过滤器

布隆过滤器的原理可以自行搜索了解。总的来说,它可以精确判断某个key一定不存在,但判断为“存在”时有一定的误判率;它支持数据的插入和查询,但不支持删除,因此不适合需要频繁删除数据的场景。在分布式环境下,可以配合Redis的位图(bitmap)命令SETBIT/GETBIT,实现分布式布隆过滤器。

 

package org.apache.rocketmq.filter.util;

import com.google.common.hash.Hashing;

import java.nio.charset.Charset;

/**
 * Simple implementation of a bloom filter.
 * <p>
 * An instance only holds the sizing parameters ({@code f}, {@code n}) and the values derived from
 * them ({@code k}, {@code m}); the bits themselves live in a caller-supplied {@link BitsArray}.
 * All fields are assigned once in the constructor and never modified afterwards.
 * </p>
 */
public class BloomFilter {

    public static final Charset UTF_8 = Charset.forName("UTF-8");

    // error rate expressed as a percentage, e.g. 10 means 10/100 = 0.1
    private final int f;
    // expected number of elements that will be mapped to bits
    private final int n;

    // hash function num, by calculation.
    private final int k;
    // bit count, by calculation; always a multiple of Byte.SIZE.
    private final int m;

    /**
     * Create bloom filter by error rate and mapping num.
     *
     * @param f error rate as a percentage, must be in {@code [1, 100)}
     * @param n num will mapping to bit, must be greater than 0
     * @throws IllegalArgumentException if {@code f} or {@code n} is out of range, or if the required
     *                                  bit count does not fit into an {@code int}
     */
    public static BloomFilter createByFn(int f, int n) {
        return new BloomFilter(f, n);
    }

    /**
     * Constructor.
     *
     * @param f error rate as a percentage, must be in {@code [1, 100)}
     * @param n num will mapping to bit, must be greater than 0
     * @throws IllegalArgumentException if {@code f} or {@code n} is out of range, or if the required
     *                                  bit count does not fit into an {@code int}
     */
    private BloomFilter(int f, int n) {
        if (f < 1 || f >= 100) {
            throw new IllegalArgumentException("f must be greater or equal than 1 and less than 100");
        }
        if (n < 1) {
            throw new IllegalArgumentException("n must be greater than 0");
        }

        this.f = f;
        this.n = n;

        // set p = e^(-kn/m)
        // f = (1 - p)^k = e^(kln(1-p))
        // when p = 0.5, k = ln2 * (m/n), f = (1/2)^k = (0.618)^(m/n)
        double errorRate = f / 100.0;
        int hashNum = (int) Math.ceil(logMN(0.5, errorRate));

        if (hashNum < 1) {
            throw new IllegalArgumentException("Hash function num is less than 1, maybe you should change the value of error rate or bit num!");
        }
        this.k = hashNum;

        // m >= n*log2(1/f)*log2(e)
        double requiredBits = Math.ceil(n * logMN(2, 1 / errorRate) * logMN(2, Math.E));
        // m%8 = 0: round up to a whole number of bytes.
        double alignedBits = Byte.SIZE * Math.ceil(requiredBits / Byte.SIZE);

        // A double-to-int cast saturates at Integer.MAX_VALUE instead of failing, which would
        // silently produce an undersized filter whose bit count is not a multiple of 8.
        if (alignedBits > Integer.MAX_VALUE) {
            throw new IllegalArgumentException(
                String.format("Bit num(%.0f) required by f: %d, n: %d is greater than %d, "
                    + "maybe you should change the value of error rate or mapping num!",
                    alignedBits, f, n, Integer.MAX_VALUE)
            );
        }
        this.m = (int) alignedBits;
    }

    /**
     * Calculate bit positions of {@code str}.
     * <p>
     * See "Less Hashing, Same Performance: Building a Better Bloom Filter" by Adam Kirsch and Michael
     * Mitzenmacher.
     * </p>
     * Derives the {@code k} bit positions to set for the given key from a single 128-bit murmur3
     * hash, of which the low 64 bits are split into two 32-bit halves.
     *
     * @param str key to hash
     * @return array of length {@code k}; every element is in {@code [0, m)}
     */
    public int[] calcBitPositions(String str) {
        int[] bitPositions = new int[this.k];

        long hash64 = Hashing.murmur3_128().hashString(str, UTF_8).asLong();

        int hash1 = (int) hash64;
        int hash2 = (int) (hash64 >>> 32);

        for (int i = 1; i <= this.k; i++) {
            int combinedHash = hash1 + (i * hash2);
            // Flip all the bits if it's negative (guaranteed positive number)
            if (combinedHash < 0) {
                combinedHash = ~combinedHash;
            }
            bitPositions[i - 1] = combinedHash % this.m;
        }

        return bitPositions;
    }

    /**
     * Calculate bit positions of {@code str} to construct {@code BloomFilterData}
     *
     * @param str key to hash
     * @return filter data carrying the calculated positions and this filter's bit count
     */
    public BloomFilterData generate(String str) {
        int[] bitPositions = calcBitPositions(str);

        return new BloomFilterData(bitPositions, this.m);
    }

    /**
     * Calculate bit positions of {@code str}, then set the related {@code bits} positions to 1.
     * In other words: hash one key into several bit positions and mark them in the bits array.
     *
     * @throws IllegalArgumentException if the bit length of {@code bits} is not equal to {@code m}
     */
    public void hashTo(String str, BitsArray bits) {
        hashTo(calcBitPositions(str), bits);
    }

    /**
     * Set the related {@code bits} positions to 1.
     *
     * @throws IllegalArgumentException if the bit length of {@code bits} is not equal to {@code m}
     */
    public void hashTo(int[] bitPositions, BitsArray bits) {
        check(bits);

        for (int pos : bitPositions) {
            bits.setBit(pos, true);
        }
    }

    /**
     * Extra check:
     * <ul>
     * <li>1. check {@code filterData} belong to this bloom filter.</li>
     * </ul>
     * <p>
     * Then set the related {@code bits} positions to 1.
     * </p>
     *
     * @throws IllegalArgumentException if {@code filterData} fails {@link #isValid}, or the bit
     *                                  length of {@code bits} is not equal to {@code m}
     */
    public void hashTo(BloomFilterData filterData, BitsArray bits) {
        if (!isValid(filterData)) {
            throw new IllegalArgumentException(
                String.format("Bloom filter data may not belong to this filter! %s, %s",
                    filterData, this.toString())
            );
        }
        hashTo(filterData.getBitPos(), bits);
    }

    /**
     * Calculate bit positions of {@code str}, then check all the related {@code bits} positions is 1.
     *
     * @return true: all the related {@code bits} positions is 1
     * @throws IllegalArgumentException if the bit length of {@code bits} is not equal to {@code m}
     */
    public boolean isHit(String str, BitsArray bits) {
        return isHit(calcBitPositions(str), bits);
    }

    /**
     * Check all the related {@code bits} positions is 1, i.e. decide whether the positions hit.
     *
     * @param bitPositions positions to test, must contain at least one element
     * @return true: all the related {@code bits} positions is 1
     * @throws IllegalArgumentException if the bit length of {@code bits} is not equal to {@code m},
     *                                  or {@code bitPositions} is null or empty
     */
    public boolean isHit(int[] bitPositions, BitsArray bits) {
        check(bits);
        // Without any position there is nothing to test; fail with a clear message instead of
        // an ArrayIndexOutOfBoundsException / NullPointerException.
        if (bitPositions == null || bitPositions.length == 0) {
            throw new IllegalArgumentException("bitPositions must not be null or empty");
        }
        return allBitsSet(bitPositions, bits);
    }

    /**
     * Check all the related {@code bits} positions is 1.
     *
     * @return true: all the related {@code bits} positions is 1
     * @throws IllegalArgumentException if {@code filterData} fails {@link #isValid}, or the bit
     *                                  length of {@code bits} is not equal to {@code m}
     */
    public boolean isHit(BloomFilterData filterData, BitsArray bits) {
        if (!isValid(filterData)) {
            throw new IllegalArgumentException(
                String.format("Bloom filter data may not belong to this filter! %s, %s",
                    filterData, this.toString())
            );
        }
        return isHit(filterData.getBitPos(), bits);
    }

    /**
     * Check whether every one of {@code bitPositions} has already been occupied in {@code bits}.
     * Unlike {@link #isHit(int[], BitsArray)}, the length of {@code bits} is not checked here.
     *
     * @return true: if all positions have been occupied; false as soon as one position is not set.
     */
    public boolean checkFalseHit(int[] bitPositions, BitsArray bits) {
        return allBitsSet(bitPositions, bits);
    }

    /**
     * Scan {@code bitPositions} and stop at the first position that is not set in {@code bits}.
     */
    private static boolean allBitsSet(int[] bitPositions, BitsArray bits) {
        for (int pos : bitPositions) {
            // an unset bit means no one occupies the position.
            if (!bits.getBit(pos)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Ensure {@code bits} has exactly {@code m} bits.
     *
     * @throws IllegalArgumentException if the bit length of {@code bits} is not equal to {@code m}
     */
    protected void check(BitsArray bits) {
        if (bits.bitLength() != this.m) {
            throw new IllegalArgumentException(
                String.format("Length(%d) of bits in BitsArray is not equal to %d!", bits.bitLength(), this.m)
            );
        }
    }

    /**
     * Check {@code BloomFilterData} is valid, and belong to this bloom filter.
     * <ul>
     * <li>1. not null</li>
     * <li>2. {@link org.apache.rocketmq.filter.util.BloomFilterData#getBitNum} must be equal to {@code m} </li>
     * <li>3. {@link org.apache.rocketmq.filter.util.BloomFilterData#getBitPos} is not null</li>
     * <li>4. {@link org.apache.rocketmq.filter.util.BloomFilterData#getBitPos}'s length is equal to {@code k}</li>
     * </ul>
     *
     * @return true if all four conditions hold
     */
    public boolean isValid(BloomFilterData filterData) {
        return filterData != null
            && filterData.getBitNum() == this.m
            && filterData.getBitPos() != null
            && filterData.getBitPos().length == this.k;
    }

    /**
     * error rate.
     */
    public int getF() {
        return f;
    }

    /**
     * expect mapping num.
     */
    public int getN() {
        return n;
    }

    /**
     * hash function num.
     */
    public int getK() {
        return k;
    }

    /**
     * total bit num.
     */
    public int getM() {
        return m;
    }

    /**
     * Two filters are equal when {@code f}, {@code n}, {@code k} and {@code m} are all equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof BloomFilter)) {
            return false;
        }

        BloomFilter that = (BloomFilter) o;

        return f == that.f
            && k == that.k
            && m == that.m
            && n == that.n;
    }

    @Override
    public int hashCode() {
        int result = f;
        result = 31 * result + n;
        result = 31 * result + k;
        result = 31 * result + m;
        return result;
    }

    @Override
    public String toString() {
        return String.format("f: %d, n: %d, k: %d, m: %d", f, n, k, m);
    }

    /**
     * Logarithm of {@code n} to the base {@code m}.
     */
    protected double logMN(double m, double n) {
        return Math.log(n) / Math.log(m);
    }
}

 

 

SQL92过滤

在filter模块下的expression包中,定义了多个Expression,这些Expression是根据SQL语句编译(compile)出来的,然后调用其evaluate方法,就可以判断某条数据是否满足条件,具体可以看下面这个test case。

@Test
    public void testSQL() throws Exception {
        // Properties of the record that is checked against the SQL condition.
        final Map<String, Object> props = new HashMap<>();
        props.put("a", 1);
        props.put("b", 1);

        // EvaluationContext defines how the expression looks up the values of the data under test.
        EvaluationContext context = new EvaluationContext() {

            @Override
            public Object get(String name) {
                return props.get(name);
            }

            @Override
            public Map<String, Object> keyValues() {
                return props;
            }
        };

        // Any exception from compile/evaluate propagates so the test report shows the real cause.
        Object ret = FilterFactory.INSTANCE.get(ExpressionType.SQL92)
            .compile("a is not null and a > 0")
            .evaluate(context);
        System.out.println(ret);  // True

        // Actually verify the outcome instead of only printing it.
        assertThat(ret).isEqualTo(Boolean.TRUE);
    }

 

自定义过滤器

1 实现一个具体的过滤表达式 NothingExpression

/**
 * Filter expression that ignores the evaluation context and accepts every input.
 */
static class NothingExpression implements Expression {

    /**
     * @param context source of the values under evaluation; not consulted
     * @return always {@code Boolean.TRUE}
     */
    @Override
    public Object evaluate(final EvaluationContext context) throws Exception {
        final Boolean accepted = Boolean.TRUE;
        return accepted;
    }
}

 

2 定义该表达式对应的过滤器,实现FilterSpi接口,其中compile方法返回一个初始化好的过滤表达式,ofType方法返回该过滤器的类型名称

/**
 * {@code FilterSpi} implementation of type "Nothing": whatever the expression text is, it
 * compiles to a {@code NothingExpression}.
 */
static class NothingFilter implements FilterSpi {

    /**
     * @return the type name of this filter
     */
    @Override
    public String ofType() {
        return "Nothing";
    }

    /**
     * @param expr expression text; not inspected
     * @return a new {@code NothingExpression}
     */
    @Override
    public Expression compile(final String expr) throws MQFilterException {
        final Expression compiled = new NothingExpression();
        return compiled;
    }
}

 

3 注册该过滤器,编译得到表达式后调用其evaluate方法,并传入待过滤的数据,返回过滤结果

@Test
    public void testRegister() throws Exception {
        // Make the custom filter available under its type name ("Nothing").
        FilterFactory.INSTANCE.register(new NothingFilter());

        // Exceptions from compile/evaluate propagate so the test report shows the real cause
        // instead of an unrelated "expected true but was false" failure.
        Expression expr = FilterFactory.INSTANCE.get("Nothing").compile("abc");
        assertThat(expr).isNotNull();

        // Evaluate the compiled expression against an empty context and verify the result.
        assertThat((Boolean) expr.evaluate(new EmptyEvaluationContext())).isTrue();
    }

 

posted @ 2021-07-31 11:10  gaojy  阅读(221)  评论(0编辑  收藏  举报