BitSet

一、原理

　　BitSet 类由 long 数组组成，我们知道，long类型数字是64位，如果将 N 个long数字的bit连起来，则可以表示 64*N个数字的存在性（存在标志为1，不存在标志为0）。

二、源码分析

1、构造方法

    /**
     * Creates a bit set whose backing array is allocated up front for the
     * requested number of bits.
     *
     * @param nbits the number of bits to make room for; zero is allowed
     * @throws NegativeArraySizeException if {@code nbits} is negative
     */
    public BitSet(int nbits) {
        // A negative size is rejected; a size of zero is accepted.
        if (nbits < 0) {
            throw new NegativeArraySizeException("nbits < 0: " + nbits);
        }

        // Record that the size of the words array was chosen by the caller.
        sizeIsSticky = true;
        initWords(nbits);
    }
    // 按 nbits 计算所需 long 的个数，并分配 words 数组
    /**
     * Allocates the backing {@code words} array with enough words to cover
     * bit indices {@code 0} through {@code nbits - 1}.
     *
     * @param nbits the number of bits to make room for
     */
    private void initWords(int nbits) {
        // Index of the word that holds the last requested bit. For nbits == 0
        // the argument is -1, whose arithmetic right shift is still -1, so the
        // array below ends up with length 0.
        int lastWord = wordIndex(nbits - 1);
        words = new long[lastWord + 1];
    }
　　 // 因为long类型是64位，所以bitIndex/64就可以得到bitIndex位于Long数组的第几位private static int wordIndex(int bitIndex) {
　　　　  // ADDRESS_BIT_PER_WORD = 6
        return bitIndex >> ADDRESS_BITS_PER_WORD;
    }

2、set方法

　　　先计算出该 index 应该落在数组中的哪一个 long 上，然后在该 long 的对应位上置 1。

    public void set(int bitIndex) {
        if (bitIndex < 0)
            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

        int wordIndex = wordIndex(bitIndex);
        expandTo(wordIndex);
　　　　 // 给对应的bitIndex位设置为1（1 << 65 = 2：溢出后归0了，当bitIndex>64时，实际上bitIndex=bitIndex%64）
　　　　 // 1 << bitIndex = 2^bitIndex = 1000...(bitIndex个0)
        words[wordIndex] |= (1L << bitIndex); // Restores invariants

        checkInvariants();
    }

    // 若 wordIndex 所在的 word 尚未被使用，则按需扩容，并更新 wordsInUse
    /**
     * Makes sure the word at {@code wordIndex} counts as in use, enlarging the
     * backing array through {@code ensureCapacity} when necessary.
     *
     * @param wordIndex index of the word that must become usable
     */
    private void expandTo(int wordIndex) {
        final int needed = wordIndex + 1;
        if (needed <= wordsInUse) {
            return; // The word is already within the in-use range.
        }
        ensureCapacity(needed);
        wordsInUse = needed;
    }
    // 扩容操作：新长度取“原长度的 2 倍”与“所需长度”二者中的较大值
    /**
     * Grows the backing array so that it can hold at least
     * {@code wordsRequired} words; does nothing when it is already that long.
     *
     * @param wordsRequired the minimum acceptable length of {@code words}
     */
    private void ensureCapacity(int wordsRequired) {
        final int current = words.length;
        if (current >= wordsRequired) {
            return;
        }

        // New length is the larger of twice the current length and the required length.
        final int doubled = 2 * current;
        final int newLength = (doubled >= wordsRequired) ? doubled : wordsRequired;
        words = Arrays.copyOf(words, newLength);

        // The array no longer has the length that was set up before this growth.
        sizeIsSticky = false;
    }

    /**
     * Asserts the internal consistency conditions that every public method is
     * required to leave intact. The checks only run when assertions are enabled.
     */
    private void checkInvariants() {
        // The last word in use, if there is one, is non-zero.
        assert wordsInUse == 0 || words[wordsInUse - 1] != 0;
        // The in-use count lies within the bounds of the array.
        assert 0 <= wordsInUse && wordsInUse <= words.length;
        // The word just past the in-use range, if it exists, is zero.
        assert wordsInUse == words.length || words[wordsInUse] == 0;
    }

3、get方法

　　这里的核心是 (words[wordIndex] & (1L << bitIndex)) != 0，即判断指定位上是否为 1。注意 Java 中 != 的优先级高于 &，所以外层括号不能省略。

    /**
     * Reports whether the bit at the given index is set.
     *
     * @param bitIndex the index of the bit to test
     * @return {@code true} if the bit is set; {@code false} if it is clear or
     *         lies beyond the words currently in use
     * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
     */
    public boolean get(int bitIndex) {
        if (bitIndex < 0) {
            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
        }

        checkInvariants();

        final int slot = wordIndex(bitIndex);
        if (slot >= wordsInUse) {
            return false; // Nothing beyond the in-use words is set.
        }

        // Single-bit mask for the bit's position inside its word.
        final long mask = 1L << bitIndex;
        return (words[slot] & mask) != 0;
    }

4、nextSetBit方法：从fromIndex开始，找到第一个为true的位的index

    public int nextSetBit(int fromIndex) {
        if (fromIndex < 0)
            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);

        checkInvariants();

        int u = wordIndex(fromIndex);
        if (u >= wordsInUse)
            return -1;
　　　　 // WORD_INDEX = -1
　　　　 // WORD_INDEX << fromIndex：右边fromIndex位为0，其他位为1，ex: -1<<3 = 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111000
        long word = words[u] & (WORD_MASK << fromIndex);

        while (true) {
            if (word != 0)
　　　　　　　　　 // Long.numberOfTailingZeros(word)：在word的二进制表示中，从最低位开始，找到第一个不为0的位的index
                return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word);//BITS_PER_WORD=64
            if (++u == wordsInUse) //防止溢出，溢出时返回-1
                return -1;
            word = words[u]; // 一直往后找，直至找到第一个为true的位
        }
    }

5、nextClearBit方法：从fromIndex开始，找到第一个为false的位的index

    public int nextClearBit(int fromIndex) {
        // Neither spec nor implementation handle bitsets of maximal length.
        // See 4816253.
        if (fromIndex < 0)
            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);

        checkInvariants();

        int u = wordIndex(fromIndex);
        if (u >= wordsInUse)
            return fromIndex;
　　　　　// ~words[u]：word位上true变false，false变true，与netSetBit相反
        long word = ~words[u] & (WORD_MASK << fromIndex);

        while (true) {
            if (word != 0)
                return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word);
            if (++u == wordsInUse)//如果溢出，则返回最大值
                return wordsInUse * BITS_PER_WORD;
            word = ~words[u];
        }
    }

三、应用

背景介绍：针对不同的城市配置不同的运维数据，在数据库中，运维数据 : 城市id = 1 : N

数据model大概是这样的：operateId（运维数据id）, cityIds（多个城市id，用逗号分割）

需求：根据用户所在城市，匹配出需要展示给用户的运维数据

常规解决方案：搜出每一条运维数据，对每一条数据中的cityIds进行split，再与用户所在城市进行匹配。

　　缺点：流量越大，对cpu的消耗就越大

使用BitSet：从数据库取出城市列表后只进行一次split，并转化为BitSet类型。实际过滤过程中只需要通过BitSet的get方法，便可以判断该条运维数据配置的城市是否包含用户所在的城市。大致代码如下。

/**
 * Plain data holder for one operations record: its id together with the
 * comma-separated list of city ids it applies to.
 */
public class OperateModel {

    /** Id of the operations record. */
    private Integer operateId;

    /** City ids joined by commas. */
    private String cityIds;

    /** @return the id of the operations record, or {@code null} if none was set */
    public Integer getOperateId() {
        return this.operateId;
    }

    /** @param operateId the id of the operations record */
    public void setOperateId(final Integer operateId) {
        this.operateId = operateId;
    }

    /** @return the comma-separated city ids, or {@code null} if none were set */
    public String getCityIds() {
        return this.cityIds;
    }

    /** @param cityIds the city ids joined by commas */
    public void setCityIds(final String cityIds) {
        this.cityIds = cityIds;
    }
}

/**
 * Pre-computes, for every operations record, a {@link BitSet} of the city ids
 * it is configured for, so that matching a user's city is a single bit test
 * per record instead of a string split per request.
 */
public class OperateUtil {

    /** Maps an operations record id to the bit set of its city ids. */
    private final Map<Integer, BitSet> map = new HashMap<Integer, BitSet>();

    /**
     * Builds the lookup table by converting the city list of every given
     * record into a {@link BitSet}.
     *
     * @param operateList the operations records to index
     * @throws NumberFormatException if a city id token is not a valid integer
     * @throws IndexOutOfBoundsException if a city id is negative (rejected by
     *         {@link BitSet#set(int)})
     */
    public OperateUtil(List<OperateModel> operateList) {
        for (OperateModel operate : operateList) {
            createBitSet(operate);
        }
    }

    /**
     * Parses one record's comma-separated city ids into a bit set and stores
     * it under the record's id. Records with a blank city list are skipped.
     */
    private void createBitSet(OperateModel operate) {
        String cityIds = operate.getCityIds();
        if (StringUtils.isBlank(cityIds)) {
            return;
        }

        BitSet bitSet = new BitSet();
        for (String token : cityIds.split(",")) {
            // Tolerate "1, 2" and "1,,2": surrounding whitespace and empty
            // tokens would otherwise make the integer parse fail.
            String id = token.trim();
            if (id.isEmpty()) {
                continue;
            }
            // parseInt yields the primitive directly; no boxing round trip.
            bitSet.set(Integer.parseInt(id));
        }

        map.put(operate.getOperateId(), bitSet);
    }

    /**
     * Finds every operations record whose city list contains the given city.
     *
     * @param cityId the id of the user's city
     * @return the ids of all matching records; empty if none match
     */
    public List<Integer> queryOperates(int cityId) {
        List<Integer> list = new ArrayList<Integer>();
        if (cityId < 0) {
            // BitSet.get rejects negative indices, and no record can contain a
            // negative city id, so answer "no match" regardless of map content.
            return list;
        }
        for (Entry<Integer, BitSet> entry : map.entrySet()) {
            if (entry.getValue().get(cityId)) {
                list.add(entry.getKey());
            }
        }
        return list;
    }
}

posted @ 2019-09-02 23:50 莹狼阅读(387) 评论(0) 编辑收藏举报

会员力量，点亮园子希望

刷新页面返回顶部

莹狼