BitSet

一、原理

  BitSet 内部由一个 long 数组组成。我们知道,long 类型是 64 位,如果将 N 个 long 的 bit 连起来,就可以表示 0 ~ 64*N-1 这 64*N 个非负整数的存在性(对应位为 1 表示存在,为 0 表示不存在)。

二、源码分析

1、构造方法

    /**
     * Creates a bit set whose backing array is allocated up front for bit
     * indices {@code 0} through {@code nbits - 1}.
     *
     * @param nbits the initial number of bits; zero is allowed
     * @throws NegativeArraySizeException if {@code nbits} is negative
     */
    public BitSet(int nbits) {
        // A negative size is rejected; a size of zero is fine.
        if (nbits < 0) {
            throw new NegativeArraySizeException("nbits < 0: " + nbits);
        }

        // Marks the size of words as having been specified by the caller.
        sizeIsSticky = true;
        initWords(nbits);
    }
   // 
    /**
     * Allocates the backing array with one word per 64 bits, enough to cover
     * bit indices {@code 0} through {@code nbits - 1}.
     */
    private void initWords(int nbits) {
        // Index of the word that holds the highest requested bit (-1 when nbits is 0).
        int lastWord = wordIndex(nbits - 1);
        words = new long[lastWord + 1];
    }
   // 因为long类型是64位,所以bitIndex/64就可以得到bitIndex位于Long数组的第几位private static int wordIndex(int bitIndex) {
     // ADDRESS_BIT_PER_WORD = 6
return bitIndex >> ADDRESS_BITS_PER_WORD; }

2、set方法

   先计算出该 index 落在 long 数组的哪个元素上,然后再将该 long 中对应的位置为 1。

    public void set(int bitIndex) {
        if (bitIndex < 0)
            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

        int wordIndex = wordIndex(bitIndex);
        expandTo(wordIndex);
     // 给对应的bitIndex位设置为1(1 << 65 = 2:溢出后归0了,当bitIndex>64时,实际上bitIndex=bitIndex%64)
     // 1 << bitIndex = 2^bitIndex = 1000...(bitIndex个0)
words[wordIndex]
|= (1L << bitIndex); // Restores invariants checkInvariants(); }

   // Makes sure the word at wordIndex counts as "in use", enlarging the array first when needed.
    private void expandTo(int wordIndex) {
        int wordsRequired = wordIndex+1;
        if (wordsInUse < wordsRequired) {
            ensureCapacity(wordsRequired);
            wordsInUse = wordsRequired;
        }
    }

    // Capacity growth: the new length is the larger of twice the current length and the
    // requested length, so it is at least doubled but never smaller than what is required.
    private void ensureCapacity(int wordsRequired) {
        if (words.length < wordsRequired) {
            // Allocate larger of doubled size or required size
            int request = Math.max(2 * words.length, wordsRequired);
            words = Arrays.copyOf(words, request);
            // The array length is no longer the one set at construction time.
            sizeIsSticky = false;
        }
    }

    /**
     * Every public method must preserve these invariants.
     */
    private void checkInvariants() {
        // The last word in use is non-zero (or no word is in use at all).
        assert(wordsInUse == 0 || words[wordsInUse - 1] != 0);
        // wordsInUse stays within the bounds of the array.
        assert(wordsInUse >= 0 && wordsInUse <= words.length);
        // The word right after the ones in use, if there is one, is zero.
        assert(wordsInUse == words.length || words[wordsInUse] == 0);
    }

3、get方法

  这里的核心是 (words[wordIndex] & (1L << bitIndex)) != 0,即判断指定位上是否为 1。注意外层括号不能省略:Java 中 != 的优先级高于 &。

    /**
     * Reports whether the bit at the given index is set.
     *
     * @param bitIndex index of the bit to read
     * @return {@code true} if the bit is set; {@code false} if it is clear or
     *         lies beyond the words currently in use
     * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
     */
    public boolean get(int bitIndex) {
        if (bitIndex < 0) {
            throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);
        }

        checkInvariants();

        int slot = wordIndex(bitIndex);
        if (slot >= wordsInUse) {
            // Nothing has been stored that far out, so the bit cannot be set.
            return false;
        }

        // Single-bit mask for this bit's position inside its word.
        long mask = 1L << bitIndex;
        return (words[slot] & mask) != 0;
    }

 4、nextSetBit方法:从fromIndex开始,找到第一个为true的位的index

    public int nextSetBit(int fromIndex) {
        if (fromIndex < 0)
            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);

        checkInvariants();

        int u = wordIndex(fromIndex);
        if (u >= wordsInUse)
            return -1;
     // WORD_INDEX = -1
     // WORD_INDEX << fromIndex:右边fromIndex位为0,其他位为1,ex: -1<<3 = 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111000
long word = words[u] & (WORD_MASK << fromIndex); while (true) { if (word != 0)
          // Long.numberOfTailingZeros(word):在word的二进制表示中,从最低位开始,找到第一个不为0的位的index
return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word);//BITS_PER_WORD=64 if (++u == wordsInUse) //防止溢出,溢出时返回-1 return -1; word = words[u]; // 一直往后找,直至找到第一个为true的位 } }

5、nextClearBit方法:从fromIndex开始,找到第一个为false的位的index

    /**
     * Finds the first bit that is clear at or after the given index.
     *
     * @param fromIndex index to start searching from (inclusive)
     * @return index of the first clear bit found; {@code fromIndex} itself when it
     *         lies beyond the words in use; {@code wordsInUse * BITS_PER_WORD}
     *         when every remaining bit of the words in use is set
     * @throws IndexOutOfBoundsException if {@code fromIndex} is negative
     */
    public int nextClearBit(int fromIndex) {
        // Neither spec nor implementation handle bitsets of maximal length.
        // See 4816253.
        if (fromIndex < 0) {
            throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex);
        }

        checkInvariants();

        int u = wordIndex(fromIndex);
        if (u >= wordsInUse) {
            return fromIndex;
        }

        // Inverting the word turns clear bits into 1s, so the search becomes the
        // same "lowest 1 bit" scan that nextSetBit performs.
        long inverted = ~words[u] & (WORD_MASK << fromIndex);

        while (inverted == 0) {
            if (++u == wordsInUse) {
                // All words in use are full: the first clear bit is just past them.
                return wordsInUse * BITS_PER_WORD;
            }
            inverted = ~words[u];
        }
        return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(inverted);
    }

三、应用

背景介绍:针对不同的城市配置不同的运维数据,在数据库中,运维数据 : 城市id = 1 : N

数据model大概是这样的:operateId(运维数据id), cityIds(多个城市id,用逗号分割)

需求:根据用户所在城市,匹配出需要展示给用户的运维数据

 

常规解决方案:搜出每一条运维数据,对每一条数据中的cityIds进行split,再与用户所在城市进行匹配。

  缺点:流量越大,对cpu的消耗就越大

 

使用BitSet:从数据库取出城市列表后进行一次split,转化为BitSet类型。实际过滤过程中只需要通过BitSet的get机制便可以判断该条运营数据投放的城市是否包含用户所在的城市。大致代码如下。

/**
 * One operations-data record together with the cities it is configured for.
 */
public class OperateModel {

    /** Id of the operations-data record. */
    private Integer operateId;

    /** City ids as a single comma-separated string. */
    private String cityIds;

    /** @return the record id, or {@code null} if it has not been set */
    public Integer getOperateId() {
        return this.operateId;
    }

    /** @param operateId the record id to store */
    public void setOperateId(Integer operateId) {
        this.operateId = operateId;
    }

    /** @return the comma-separated city ids, or {@code null} if not set */
    public String getCityIds() {
        return this.cityIds;
    }

    /** @param cityIds the comma-separated city ids to store */
    public void setCityIds(String cityIds) {
        this.cityIds = cityIds;
    }
}

/**
 * Converts the comma-separated city list of every operations record into a
 * {@link BitSet} once, so that later lookups by city are a single bit test per
 * record instead of a string split and comparison.
 */
public class OperateUtil {

    /** Record id mapped to a bit set whose set bits are that record's city ids. */
    private final Map<Integer, BitSet> map = new HashMap<Integer, BitSet>();

    /**
     * Builds the lookup table from all operations records.
     *
     * @param operateList records to index; records with a blank city list are skipped
     */
    public OperateUtil(List<OperateModel> operateList) {
        for (OperateModel operate : operateList) {
            createBitSet(operate);
        }
    }

    /**
     * Parses one record's city list and stores the resulting bit set under the
     * record id. Whitespace around ids and empty tokens (as in "1, 2" or "1,,2")
     * are tolerated. A token that is not an integer still fails with
     * NumberFormatException, and a negative id with IndexOutOfBoundsException,
     * so bad data is not silently dropped.
     */
    private void createBitSet(OperateModel operate) {
        if (StringUtils.isBlank(operate.getCityIds())) {
            return;
        }
        BitSet bitSet = new BitSet();
        for (String token : operate.getCityIds().split(",")) {
            String id = token.trim();
            if (id.isEmpty()) {
                continue;
            }
            // parseInt yields a primitive directly; no boxing round trip.
            bitSet.set(Integer.parseInt(id));
        }

        map.put(operate.getOperateId(), bitSet);
    }

    /**
     * Returns the ids of all operations records whose city list contains the
     * given city.
     *
     * @param cityId id of the user's city
     * @return ids of the matching records; empty when nothing matches
     */
    public List<Integer> queryOperates(int cityId) {
        List<Integer> list = new ArrayList<Integer>();
        if (cityId < 0) {
            // A negative id can never be stored in a BitSet, so no record can
            // match; BitSet.get would throw for it instead of answering false.
            return list;
        }
        for (Map.Entry<Integer, BitSet> entry : map.entrySet()) {
            if (entry.getValue().get(cityId)) {
                list.add(entry.getKey());
            }
        }
        return list;
    }
}

 

posted @ 2019-09-02 23:50  莹狼  阅读(387)  评论(0)  编辑  收藏  举报