BitSet
一、原理
BitSet 内部由一个 long 数组组成。我们知道,long 类型是 64 位,如果将 N 个 long 的 bit 依次连起来,就可以用每一个 bit 表示 0 ~ 64*N-1 这 64*N 个非负整数的存在性(存在标志为1,不存在标志为0)。
二、源码分析
1、构造方法
/**
 * Creates a bit set with room for the bit indices {@code 0} through
 * {@code nbits - 1}.
 *
 * @param nbits the initial number of bits; {@code 0} is allowed
 * @throws NegativeArraySizeException if {@code nbits} is negative
 */
public BitSet(int nbits) {
    // nbits can't be negative; size 0 is OK
    if (nbits < 0)
        throw new NegativeArraySizeException("nbits < 0: " + nbits);

    initWords(nbits);
    // Marks that the size of words was specified by the caller.
    sizeIsSticky = true;
}

/**
 * Allocates the backing long array with {@code wordIndex(nbits - 1) + 1}
 * elements, i.e. enough words to hold bit index {@code nbits - 1}.
 * For {@code nbits == 0} this is {@code (-1 >> 6) + 1 == 0} words.
 */
private void initWords(int nbits) {
    words = new long[wordIndex(nbits-1) + 1];
}

/**
 * Returns the index, within the long array, of the word that holds the
 * given bit. A long has 64 bits, so for a non-negative {@code bitIndex}
 * this is {@code bitIndex / 64}, computed as a right shift by
 * ADDRESS_BITS_PER_WORD (= 6, because 2^6 = 64).
 */
private static int wordIndex(int bitIndex) {
    return bitIndex >> ADDRESS_BITS_PER_WORD;
}
2、set方法
先计算出该 index 落在 long 数组的哪一个元素上,然后在该 long 的对应 bit 位上置1。
public void set(int bitIndex) { if (bitIndex < 0) throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex); int wordIndex = wordIndex(bitIndex); expandTo(wordIndex); // 给对应的bitIndex位设置为1(1 << 65 = 2:溢出后归0了,当bitIndex>64时,实际上bitIndex=bitIndex%64)
// 1 << bitIndex = 2^bitIndex = 1000...(bitIndex个0) words[wordIndex] |= (1L << bitIndex); // Restores invariants checkInvariants(); }
// 判断是否需要扩容 private void expandTo(int wordIndex) { int wordsRequired = wordIndex+1; if (wordsInUse < wordsRequired) { ensureCapacity(wordsRequired); wordsInUse = wordsRequired; } } // 扩容操作,2倍扩容 private void ensureCapacity(int wordsRequired) { if (words.length < wordsRequired) { // Allocate larger of doubled size or required size int request = Math.max(2 * words.length, wordsRequired); words = Arrays.copyOf(words, request); sizeIsSticky = false; } } /** * Every public method must preserve these invariants. */ private void checkInvariants() { assert(wordsInUse == 0 || words[wordsInUse - 1] != 0); assert(wordsInUse >= 0 && wordsInUse <= words.length); assert(wordsInUse == words.length || words[wordsInUse] == 0); }
3、get方法
这里主要还是 (words[wordIndex] & (1L << bitIndex)) != 0,判断指定位上是否为1。注意 Java 中 != 的优先级高于 &,所以外层括号不能省略。
/**
 * Returns whether the bit at the given index is 1.
 *
 * @param bitIndex index of the bit to test
 * @return {@code true} if the bit is set; {@code false} if it is clear or
 *         lies beyond the words currently in use
 * @throws IndexOutOfBoundsException if {@code bitIndex} is negative
 */
public boolean get(int bitIndex) {
    if (bitIndex < 0)
        throw new IndexOutOfBoundsException("bitIndex < 0: " + bitIndex);

    checkInvariants();

    int wordIndex = wordIndex(bitIndex);
    // The bounds test comes first so words[wordIndex] is only read for a word
    // in use; the mask 1L << bitIndex then selects the bit inside that word.
    return (wordIndex < wordsInUse)
        && ((words[wordIndex] & (1L << bitIndex)) != 0);
}
4、nextSetBit方法:从fromIndex(包含fromIndex)开始,找到第一个为true的位的index,找不到则返回-1
public int nextSetBit(int fromIndex) { if (fromIndex < 0) throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex); checkInvariants(); int u = wordIndex(fromIndex); if (u >= wordsInUse) return -1; // WORD_INDEX = -1
// WORD_INDEX << fromIndex:右边fromIndex位为0,其他位为1,ex: -1<<3 = 11111111 11111111 11111111 11111111 11111111 11111111 11111111 11111000 long word = words[u] & (WORD_MASK << fromIndex); while (true) { if (word != 0)
// Long.numberOfTailingZeros(word):在word的二进制表示中,从最低位开始,找到第一个不为0的位的index return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word);//BITS_PER_WORD=64 if (++u == wordsInUse) //防止溢出,溢出时返回-1 return -1; word = words[u]; // 一直往后找,直至找到第一个为true的位 } }
5、nextClearBit方法:从fromIndex(包含fromIndex)开始,找到第一个为false的位的index
public int nextClearBit(int fromIndex) { // Neither spec nor implementation handle bitsets of maximal length. // See 4816253. if (fromIndex < 0) throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex); checkInvariants(); int u = wordIndex(fromIndex); if (u >= wordsInUse) return fromIndex; // ~words[u]:word位上true变false,false变true,与netSetBit相反 long word = ~words[u] & (WORD_MASK << fromIndex); while (true) { if (word != 0) return (u * BITS_PER_WORD) + Long.numberOfTrailingZeros(word); if (++u == wordsInUse)//如果溢出,则返回最大值 return wordsInUse * BITS_PER_WORD; word = ~words[u]; } }
三、应用
背景介绍:针对不同的城市配置不同的运维数据,在数据库中,运维数据 : 城市id = 1 : N
数据model大概是这样的:operateId(运维数据id), cityIds(多个城市id,用逗号分割)
需求:根据用户所在城市,匹配出需要展示给用户的运维数据
常规解决方案:搜出每一条运维数据,对每一条数据中的cityIds进行split,再与用户所在城市进行匹配。
缺点:每次请求都要对所有数据重新split并逐个比较字符串,流量越大,对cpu的消耗就越大
使用BitSet:从数据库取出城市列表后只进行一次split,转化为BitSet类型并保存在内存中。实际过滤过程中只需要通过BitSet的get方法便可以判断该条运维数据投放的城市是否包含用户所在的城市。大致代码如下。
public class OperateModel { // 运营数据id private Integer operateId; // 城市id,用逗号分割 private String cityIds; public Integer getOperateId() { return operateId; } public void setOperateId(Integer operateId) { this.operateId = operateId; } public String getCityIds() { return cityIds; } public void setCityIds(String cityIds) { this.cityIds = cityIds; } } public class OperateUtil { private Map<Integer, BitSet> map = new HashMap<Integer, BitSet>(); /** * 初始化,将所有的运维数据中的城市列表转换为BitSet */ public OperateUtil(List<OperateModel> operateList) { for (OperateModel operate : operateList) { createBitSet(operate); } } private void createBitSet(OperateModel operate) { if(StringUtils.isBlank(operate.getCityIds())) return; BitSet bitSet = new BitSet(); for(String s : operate.getCityIds().split(",")) { bitSet.set(Integer.valueOf(s)); } map.put(operate.getOperateId(), bitSet); } /** * 查询所有包含用户所在城市的运维数据 */ public List<Integer> queryOperates(int cityId) { List<Integer> list = new ArrayList<Integer>(); for(Entry<Integer, BitSet> entry : map.entrySet()) { if(entry.getValue().get(cityId)) { list.add(entry.getKey()); } } return list; } }