三个数组实现的HashMap
网上的一个HashMap代码,用三个数组(键数组、值数组、状态数组)实现,不同于JDK中的实现方式。处理哈希冲突采用开放定址法中的双重哈希(double hashing)探测策略;注意它与代码注释中用于扩容/缩容的"再哈希"(rehash)不是同一个概念。学习了一遍,个别地方可能没有理解到位。写了一些注释,如果有错误,敬请指出。
/**
 * A hash map from primitive {@code long} keys to {@code Object} values, implemented with
 * open addressing over three parallel arrays (keys, values, slot states).
 *
 * <p>Collisions are resolved by double hashing: the probe sequence starts at
 * {@code hash % capacity} and steps backwards by a key-dependent decrement. Capacities are
 * always taken from a table of primes (see {@link #nextPrime(int)}).
 *
 * <p>Removed entries are only marked {@link #REMOVED}; such slots are reused by later
 * insertions and are physically dropped when the table is rebuilt by {@link #rehash(int)}.
 *
 * <p>This class performs no synchronization.
 */
public final class LongHashMap {

    /** Largest capacity in the prime table. */
    public static final int LARGEST_PRIME = Integer.MAX_VALUE;

    /** Default initial capacity (a prime). */
    protected static final int DEFAULT_CAPACITY = 277;
    /** Default minimum load factor. */
    protected static final double DEFAULT_MIN_LOAD_FACTOR = 0.2;
    /** Default maximum load factor. */
    protected static final double DEFAULT_MAX_LOAD_FACTOR = 0.6;

    /** Slot state: never used since the table was (re)built. */
    protected static final byte FREE = 0;
    /** Slot state: holds a live key/value pair. */
    protected static final byte FULL = 1;
    /** Slot state: held a pair that has since been removed. */
    protected static final byte REMOVED = 2;

    /** Candidate capacities; sorted ascending by the static initializer below. */
    private static final int[] primeCapacities = {
        LARGEST_PRIME,
        // chunk #1
        5, 11, 23, 47, 97, 197, 397, 797, 1597, 3203, 6421, 12853, 25717, 51437, 102877, 205759,
        411527, 823117, 1646237, 3292489, 6584983, 13169977, 26339969, 52679969, 105359939,
        210719881, 421439783, 842879579, 1685759167,
        // chunk #2
        433, 877, 1759, 3527, 7057, 14143, 28289, 56591, 113189, 226379, 452759, 905551, 1811107,
        3622219, 7244441, 14488931, 28977863, 57955739, 115911563, 231823147, 463646329, 927292699,
        1854585413,
        // chunk #3
        953, 1907, 3821, 7643, 15287, 30577, 61169, 122347, 244703, 489407, 978821, 1957651, 3915341,
        7830701, 15661423, 31322867, 62645741, 125291483, 250582987, 501165979, 1002331963,
        2004663929,
        // chunk #4
        1039, 2081, 4177, 8363, 16729, 33461, 66923, 133853, 267713, 535481, 1070981, 2141977, 4283963,
        8567929, 17135863, 34271747, 68543509, 137087021, 274174111, 548348231, 1096696463,
        // chunk #5
        31, 67, 137, 277, 557, 1117, 2237, 4481, 8963, 17929, 35863, 71741, 143483, 286973, 573953,
        1147921, 2295859, 4591721, 9183457, 18366923, 36733847, 73467739, 146935499, 293871013,
        587742049, 1175484103,
        // chunk #6
        599, 1201, 2411, 4831, 9677, 19373, 38747, 77509, 155027, 310081, 620171, 1240361, 2480729,
        4961459, 9922933, 19845871, 39691759, 79383533, 158767069, 317534141, 635068283, 1270136683,
        // chunk #7
        311, 631, 1277, 2557, 5119, 10243, 20507, 41017, 82037, 164089, 328213, 656429, 1312867,
        2625761, 5251529, 10503061, 21006137, 42012281, 84024581, 168049163, 336098327, 672196673,
        1344393353,
        // chunk #8
        3, 7, 17, 37, 79, 163, 331, 673, 1361, 2729, 5471, 10949, 21911, 43853, 87719, 175447, 350899,
        701819, 1403641, 2807303, 5614657, 11229331, 22458671, 44917381, 89834777, 179669557,
        359339171, 718678369, 1437356741,
        // chunk #9
        43, 89, 179, 359, 719, 1439, 2879, 5779, 11579, 23159, 46327, 92657, 185323, 370661, 741337,
        1482707, 2965421, 5930887, 11861791, 23723597, 47447201, 94894427, 189788857, 379577741,
        759155483, 1518310967,
        // chunk #10
        379, 761, 1523, 3049, 6101, 12203, 24407, 48817, 97649, 195311, 390647, 781301, 1562611,
        3125257, 6250537, 12501169, 25002389, 50004791, 100009607, 200019221, 400038451, 800076929,
        1600153859
    };

    static {
        // nextPrime() binary-searches this table, so it must be sorted.
        java.util.Arrays.sort(primeCapacities);
    }

    /** Keys; {@code table[i]} is meaningful only while {@code state[i] == FULL}. */
    protected long[] table;
    /** Values, parallel to {@link #table}. */
    protected Object[] values;
    /** Per-slot state: {@link #FREE}, {@link #FULL} or {@link #REMOVED}. */
    protected byte[] state;
    /** Number of slots whose state is {@link #FREE}. */
    protected int freeEntries;
    /** Number of key/value pairs currently stored. */
    protected int distinct;
    /** When {@link #distinct} drops below this after a removal, the table is shrunk. */
    protected int lowWaterMark;
    /** When {@link #distinct} exceeds this before an insertion, the table is grown. */
    protected int highWaterMark;
    /** Minimum load factor, used to derive {@link #lowWaterMark}. */
    protected double minLoadFactor;
    /** Maximum load factor, used to derive {@link #highWaterMark}. */
    protected double maxLoadFactor;

    /** Creates a map with the default capacity and load factors. */
    public LongHashMap() {
        this(DEFAULT_CAPACITY);
    }

    /**
     * Creates a map with the given initial capacity and the default load factors.
     *
     * @param initialCapacity requested capacity; rounded up to a prime from the capacity table
     * @throws IllegalArgumentException if {@code initialCapacity} is negative
     */
    public LongHashMap(int initialCapacity) {
        this(initialCapacity, DEFAULT_MIN_LOAD_FACTOR, DEFAULT_MAX_LOAD_FACTOR);
    }

    /**
     * Creates a map with the given initial capacity and load factors.
     *
     * @param initialCapacity requested capacity; rounded up to a prime from the capacity table
     * @param minLoadFactor   minimum load factor, in {@code [0.0, 1.0)}
     * @param maxLoadFactor   maximum load factor, in {@code (0.0, 1.0)} and greater than
     *                        {@code minLoadFactor}
     * @throws IllegalArgumentException if any argument is out of range
     */
    public LongHashMap(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
        setUp(initialCapacity, minLoadFactor, maxLoadFactor);
    }

    /**
     * Validates the arguments and initializes all internal state for an empty map.
     *
     * @param initialCapacity requested capacity; rounded up to a prime from the capacity table
     * @param minLoadFactor   minimum load factor, in {@code [0.0, 1.0)}
     * @param maxLoadFactor   maximum load factor, in {@code (0.0, 1.0)} and greater than
     *                        {@code minLoadFactor}
     * @throws IllegalArgumentException if any argument is out of range
     */
    protected void setUp(int initialCapacity, double minLoadFactor, double maxLoadFactor) {
        if (initialCapacity < 0) {
            throw new IllegalArgumentException(
                "Initial Capacity must not be less than zero: " + initialCapacity);
        }
        if (minLoadFactor < 0.0 || minLoadFactor >= 1.0) {
            throw new IllegalArgumentException("Illegal minLoadFactor: " + minLoadFactor);
        }
        if (maxLoadFactor <= 0.0 || maxLoadFactor >= 1.0) {
            throw new IllegalArgumentException("Illegal maxLoadFactor: " + maxLoadFactor);
        }
        if (minLoadFactor >= maxLoadFactor) {
            throw new IllegalArgumentException(
                "Illegal minLoadFactor: " + minLoadFactor
                    + " and maxLoadFactor: " + maxLoadFactor);
        }

        // nextPrime never returns less than the smallest table entry (3),
        // so no special handling of a zero capacity is required.
        final int capacity = nextPrime(initialCapacity);

        this.table = new long[capacity];
        this.values = new Object[capacity];
        this.state = new byte[capacity];

        this.minLoadFactor = minLoadFactor;
        // The table cannot grow beyond LARGEST_PRIME, so let it fill up completely.
        this.maxLoadFactor = (capacity == LARGEST_PRIME) ? 1.0 : maxLoadFactor;

        this.distinct = 0;
        this.freeEntries = capacity;
        // A freshly constructed map never shrinks; rehash() sets a real low water mark.
        this.lowWaterMark = 0;
        this.highWaterMark = chooseHighWaterMark(capacity, this.maxLoadFactor);
    }

    /**
     * Associates {@code value} with {@code key}.
     *
     * @param key   the key
     * @param value the value to store (may be {@code null})
     * @return {@code true} if the key was not present and a new pair was added;
     *         {@code false} if the key was already present and its value was replaced
     */
    public boolean put(long key, Object value) {
        int i = indexOfInsertion(key);
        if (i < 0) {
            // Key already present: indexOfInsertion encoded its slot as -slot-1.
            i = -i - 1;
            this.values[i] = value;
            return false;
        }

        if (this.distinct > this.highWaterMark) {
            // Too full: grow first, then retry against the new table.
            rehash(chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor));
            return put(key, value);
        }

        this.table[i] = key;
        this.values[i] = value;
        if (this.state[i] == FREE) {
            // Reusing a REMOVED slot does not consume a FREE one.
            this.freeEntries--;
        }
        this.state[i] = FULL;
        this.distinct++;

        if (this.freeEntries < 1) {
            // The probe loops terminate only on a FREE slot, so one must always remain.
            rehash(chooseGrowCapacity(this.distinct + 1, this.minLoadFactor, this.maxLoadFactor));
        }
        return true;
    }

    /**
     * Removes the pair stored under {@code key}, if any.
     *
     * @param key the key to remove
     * @return {@code true} if a pair was removed, {@code false} if the key was absent
     */
    public boolean removeKey(long key) {
        final int i = indexOfKey(key);
        if (i < 0) {
            return false;
        }
        // Mark only; table[i] keeps the stale key, which is ignored while state is REMOVED.
        this.state[i] = REMOVED;
        this.values[i] = null;
        this.distinct--;

        if (this.distinct < this.lowWaterMark) {
            rehash(chooseShrinkCapacity(this.distinct, this.minLoadFactor, this.maxLoadFactor));
        }
        return true;
    }

    /**
     * Returns the value stored under {@code key}.
     *
     * @param key the key to look up
     * @return the stored value, or {@code null} if the key is absent (a stored {@code null}
     *         value is indistinguishable from absence; use {@link #containsKey(long)})
     */
    public final Object get(long key) {
        final int i = indexOfKey(key);
        return (i < 0) ? null : values[i];
    }

    /**
     * Removes all pairs and then shrinks the table via {@link #trimToSize()}.
     */
    public void clear() {
        java.util.Arrays.fill(state, FREE);
        // Null out every value slot (including the last one) so no stale reference survives.
        java.util.Arrays.fill(values, null);
        this.distinct = 0;
        this.freeEntries = table.length;
        trimToSize();
    }

    /**
     * Rebuilds the table with a capacity of about 1.2 times the current size (rounded up to
     * a table prime) if that is smaller than the current capacity.
     */
    public void trimToSize() {
        final int newCapacity = nextPrime((int) (1 + 1.2 * size()));
        if (table.length > newCapacity) {
            rehash(newCapacity);
        }
    }

    /**
     * @param key the key to test
     * @return {@code true} if a pair is stored under {@code key}
     */
    public boolean containsKey(long key) {
        return indexOfKey(key) >= 0;
    }

    /**
     * Tests whether some key maps to {@code value}, using identity ({@code ==}) comparison.
     *
     * @param value the value to search for
     * @return {@code true} if a live slot holds exactly this reference
     */
    public boolean containsValue(Object value) {
        return indexOfValue(value) >= 0;
    }

    /**
     * Grows the table, if necessary, so that its capacity is at least {@code minCapacity}.
     *
     * @param minCapacity the desired minimum capacity
     */
    public void ensureCapacity(int minCapacity) {
        if (table.length < minCapacity) {
            rehash(nextPrime(minCapacity));
        }
    }

    /**
     * Returns a key that maps to {@code value} (identity comparison). When several keys map
     * to it, the one in the highest-indexed slot is returned.
     *
     * @param value the value to search for
     * @return a matching key, or {@code Long.MIN_VALUE} if no slot holds the value
     */
    public long keyOf(Object value) {
        final int i = indexOfValue(value);
        return (i < 0) ? Long.MIN_VALUE : table[i];
    }

    /**
     * @return a new array holding every stored key, in descending slot order
     */
    public long[] keys() {
        final long[] result = new long[distinct];
        int next = 0;
        for (int slot = table.length - 1; slot >= 0; slot--) {
            if (state[slot] == FULL) {
                result[next++] = table[slot];
            }
        }
        return result;
    }

    /**
     * @return a new array holding every stored value, in descending slot order
     */
    public Object[] values() {
        final Object[] result = new Object[distinct];
        int next = 0;
        for (int slot = state.length - 1; slot >= 0; slot--) {
            if (state[slot] == FULL) {
                result[next++] = values[slot];
            }
        }
        return result;
    }

    /** @return the number of stored key/value pairs */
    public int size() {
        return distinct;
    }

    /** @return {@code true} if no pairs are stored */
    public boolean isEmpty() {
        return distinct == 0;
    }

    /**
     * Finds the slot whose value is the same reference as {@code value}, scanning from the
     * last slot down to the first.
     *
     * @param value the value to search for
     * @return the slot index, or {@code -1} if not found
     */
    protected int indexOfValue(Object value) {
        for (int slot = state.length - 1; slot >= 0; slot--) {
            if (state[slot] == FULL && values[slot] == value) {
                return slot;
            }
        }
        return -1;
    }

    /**
     * Replaces the internal arrays with new ones of the given capacity and re-inserts every
     * live pair; slots marked {@link #REMOVED} are dropped. Both water marks are recomputed.
     *
     * @param newCapacity capacity of the new table; must exceed the number of stored pairs
     * @throws IllegalArgumentException if {@code newCapacity} cannot hold all pairs plus one
     *                                  free slot (probing would never terminate)
     */
    protected void rehash(int newCapacity) {
        if (newCapacity <= this.distinct) {
            throw new IllegalArgumentException(
                "New capacity " + newCapacity + " too small for " + this.distinct + " entries");
        }

        final long[] oldTable = table;
        final Object[] oldValues = values;
        final byte[] oldState = state;

        final long[] newTable = new long[newCapacity];
        final Object[] newValues = new Object[newCapacity];
        final byte[] newState = new byte[newCapacity];

        this.lowWaterMark = chooseLowWaterMark(newCapacity, this.minLoadFactor);
        this.highWaterMark = chooseHighWaterMark(newCapacity, this.maxLoadFactor);

        // Install the new arrays first: indexOfInsertion() probes this.table / this.state.
        this.table = newTable;
        this.values = newValues;
        this.state = newState;
        this.freeEntries = newCapacity - this.distinct;

        for (int slot = oldTable.length - 1; slot >= 0; slot--) {
            if (oldState[slot] == FULL) {
                final long key = oldTable[slot];
                final int target = indexOfInsertion(key);
                newTable[target] = key;
                newValues[target] = oldValues[slot];
                newState[target] = FULL;
            }
        }
    }

    /**
     * @param capacity table capacity
     * @param minLoad  minimum load factor
     * @return {@code (int) (capacity * minLoad)}
     */
    protected int chooseLowWaterMark(int capacity, double minLoad) {
        return (int) (capacity * minLoad);
    }

    /**
     * Chooses a capacity targeting a load midway between the two load factors.
     *
     * @param size    number of pairs to accommodate
     * @param minLoad minimum load factor
     * @param maxLoad maximum load factor
     * @return a table prime that is at least {@code size + 1}
     */
    protected int chooseMeanCapacity(int size, double minLoad, double maxLoad) {
        // Double arithmetic: 2 * size would overflow int for very large sizes.
        return nextPrime(Math.max(size + 1, (int) (2.0 * size / (minLoad + maxLoad))));
    }

    /**
     * Chooses a capacity for shrinking, weighted towards the maximum load factor.
     *
     * @param size    number of pairs to accommodate
     * @param minLoad minimum load factor
     * @param maxLoad maximum load factor
     * @return a table prime that is at least {@code size + 1}
     */
    protected int chooseShrinkCapacity(int size, double minLoad, double maxLoad) {
        // Double arithmetic: 4 * size would overflow int for very large sizes.
        return nextPrime(Math.max(size + 1, (int) (4.0 * size / (minLoad + 3 * maxLoad))));
    }

    /**
     * Returns the smallest entry of the prime capacity table that is greater than or equal
     * to {@code desiredCapacity}.
     *
     * @param desiredCapacity requested capacity
     * @return a capacity from the table; at least 3 and at most {@link #LARGEST_PRIME}
     */
    protected int nextPrime(int desiredCapacity) {
        int i = java.util.Arrays.binarySearch(primeCapacities, desiredCapacity);
        if (i < 0) {
            // Not found: binarySearch returned -(insertionPoint) - 1. The insertion point is
            // always a valid index because the table contains Integer.MAX_VALUE.
            i = -i - 1;
        }
        return primeCapacities[i];
    }

    /** Chooses a capacity for growing, weighted towards the minimum load factor. */
    private int chooseGrowCapacity(int size, double minLoad, double maxLoad) {
        // Double arithmetic: 4 * size would overflow int for very large sizes.
        return nextPrime(Math.max(size + 1, (int) (4.0 * size / (3 * minLoad + maxLoad))));
    }

    /** High water mark: capacity * maxLoad, capped at capacity - 2. */
    private int chooseHighWaterMark(int capacity, double maxLoad) {
        return Math.min(capacity - 2, (int) (capacity * maxLoad));
    }

    /** Folds the two 32-bit halves of the key together and clears the sign bit. */
    private static int hashOf(long key) {
        return ((int) (key ^ (key >> 32))) & 0x7FFFFFFF;
    }

    /** Probe decrement for double hashing: {@code hash % (length - 2)}, with 0 mapped to 1. */
    private static int probeStep(int hash, int length) {
        final int step = hash % (length - 2);
        return (step == 0) ? 1 : step;
    }

    /**
     * Locates the slot to use when inserting {@code key}.
     *
     * @param key the key about to be inserted
     * @return a non-negative slot index (FREE or REMOVED) if the key is absent, or
     *         {@code -slot - 1} if the key is already stored in {@code slot}
     */
    private int indexOfInsertion(long key) {
        final long[] tab = table;
        final byte[] stat = state;
        final int length = tab.length;

        final int hash = hashOf(key);
        final int decrement = probeStep(hash, length);
        int i = hash % length;

        // Skip live slots that hold other keys.
        while (stat[i] == FULL && tab[i] != key) {
            i -= decrement;
            if (i < 0) {
                i += length;
            }
        }

        if (stat[i] == REMOVED) {
            // Remember this reusable slot, but keep probing: the key may still be stored
            // further along the sequence, in which case it must be updated in place.
            final int firstRemoved = i;
            while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
                i -= decrement;
                if (i < 0) {
                    i += length;
                }
            }
            if (stat[i] == FREE) {
                // Key is absent: reuse the removed slot found first.
                i = firstRemoved;
            }
        }

        if (stat[i] == FULL) {
            // Either loop stopped on a live slot holding this key: already present.
            return -i - 1;
        }
        return i;
    }

    /**
     * Locates the slot holding {@code key}.
     *
     * @param key the key to look up
     * @return the slot index, or {@code -1} if the key is absent
     */
    private int indexOfKey(long key) {
        final long[] tab = table;
        final byte[] stat = state;
        final int length = tab.length;

        final int hash = hashOf(key);
        final int decrement = probeStep(hash, length);
        int i = hash % length;

        // REMOVED slots do not end the search; only a FREE slot proves absence.
        while (stat[i] != FREE && (stat[i] == REMOVED || tab[i] != key)) {
            i -= decrement;
            if (i < 0) {
                i += length;
            }
        }
        return (stat[i] == FREE) ? -1 : i;
    }

    /** Prints every slot state on one line to standard output (debugging aid for main). */
    private void printState() {
        for (int i = 0; i < state.length; i++) {
            System.out.print(state[i] + " ");
        }
        System.out.println();
    }

    /**
     * Small demonstration: inserts, removes and re-inserts a few pairs and prints the slot
     * states before and after the table grows.
     *
     * @param args unused
     */
    public static void main(String args[]) {
        LongHashMap lh = new LongHashMap(5); // initial capacity 5
        System.out.println("size=" + lh.size());
        for (int i = 0; i < 3; i++) { // store three pairs
            lh.put(i, Integer.valueOf(i));
        }
        System.out.println("size=" + lh.size());
        lh.removeKey(1); // remove two of them
        lh.removeKey(2);
        lh.put(123, "ok"); // add one
        lh.printState();
        lh.put(1234, "oo"); // add another; this uses the last free slot and triggers a rehash
        lh.printState();
        System.out.println(lh.get(0));
        System.out.println(lh.get(123));
        System.out.println(lh.get(1234));
    }
}
运行:
C:\ex>java LongHashMap
size=0
size=3
1 2 2 1 0
1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0
0
ok
oo
源码:
微信公众号:
猿人谷
如果您认为阅读这篇博客让您有些收获,不妨点击一下右下角的【推荐】
如果您希望与我交流互动,欢迎关注微信公众号
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接。