一致性哈希算法C#实现
下面是一致性哈希(consistent hashing)的 C# 实现。该实现没有考虑多线程情况(没有加锁),如需线程安全请自行加锁。由于排版导致代码的换行丢失,阅读不太方便,建议拷贝到本地格式化后再阅读。
/// <summary>
/// Consistent-hash ring that maps string keys onto a set of real nodes (hosts).
/// Every real node is placed on the ring several times as "virtual nodes";
/// a key is served by the virtual node at or immediately below the key's hash,
/// wrapping around to the highest virtual node when the key hashes below all of them.
/// This type keeps its state in static fields and performs no locking, so callers
/// that use it from several threads must synchronise access themselves.
/// </summary>
public static class ConsistentHashing
{
    /// <summary>
    /// Number of virtual nodes created for each real node.
    /// </summary>
    private static int _virtualNodeMultiple = 100;

    /// <summary>
    /// Real nodes currently on the ring (no duplicates).
    /// </summary>
    private static readonly List<string> Nodes = new List<string>();

    /// <summary>
    /// Hashes of all virtual nodes, kept sorted ascending so lookups can binary-search.
    /// </summary>
    private static readonly Dictionary<int, string> VirtualNodeAndNodeMap = new Dictionary<int, string>();

    /// <summary>
    /// Virtual-node hash to owning real node, so the owner is found in O(1) after the search.
    /// </summary>
    private static readonly List<int> VirtualNode = new List<int>();

    /// <summary>
    /// Adds real nodes to the ring together with their virtual nodes.
    /// Null/empty entries and hosts that are already on the ring are skipped, so adding
    /// the same host twice no longer leaves a stale duplicate behind after a removal.
    /// </summary>
    /// <param name="hosts">Hosts to add.</param>
    /// <returns><c>false</c> when no hosts were supplied; otherwise <c>true</c>.</returns>
    public static bool AddNode(params string[] hosts)
    {
        if (hosts == null || hosts.Length == 0)
        {
            return false;
        }

        var ringChanged = false;
        foreach (var host in hosts)
        {
            if (string.IsNullOrEmpty(host) || Nodes.Contains(host))
            {
                continue;
            }

            Nodes.Add(host);
            for (var i = 1; i <= _virtualNodeMultiple; i++)
            {
                var currentHash = GetVirtualNodeHash(host, i);

                // Two virtual nodes may hash to the same value; the first one added keeps the slot.
                if (!VirtualNodeAndNodeMap.ContainsKey(currentHash))
                {
                    VirtualNode.Add(currentHash);
                    VirtualNodeAndNodeMap.Add(currentHash, host);
                    ringChanged = true;
                }
            }
        }

        if (ringChanged)
        {
            // GetNode binary-searches this list, so it must stay sorted.
            VirtualNode.Sort();
        }
        return true;
    }

    /// <summary>
    /// Removes a real node and every virtual node it owns.
    /// </summary>
    /// <param name="host">Host to remove.</param>
    /// <returns><c>true</c> if the host was on the ring; otherwise <c>false</c>.</returns>
    public static bool RemoveNode(string host)
    {
        if (!Nodes.Remove(host))
        {
            return false;
        }

        var removedHashes = new HashSet<int>();
        for (var i = 1; i <= _virtualNodeMultiple; i++)
        {
            var currentHash = GetVirtualNodeHash(host, i);
            string owner;

            // On a hash collision the slot may belong to another host; only drop slots this host owns.
            if (VirtualNodeAndNodeMap.TryGetValue(currentHash, out owner) && owner == host)
            {
                VirtualNodeAndNodeMap.Remove(currentHash);
                removedHashes.Add(currentHash);
            }
        }

        if (removedHashes.Count > 0)
        {
            // One pass over the list; removing elements keeps the remaining ones sorted,
            // so no re-sort is needed.
            VirtualNode.RemoveAll(hash => removedHashes.Contains(hash));
        }
        return true;
    }

    /// <summary>
    /// Returns a copy of the real-node list.
    /// </summary>
    /// <returns>A new list; modifying it does not affect the ring.</returns>
    public static List<string> GetAllNodes()
    {
        return new List<string>(Nodes);
    }

    /// <summary>
    /// Returns the number of real nodes on the ring.
    /// </summary>
    /// <returns>Real-node count.</returns>
    public static int GetNodesCount()
    {
        return Nodes.Count;
    }

    /// <summary>
    /// Changes the number of virtual nodes per real node and rebuilds the ring.
    /// Values that are not positive, or equal to the current setting, are ignored;
    /// zero is rejected because it would leave real nodes with no place on the ring.
    /// </summary>
    /// <param name="multiple">New number of virtual nodes per real node.</param>
    public static void ReSetVirtualNodeMultiple(int multiple)
    {
        if (multiple <= 0 || multiple == _virtualNodeMultiple)
        {
            return;
        }

        var existingNodes = Nodes.ToArray();
        _virtualNodeMultiple = multiple;
        Nodes.Clear();
        VirtualNode.Clear();
        VirtualNodeAndNodeMap.Clear();
        AddNode(existingNodes);
    }

    /// <summary>
    /// Finds the real node responsible for a key.
    /// </summary>
    /// <param name="key">Key to locate.</param>
    /// <returns>The host owning the virtual node at or immediately below the key's hash.</returns>
    /// <exception cref="InvalidOperationException">The ring contains no nodes.</exception>
    public static string GetNode(string key)
    {
        var hash = HashAlgorithm.GetHashCode(key) & int.MaxValue;
        if (VirtualNode.Count == 0)
        {
            throw new InvalidOperationException("The consistent-hash ring contains no nodes.");
        }

        var index = VirtualNode.BinarySearch(hash);
        if (index < 0)
        {
            // No exact match: ~index is the first element greater than the hash,
            // so the element just before it is the closest one below the hash.
            index = ~index - 1;
            if (index < 0)
            {
                // The hash is below every virtual node: wrap around to the highest one.
                index = VirtualNode.Count - 1;
            }
        }
        return VirtualNodeAndNodeMap[VirtualNode[index]];
    }

    /// <summary>
    /// Computes the non-negative ring position of one virtual node.
    /// A separator is placed between host and index so that different (host, index)
    /// pairs cannot produce the same text, e.g. "192.168.137.1" with index 11 and
    /// "192.168.137.11" with index 1.
    /// </summary>
    private static int GetVirtualNodeHash(string host, int index)
    {
        return HashAlgorithm.GetHashCode(host + "#" + index) & int.MaxValue;
    }

    /// <summary>
    /// MD5-based string hash. Used instead of <c>string.GetHashCode</c> because the
    /// built-in string hash is not guaranteed to return the same value for the same text.
    /// </summary>
    private static class HashAlgorithm
    {
        /// <summary>
        /// Returns a 32-bit hash of <paramref name="key"/> (may be negative).
        /// </summary>
        public static int GetHashCode(string key)
        {
            return Hash(ComputeMd5(key));
        }

        /// <summary>
        /// Assembles four digest bytes, starting at <c>nTime * 4</c>, into an int with the
        /// first byte as the least significant. Done with int shifts so the result is the
        /// same bit pattern in both checked and unchecked builds.
        /// </summary>
        private static int Hash(byte[] digest, int nTime = 0)
        {
            var offset = nTime * 4;
            return (digest[offset + 3] << 24)
                   | (digest[offset + 2] << 16)
                   | (digest[offset + 1] << 8)
                   | digest[offset];
        }

        /// <summary>
        /// Returns the MD5 digest of the UTF-8 encoding of <paramref name="k"/>.
        /// </summary>
        private static byte[] ComputeMd5(string k)
        {
            using (var md5 = MD5.Create())
            {
                return md5.ComputeHash(Encoding.UTF8.GetBytes(k));
            }
        }
    }
}
测试代码:
/// <summary>
/// Console demo that measures how keys are distributed over a 10-node ring and how
/// many keys move to a different node when one node is added or one is removed.
/// The result is written as a CSV report.
/// </summary>
class Program
{
    /// <summary>
    /// Number of nodes in the starting ring; per-node shares in the report are
    /// expressed relative to (key count / this value).
    /// </summary>
    private const int BaselineNodeCount = 10;

    /// <summary>
    /// Report location used when no path is given on the command line.
    /// </summary>
    private const string DefaultReportPath = @"E:\1000.csv";

    /// <summary>
    /// Runs the experiment. An optional first argument overrides the report path.
    /// </summary>
    static void Main(string[] args)
    {
        ConsistentHashing.AddNode(new[]
        {
            "192.168.137.1",
            "192.168.137.2",
            "192.168.137.3",
            "192.168.137.4",
            "192.168.137.5",
            "192.168.137.6",
            "192.168.137.7",
            "192.168.137.8",
            "192.168.137.9",
            "192.168.137.10"
        });
        var data = LoadTestData();

        // Time a full pass of lookups and report it (the measurement was previously discarded).
        var stop = Stopwatch.StartNew();
        foreach (var item in data)
        {
            ConsistentHashing.GetNode(item);
        }
        stop.Stop();
        Console.WriteLine("GetNode: {0} lookups in {1} ms", data.Count, stop.ElapsedMilliseconds);

        var map10 = new Dictionary<string, string>();
        var mapCount10 = new Dictionary<string, int>();
        var map11 = new Dictionary<string, string>();
        var mapCount11 = new Dictionary<string, int>();
        var map9 = new Dictionary<string, string>();
        var mapCount9 = new Dictionary<string, int>();

        // 10 nodes
        RecordDistribution(data, map10, mapCount10);

        // 11 nodes
        ConsistentHashing.AddNode("192.168.137.11");
        RecordDistribution(data, map11, mapCount11);

        // 9 nodes
        ConsistentHashing.RemoveNode("192.168.137.11");
        ConsistentHashing.RemoveNode("192.168.137.10");
        RecordDistribution(data, map9, mapCount9);

        // Compare against the 10-node assignment and build the report.
        var tenAndNine = CountMovedKeys(map10, map9);
        var tenAndEleven = CountMovedKeys(map10, map11);

        // Derived from the data instead of hard-coding 100000 / 1000000, so the
        // report stays correct if the number of test keys changes.
        double perNodeBaseline = data.Count / (double)BaselineNodeCount;
        double totalKeys = data.Count;

        var csv = new List<string>();
        csv.Add("ip,10,10分布,9,9分布,11,11分布");
        foreach (var item in mapCount11)
        {
            var str = item.Key;
            str += FormatCountColumns(mapCount10, item.Key, perNodeBaseline);
            str += FormatCountColumns(mapCount9, item.Key, perNodeBaseline);
            str += FormatCountColumns(mapCount11, item.Key, perNodeBaseline);
            csv.Add(str);
        }
        csv.Add(string.Format(
            System.Globalization.CultureInfo.InvariantCulture,
            "10-1的失效数据:{0},比例:{2:F2}。10+1的失效数据:{1},比例:{3:F2}",
            tenAndNine, tenAndEleven, tenAndNine / totalKeys, tenAndEleven / totalKeys));

        var reportPath = args != null && args.Length > 0 ? args[0] : DefaultReportPath;
        File.WriteAllLines(reportPath, csv, Encoding.UTF8);

        Console.ReadKey();
    }

    /// <summary>
    /// Looks up every key on the current ring, remembering the node chosen for each
    /// key (first occurrence only) and counting lookups per node.
    /// </summary>
    private static void RecordDistribution(List<string> keys, Dictionary<string, string> keyToHost, Dictionary<string, int> hostCounts)
    {
        foreach (var key in keys)
        {
            var host = ConsistentHashing.GetNode(key);
            if (!keyToHost.ContainsKey(key))
            {
                keyToHost.Add(key, host);
            }

            int count;
            hostCounts.TryGetValue(host, out count); // count stays 0 when the host is new
            hostCounts[host] = count + 1;
        }
    }

    /// <summary>
    /// Counts the keys in <paramref name="other"/> whose node differs from the one
    /// recorded in <paramref name="baseline"/>.
    /// </summary>
    private static int CountMovedKeys(Dictionary<string, string> baseline, Dictionary<string, string> other)
    {
        var moved = 0;
        foreach (var item in other)
        {
            if (baseline[item.Key] != item.Value)
            {
                moved++;
            }
        }
        return moved;
    }

    /// <summary>
    /// Builds the two CSV cells (count, share of the per-node baseline) for one host,
    /// or two empty cells when the host has no entry. The share is formatted with the
    /// invariant culture so a comma decimal separator can never split the CSV cell.
    /// </summary>
    private static string FormatCountColumns(Dictionary<string, int> hostCounts, string host, double perNodeBaseline)
    {
        int count;
        if (!hostCounts.TryGetValue(host, out count))
        {
            return ",,";
        }
        return "," + count + "," + (count / perNodeBaseline).ToString("F2", System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Generates the test keys.
    /// </summary>
    /// <param name="count">Number of keys to generate.</param>
    /// <returns>A list of random 15-character alphanumeric strings.</returns>
    private static List<string> LoadTestData(int count = 1000000)
    {
        var data = new List<string>(count);

        for (var i = 0; i < count; i++)
        {
            data.Add(GetRandomString(15, true, true, true, false, ""));
        }
        return data;
    }

    /// <summary>
    /// Generates a random string from the selected character classes.
    /// </summary>
    /// <param name="length">Length of the string to produce.</param>
    /// <param name="useNum">Include the digits 0-9.</param>
    /// <param name="useLow">Include lowercase letters a-z.</param>
    /// <param name="useUpp">Include uppercase letters A-Z.</param>
    /// <param name="useSpe">Include punctuation characters.</param>
    /// <param name="custom">Extra characters to include; may be null or empty.</param>
    /// <returns>
    /// A string of the requested length; an empty string when <paramref name="length"/>
    /// is zero or negative.
    /// </returns>
    /// <exception cref="ArgumentException">No characters were selected to draw from.</exception>
    public static string GetRandomString(int length, bool useNum, bool useLow, bool useUpp, bool useSpe, string custom)
    {
        if (length <= 0)
        {
            return string.Empty;
        }

        var alphabet = new StringBuilder(custom);
        if (useNum) { alphabet.Append("0123456789"); }
        if (useLow) { alphabet.Append("abcdefghijklmnopqrstuvwxyz"); }
        if (useUpp) { alphabet.Append("ABCDEFGHIJKLMNOPQRSTUVWXYZ"); }
        if (useSpe) { alphabet.Append("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"); }
        if (alphabet.Length == 0)
        {
            throw new ArgumentException("At least one character class or custom character is required.");
        }

        // Seed from the cryptographic generator so calls made in quick succession
        // do not start from the same seed; dispose the generator once the seed is read.
        var seed = new byte[4];
        using (var seedSource = System.Security.Cryptography.RandomNumberGenerator.Create())
        {
            seedSource.GetBytes(seed);
        }
        var random = new Random(BitConverter.ToInt32(seed, 0));

        var chars = alphabet.ToString();
        var result = new StringBuilder(length);
        for (var i = 0; i < length; i++)
        {
            // Next's upper bound is exclusive, so pass the full length; the previous
            // "Length - 1" bound meant the last character could never be chosen.
            result.Append(chars[random.Next(chars.Length)]);
        }
        return result.ToString();
    }
}
测试结果:key 总数为 100 万;图中各节点的“分布”一列均以 10 万(即 10 个节点时每个节点平均应分到的 key 数)为基数计算: