(Ketama Algorithm)
The code below is based on the following post:
http://www.cnblogs.com/daizhj/archive/2010/08/24/1807324.html
问题
有集合A {a1,a2,...} ,集合B {b1,b2,...}.
将A的元素映射到B,如 a1->b1 ,a2->b1,a3->b1 ; a4->b2;a5->b2;a6->b2;...
A 的数量远大于B .B有可能增加或减少元素.
如何保持映射的稳定性。
As an exercise, the code was rewritten and its performance was improved.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace KetamaHashLab
{
/// <summary>
/// Drives the consistent-hashing experiment: random IP-like resource keys are
/// mapped onto a ring built from <see cref="NodeCount"/> nodes, then onto rings
/// with two more and two fewer nodes, and finally the share of keys that kept
/// their original node is printed.
/// </summary>
public class Simulator
{
    /// <summary>Number of random resource keys generated for one run.</summary>
    public const int ResourceCount = 100000;

    /// <summary>Number of nodes in the baseline ring.</summary>
    public const int NodeCount = 20;

    /// <summary>Virtual node count passed to every <c>KetamaNodeRing</c> that is built.</summary>
    public int VirtualNodesCount = 500;

    /// <summary>
    /// Runs the three mapping passes (baseline, two nodes added, two nodes removed)
    /// over the same resource keys and prints the stability statistics.
    /// </summary>
    public void Execute()
    {
        List<string> resources = GetRandomIPs();
        Dictionary<string, List<string>> result = new Dictionary<string, List<string>>();
        InitResourceMap(resources, result);

        List<string> nodes = getNodes(NodeCount);
        Console.WriteLine("Normal case : nodes count : " + nodes.Count);
        result = MappResources2Nodes(nodes, resources, result);

        List<string> nodes1 = getNodes(NodeCount + 2);
        Console.WriteLine("Added case : nodes count : " + nodes1.Count);
        result = MappResources2Nodes(nodes1, resources, result);

        List<string> nodes2 = getNodes(NodeCount - 2);
        Console.WriteLine("Reduced case : nodes count : " + nodes2.Count);
        result = MappResources2Nodes(nodes2, resources, result);

        CountHitPercent(result);
    }

    /// <summary>
    /// Gives every resource key an empty list in <paramref name="map"/>.
    /// Duplicate keys in <paramref name="resources"/> collapse into one entry.
    /// </summary>
    private void InitResourceMap(List<string> resources, Dictionary<string, List<string>> map)
    {
        foreach (string key in resources)
        {
            map[key] = new List<string>();
        }
    }

    /// <summary>
    /// Prints how many keys were mapped to the same node in the "added" pass
    /// (list index 1) and in the "reduced" pass (list index 2) as in the
    /// baseline pass (list index 0).
    /// </summary>
    /// <param name="mapData">Resource key to the list of nodes recorded for it, one per pass.</param>
    /// <exception cref="ArgumentNullException"><paramref name="mapData"/> is null.</exception>
    public void CountHitPercent(Dictionary<string, List<string>> mapData)
    {
        if (mapData == null)
        {
            throw new ArgumentNullException("mapData");
        }

        int addCount = 0;
        int reduceCount = 0;
        foreach (KeyValuePair<string, List<string>> entry in mapData)
        {
            List<string> list = entry.Value;
            if (list.Count != 3)
            {
                Console.WriteLine("It's wrong size of list, key is " + entry.Key + ", size is " + list.Count);
                continue;
            }

            if (list[0] == list[1])
            {
                addCount++;
            }
            if (list[0] == list[2])
            {
                reduceCount++;
            }
        }

        // The denominator is the number of keys actually mapped: duplicate random
        // IPs collapse into one dictionary entry, so it can be below ResourceCount.
        int total = mapData.Count;
        Console.WriteLine("addCount is {0}, reduceCount is {1} .", addCount, reduceCount);
        Console.WriteLine("Same percent in added case : " + Percent(addCount, total) + "%");
        Console.WriteLine("Same percent in reduced case : " + Percent(reduceCount, total) + "%");
    }

    /// <summary>Returns count/total as a percentage, or 0 when total is 0.</summary>
    private static float Percent(int count, int total)
    {
        return total == 0 ? 0f : (float)count * 100 / total;
    }

    /// <summary>
    /// Builds a ring from <paramref name="nodes"/> and appends, for every key of
    /// <paramref name="map"/>, the node that key maps to. Prints the elapsed time.
    /// </summary>
    /// <param name="nodes">Node names used to build the ring.</param>
    /// <param name="resources">Not read by this method; kept for signature compatibility.</param>
    /// <param name="map">Resource key to its list of mapped nodes; the lists are appended to in place.</param>
    /// <returns>The same <paramref name="map"/> instance.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="map"/> is null.</exception>
    /// <exception cref="ApplicationException">The ring returned null for a key.</exception>
    public Dictionary<string, List<string>> MappResources2Nodes(List<string> nodes, List<string> resources
        , Dictionary<string, List<string>> map
        )
    {
        if (map == null)
        {
            throw new ArgumentNullException("map");
        }

        KetamaNodeRing locator = new KetamaNodeRing(nodes, VirtualNodesCount);
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

        // Only the value lists are mutated, never the dictionary itself, so
        // enumerating the dictionary while appending is safe.
        foreach (KeyValuePair<string, List<string>> entry in map)
        {
            string node = locator.GetMappedKey(entry.Key);
            if (node == null)
            {
                throw new ApplicationException("Node is null!");
            }
            entry.Value.Add(node);
        }

        timer.Stop();
        Console.WriteLine("Total MappResources2Nodes spent {0} seconds.", timer.Elapsed.TotalSeconds);
        return map;
    }

    /// <summary>Returns the node names "10.0.1.11:1" .. "10.0.1.11:nodeCount".</summary>
    private static List<string> getNodes(int nodeCount)
    {
        List<string> nodes = new List<string>(Math.Max(nodeCount, 0));
        for (int k = 1; k <= nodeCount; k++)
        {
            nodes.Add("10.0.1.11:" + k);
        }
        return nodes;
    }

    /// <summary>
    /// Generates <see cref="ResourceCount"/> random dotted-quad strings used as resource keys.
    /// The list may contain duplicates.
    /// </summary>
    private static List<String> GetRandomIPs()
    {
        List<string> allStrings = new List<string>(ResourceCount);
        Random rand = new Random();
        for (int i = 0; i < ResourceCount; i++)
        {
            // Random.Next's upper bound is exclusive, so 256 is needed to cover 0..255.
            string ipString = string.Format("{0}.{1}.{2}.{3}", rand.Next(0, 256), rand.Next(0, 256), rand.Next(0, 256), rand.Next(0, 256));
            allStrings.Add(ipString);
        }
        return allStrings;
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Security.Cryptography;
namespace KetamaHashLab
{
/// <summary>
/// Hash primitives used to place nodes and keys on the Ketama ring.
/// </summary>
public class HashHelper
{
    /// <summary>
    /// Reads the <paramref name="nTime"/>-th group of four bytes from
    /// <paramref name="digest"/> as a little-endian unsigned 32-bit value.
    /// </summary>
    /// <param name="digest">Digest bytes; must hold at least <c>(nTime + 1) * 4</c> bytes.</param>
    /// <param name="nTime">Zero-based index of the 4-byte group to read.</param>
    /// <returns>A value in the range 0 .. 0xFFFFFFFF.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="digest"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="nTime"/> does not address a complete 4-byte group inside <paramref name="digest"/>.
    /// </exception>
    public static long hash(byte[] digest, int nTime)
    {
        if (digest == null)
        {
            throw new ArgumentNullException("digest");
        }
        // Compare against the last valid group index instead of multiplying first,
        // so a huge nTime cannot overflow the offset computation.
        if (digest.Length < 4 || nTime < 0 || nTime > (digest.Length - 4) / 4)
        {
            throw new ArgumentOutOfRangeException("nTime", nTime, "nTime must address a complete 4-byte group inside digest.");
        }

        int offset = nTime * 4;
        long rv = ((long)digest[offset + 3] << 24)
            | ((long)digest[offset + 2] << 16)
            | ((long)digest[offset + 1] << 8)
            | (long)digest[offset];
        return rv & 0xffffffffL; /* Truncate to 32-bits */
    }

    /// <summary>
    /// Computes the MD5 digest of the UTF-8 encoding of <paramref name="k"/>.
    /// </summary>
    /// <param name="k">Text to hash.</param>
    /// <returns>The digest produced by the MD5 implementation.</returns>
    public static byte[] computeMd5(string k)
    {
        // 'using' releases the hash object even if ComputeHash throws.
        using (MD5 md5 = MD5.Create())
        {
            return md5.ComputeHash(Encoding.UTF8.GetBytes(k));
        }
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Security.Cryptography;
using System.Diagnostics;
namespace KetamaHashLab
{
/// <summary>
/// A hash ring on which every node is placed at several hash positions
/// (virtual nodes). A key is served by the node at the key's own hash position
/// or, if there is none, at the next higher position, wrapping to the lowest
/// position past the end of the ring.
/// </summary>
public class KetamaNodeRing
{
    // Ring position -> node name, ordered by position.
    private readonly SortedList<long, string> ketamaNodes = new SortedList<long, string>();

    // Snapshot of the ring positions in ascending order; index i here matches
    // index i of ketamaNodes.Values.
    private readonly List<long> sortedKeys;

    /// <summary>
    /// Builds the ring and prints how long the construction took.
    /// </summary>
    /// <param name="nodes">Node names to place on the ring.</param>
    /// <param name="VirtualNodesCount">
    /// Requested number of ring positions per node. Positions are created in
    /// groups of four, so the integer division below rounds this down to a
    /// multiple of four.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="nodes"/> is null.</exception>
    public KetamaNodeRing(List<string> nodes, int VirtualNodesCount)
    {
        if (nodes == null)
        {
            throw new ArgumentNullException("nodes");
        }

        Stopwatch timer = Stopwatch.StartNew();

        // For every node, generate its virtual nodes.
        foreach (string node in nodes)
        {
            // Virtual nodes are produced four at a time.
            for (int i = 0; i < VirtualNodesCount / 4; i++)
            {
                // node + i gives each group of four virtual nodes its own name to hash.
                byte[] digest = HashHelper.computeMd5(node + i);

                // The digest is consumed as four 4-byte groups (h = 0..3), each
                // yielding one ring position; that is why virtual nodes come in
                // groups of four.
                for (int h = 0; h < 4; h++)
                {
                    // If two virtual nodes land on the same position, the later one wins.
                    ketamaNodes[HashHelper.hash(digest, h)] = node;
                }
            }
        }

        sortedKeys = new List<long>(ketamaNodes.Keys);

        timer.Stop();
        Console.WriteLine("KetamaNodeRing spent {0} seconds.", timer.Elapsed.TotalSeconds);
    }

    /// <summary>
    /// Returns the node owning ring position <paramref name="hash"/>: the node at
    /// exactly that position if present, otherwise the node at the next higher
    /// position (wrapping to the lowest). Logs a line when the position is an
    /// exact hit.
    /// </summary>
    string GetNodeForKey_Optimized(long hash)
    {
        int index = FindRingIndex(hash, sortedKeys);
        if (sortedKeys[index] == hash)
        {
            Console.WriteLine("ketamaNodes contains key :{0}.", hash);
        }
        // sortedKeys mirrors ketamaNodes.Keys, so the same index addresses the node.
        return ketamaNodes.Values[index];
    }

    /// <summary>
    /// Hashes <paramref name="resourceHash"/> with MD5, takes the first 4-byte
    /// group of the digest as ring position and returns the node owning it.
    /// </summary>
    /// <param name="resourceHash">Resource key to map.</param>
    /// <returns>The name of the node the key maps to.</returns>
    /// <exception cref="ArgumentException">The ring contains no positions.</exception>
    public string GetMappedKey(string resourceHash)
    {
        byte[] digest = HashHelper.computeMd5(resourceHash);
        return GetNodeForKey_Optimized(HashHelper.hash(digest, 0));
    }

    /// <summary>
    /// Finds, in the ascending list <paramref name="src"/>, the element equal to
    /// <paramref name="key"/> or, if there is none, the smallest element greater
    /// than it; when every element is smaller, wraps around to the first element.
    /// </summary>
    /// <param name="key">Value to look up.</param>
    /// <param name="src">Non-empty list sorted in ascending order.</param>
    /// <returns>The matching or succeeding element of <paramref name="src"/>.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="src"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="src"/> is empty.</exception>
    public static long FindKeyBinarySearch(long key, List<long> src)
    {
        return src[FindRingIndex(key, src)];
    }

    /// <summary>
    /// Index-returning core of <see cref="FindKeyBinarySearch"/>.
    /// </summary>
    private static int FindRingIndex(long key, List<long> src)
    {
        if (src == null)
        {
            throw new ArgumentNullException("src");
        }
        if (src.Count == 0)
        {
            throw new ArgumentException("The key list must not be empty.", "src");
        }

        int index = src.BinarySearch(key);
        if (index < 0)
        {
            // Not found: the complement is the index of the first larger element,
            // or src.Count when key is larger than every element.
            index = ~index;
        }
        return index == src.Count ? 0 : index;
    }
}
}
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace KetamaHashLab
{
/// <summary>
/// Console entry point: runs one simulation and then waits for input.
/// </summary>
class Program
{
    /// <summary>
    /// Creates a <c>Simulator</c>, executes it, and reads one character from
    /// standard input before returning.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        new Simulator().Execute();

        // Presumably here to keep the console window open until the user reacts.
        Console.Read();
    }
}
}
output:
Resource count is 100000 .
Virtual Nodes count is 500 .
Normal case : nodes count : 20
KetamaNodeRing spent 0.150381 seconds.
Total MappResources2Nodes spent 2.333835 seconds.
Added case : nodes count : 22
KetamaNodeRing spent 0.160146 seconds.
Total MappResources2Nodes spent 2.4441795 seconds.
Reduced case : nodes count : 18
KetamaNodeRing spent 0.113274 seconds.
Total MappResources2Nodes spent 2.1668535 seconds.
addCount is 90646, reduceCount is 89923 .
Same percent in added case : 90.646%
Same percent in reduced case : 89.923%