java JDK里面容器类的排序算法使用的主要是插入排序和归并排序,可能不同版本的实现有所不同,关键代码如下:
/** * Performs a sort on the section of the array between the given indices * using a mergesort with exponential search algorithm (in which the merge * is performed by exponential search). n*log(n) performance is guaranteed * and in the average case it will be faster then any mergesort in which the * merge is performed by linear search. * * @param in - * the array for sorting. * @param out - * the result, sorted array. * @param start * the start index * @param end * the end index + 1 */ @SuppressWarnings("unchecked") private static void mergeSort(Object[] in, Object[] out, int start, int end) { int len = end - start; // use insertion sort for small arrays if (len <= SIMPLE_LENGTH) { for (int i = start + 1; i < end; i++) { Comparable<Object> current = (Comparable<Object>) out[i]; Object prev = out[i - 1]; if (current.compareTo(prev) < 0) { int j = i; do { out[j--] = prev; } while (j > start && current.compareTo(prev = out[j - 1]) < 0); out[j] = current; } } return; } int med = (end + start) >>> 1; mergeSort(out, in, start, med); mergeSort(out, in, med, end); // merging // if arrays are already sorted - no merge if (((Comparable<Object>) in[med - 1]).compareTo(in[med]) <= 0) { System.arraycopy(in, start, out, start, len); return; } int r = med, i = start; // use merging with exponential search do { Comparable<Object> fromVal = (Comparable<Object>) in[start]; Comparable<Object> rVal = (Comparable<Object>) in[r]; if (fromVal.compareTo(rVal) <= 0) { int l_1 = find(in, rVal, -1, start + 1, med - 1); int toCopy = l_1 - start + 1; System.arraycopy(in, start, out, i, toCopy); i += toCopy; out[i++] = rVal; r++; start = l_1 + 1; } else { int r_1 = find(in, fromVal, 0, r + 1, end - 1); int toCopy = r_1 - r + 1; System.arraycopy(in, r, out, i, toCopy); i += toCopy; out[i++] = fromVal; start++; r = r_1 + 1; } } while ((end - r) > 0 && (med - start) > 0); // copy rest of array if ((end - r) <= 0) { System.arraycopy(in, start, out, i, med - start); } else { System.arraycopy(in, r, out, i, end - r); } }
看到编程珠玑上有一个很有趣的排序算法-位图法其思想是用1位来表示[0~n-1]中的整数是否存在。1表示存在,0表示不存在。即将正整数映射到bit集合中,每一个bit代表其映射的正整数是否存在。
比如{1,2,3,5,8,13}使用下列集合表示:
0 1 1 1 0 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0
伪代码如下:
for (i in [0~n-1]) bit[i] = 0;
for(i in [0~n-1])
if (i in input file)
bit[i] = 1
for(i in [0~n-1])
if(bit[i] == 1)
output i
用java 代码尝试下,效率果然不错:
public class javaUniqueSort { public static int[] temp = new int[1000001]; public static List<Integer> tempList = new ArrayList<Integer>(); public static int count; public static void main(final String[] args) { List<Integer> firstNum = new ArrayList<Integer>(); List<Integer> secondNum = new ArrayList<Integer>(); for (int i = 1; i <= 1000000; i++) { firstNum.add(i); secondNum.add(i); } Collections.shuffle(firstNum); Collections.shuffle(secondNum); getStartTime(); Collections.sort(firstNum); getEndTime("java sort run time "); getStartTime(); secondNum = uniqueSort(secondNum); getEndTime("uniqueSort run time "); } public static List<Integer> uniqueSort(final List<Integer> uniqueList) { javaUniqueSort.tempList.clear(); for (int i = 0; i < javaUniqueSort.temp.length; i++) { javaUniqueSort.temp[i] = 0; } for (int i = 0; i < uniqueList.size(); i++) { javaUniqueSort.temp[uniqueList.get(i)] = 1; } for (int i = 0; i < javaUniqueSort.temp.length; i++) { if (javaUniqueSort.temp[i] == 1) { javaUniqueSort.tempList.add(i); } } return javaUniqueSort.tempList; } public static void getStartTime() { javaShuffle.start = System.nanoTime(); } public static void getEndTime(final String s) { javaShuffle.end = System.nanoTime(); System.out.println(s + ": " + (javaShuffle.end - javaShuffle.start) + "ns"); } } 运行时间: java sort run time : 1257737334ns uniqueSort run time : 170228290ns java sort run time : 1202749828ns uniqueSort run time : 169327770ns
如果有重复数据,可以修改下:
public class javaDuplicateSort { public static List<Integer> tempList = new ArrayList<Integer>(); public static int count; public static void main(final String[] args) { Random random = new Random(); List<Integer> firstNum = new ArrayList<Integer>(); List<Integer> secondNum = new ArrayList<Integer>(); for (int i = 1; i <= 100000; i++) { firstNum.add(i); secondNum.add(i); firstNum.add(random.nextInt(i + 1)); secondNum.add(random.nextInt(i + 1)); } Collections.shuffle(firstNum); Collections.shuffle(secondNum); getStartTime(); Collections.sort(firstNum); getEndTime("java sort run time "); getStartTime(); secondNum = uniqueSort(secondNum); getEndTime("uniqueSort run time "); } public static List<Integer> uniqueSort(final List<Integer> uniqueList) { javaDuplicateSort.tempList.clear(); int[] temp = new int[200002]; for (int i = 0; i < temp.length; i++) { temp[i] = 0; } for (int i = 0; i < uniqueList.size(); i++) { temp[uniqueList.get(i)]++; } for (int i = 0; i < temp.length; i++) { for (int j = temp[i]; j > 0; j--) { javaDuplicateSort.tempList.add(i); } } return javaDuplicateSort.tempList; } public static void getStartTime() { javaShuffle.start = System.nanoTime(); } public static void getEndTime(final String s) { javaShuffle.end = System.nanoTime(); System.out.println(s + ": " + (javaShuffle.end - javaShuffle.start) + "ns"); } }
这种算法还是有很明显的局限性的,比如说要知道数据中最大的数值,更重要的是数据的疏密程度,比如说最大值为1000000而要数组大小只有100,那么效率会下降的非常明显。。。。。但是,使用位图法进行排序,确实让人眼前一亮。位图法通常是用来存储数据,判断某个数据存不存在或者判断数组是否存在重复 。
Kingle's Blog: http://www.cnblogs.com/Kingle/