字符串如何排序

  搜索引擎中通常使用中文和英文作为关键字，为了达到快速搜索的目的，常常需要对关键字进行排序，下面就说说如何对中英文字符串进行排序。

  中文字符串按首字拼音排序

复制代码
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

import net.sourceforge.pinyin4j.PinyinHelper;


public class PinyinComparator implements Comparator {

private String concatPinyinStringArray(String[] pinyinArray) {
StringBuffer pinyinStrBuf = new StringBuffer();

if ((null != pinyinArray) && (pinyinArray.length > 0)) {
for (int i = 0; i < pinyinArray.length; i++) {
pinyinStrBuf.append(pinyinArray[i]);
}
}
String outputString = pinyinStrBuf.toString();
return outputString;
}
public int compare(Object o1, Object o2) {
char c1 = ((String) o1).charAt(0);
char c2 = ((String) o2).charAt(0);
// System.out.println("c1--------->"+c1+"----c2--------------->"+c2);
return concatPinyinStringArray(
PinyinHelper.toHanyuPinyinStringArray(c1)).compareTo(
concatPinyinStringArray(PinyinHelper
.toHanyuPinyinStringArray(c2)));
}

public static void main(String[] args) {
String[] data = { "孙为", "孟的", "宋个", "尹个", "廖好", "张大", "张就", "张你", "徐人", "昆刚",
"曹吃", "曾看", "怡非" };

List<String> list = Arrays.asList(data);
Arrays.sort(data, new PinyinComparator());
System.out.println(list);
}
}
复制代码

Arrays中的sort算法

  下面是 java.util.Arrays 中 sort 算法的源码（JDK 6 及更早版本的实现，JDK 7 起对基本类型数组改用双轴快速排序）。这里以对 int 数组排序的算法为例，分析一下 JDK 源码中的排序算法：

复制代码
public static void sort(int[] a) {  
  sort1(a, 0, a.length);
}  
/**
 * Sorts the sub-range x[off .. off+len-1] in place into ascending order.
 *
 * Ranges of fewer than 7 elements are handled by insertion sort. Larger ranges are
 * partitioned around a sampled value v into "less than v", "equal to v" and "greater than v"
 * sections, and the "less" and "greater" sections are then sorted recursively.
 *
 * @param x   the array containing the range to sort
 * @param off index of the first element of the range
 * @param len number of elements in the range
 */
private static void sort1(int x[], int off, int len) {  
  // Insertion sort on the smallest ranges (this is NOT a merge sort)
  if (len < 7) {
   for (int i=off; i<len+off; i++)
   for (int j=i; j>off && x[j-1]>x[j]; j--)
   swap(x, j, j-1);
   return;
  }

  // Choose the partition element. For larger ranges a median of sampled elements is used
  // instead of the plain middle element, to reduce the risk of quicksort degenerating.
  int m = off + (len >> 1); // Small arrays, middle element
  if (len > 7) {
  int l = off;
  int n = off + len - 1;
   if (len > 40) { // Big arrays, pseudomedian of 9
      int s = len/8;
     l = med3(x, l, l+s, l+2*s);
     m = med3(x, m-s, m, m+s);
      n = med3(x, n-2*s, n-s, n);
      }
   m = med3(x, l, m, n); // Mid-size, med of 3
  }
  // The chosen partition value
  int v = x[m];
  // Partition step: b scans upward and c scans downward. Elements equal to v are parked at
  // the two ends of the range (x[off..a-1] and x[d+1..off+len-1]) while smaller elements
  // collect on the left and larger elements on the right.
  int a = off, b = a, c = off + len - 1, d = c;
  while(true) {
   while (b <= c && x[b] <= v) {
     if (x[b] == v) swap(x, a++, b);
     b++;
   }
   while (c >= b && x[c] >= v) {
     if (x[c] == v) swap(x, c, d--);
     c--;
   }
   if (b > c) break;
   swap(x, b++, c--);
  }

  // Swap partition elements back to middle
  int s, n = off + len;
  s = Math.min(a-off, b-a ); vecswap(x, off, b-s, s);
  s = Math.min(d-c, n-d-1); vecswap(x, b, n-s, s);

  // Recursively sort non-partition-elements
  // (b-a elements smaller than v now start at off; d-c larger elements end the range)
  if ((s = b-a) > 1) sort1(x, off, s);
  if ((s = d-c) > 1) sort1(x, n-s, s);
}

/**
 * Exchanges the elements stored at two positions of an array.
 *
 * @param x the array to modify in place
 * @param a index of the first element
 * @param b index of the second element
 */
private static void swap(int x[], int a, int b) {
    final int held = x[b];
    x[b] = x[a];
    x[a] = held;
}

/**
 * Exchanges two runs of n consecutive elements: x[a .. (a+n-1)] and x[b .. (b+n-1)].
 * The pairs are swapped one by one from the lowest index upward.
 *
 * @param x the array to modify in place
 * @param a start index of the first run
 * @param b start index of the second run
 * @param n number of elements in each run; nothing happens when n is not positive
 */
private static void vecswap(int x[], int a, int b, int n) {
    int left = a;
    int right = b;
    int done = 0;
    while (done < n) {
        swap(x, left, right);
        done++;
        left++;
        right++;
    }
}

/**
 * Finds which of three array positions holds the median of the three values stored there.
 *
 * @param x the array being inspected
 * @param a first candidate index
 * @param b second candidate index
 * @param c third candidate index
 * @return whichever of a, b or c indexes the median value
 */
private static int med3(int x[], int a, int b, int c) {
    final int first = x[a];
    final int second = x[b];
    final int third = x[c];

    if (first < second) {
        // first < second: the median is second unless third falls below it.
        if (second < third) {
            return b;
        }
        if (first < third) {
            return c;
        }
        return a;
    }

    // first >= second: mirror image of the case above.
    if (second > third) {
        return b;
    }
    if (first > third) {
        return c;
    }
    return a;
}
复制代码

 

posted @ 2012-05-13 16:51  精灵博客园  阅读(338)  评论(0)  编辑  收藏  举报