Java 获取中文字符的拼音首字母
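原理: GB2312 编码中的一级汉字(16–55 区,共 3755 个常用字)是按照拼音顺序排列的,因此只要算出汉字的区位码,再看它落在哪个区间,就能得到拼音首字母。
注意: 一些生僻的字无法获得正确的首字母。原因是 GB2312 的二级汉字(56–87 区)按部首/笔画而不是拼音排序,而更生僻的字是后来才加入 GBK 等字符集的,根本不在 GB2312 之内。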
import java.io.UnsupportedEncodingException;

/**
 * Maps Chinese characters to the first letter of their pinyin reading.
 *
 * <p>The lookup relies on the GB2312 row/cell ("area") code of a character:
 * the hanzi covered by {@link #AREA_CODES} are laid out in pinyin order, so
 * the interval an area code falls into identifies the initial letter.
 *
 * <p>Note: only hanzi whose area code lies in 1601..5589 are translated.
 * Any other character that GB2312 can encode is returned unchanged, and a
 * character GB2312 cannot encode comes back as whatever replacement the
 * encoder substitutes (typically {@code "?"}).
 */
class ChineseInital {
    /**
     * Lower bounds (inclusive) of the area-code interval for each entry of
     * {@link #LETTERS}; the final element is the exclusive upper bound of the
     * last interval, so this array is one element longer than LETTERS.
     */
    private static final int[] AREA_CODES = { 1601, 1637, 1833, 2078, 2274,
            2302, 2433, 2594, 2787, 3106, 3212, 3472, 3635, 3722, 3730, 3858,
            4027, 4086, 4390, 4558, 4684, 4925, 5249, 5590 };

    /** Initial letters, index-aligned with the intervals of AREA_CODES. */
    private static final String[] LETTERS = { "a", "b", "c", "d", "e",
            "f", "g", "h", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s",
            "t", "w", "x", "y", "z" };

    /** Offset subtracted from each GB2312 byte to obtain the row / cell number. */
    private static final int GB2312_BYTE_OFFSET = 160;

    /**
     * Returns the initials of every character in the given string.
     *
     * <p>Each UTF-16 char is translated independently with
     * {@link #getFirstLetter(String)}; whitespace characters contribute
     * nothing to the result.
     *
     * @param str the text to translate; may be {@code null}
     * @return the concatenated initials, or {@code ""} when {@code str} is
     *         {@code null} or blank
     */
    public static String getAllFirstLetter(String str) {
        if (str == null || str.trim().length() == 0) {
            return "";
        }

        // StringBuilder avoids re-copying the partial result on every character.
        StringBuilder initials = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            initials.append(getFirstLetter(str.substring(i, i + 1)));
        }
        return initials.toString();
    }

    /**
     * Returns the pinyin initial of the first character of the given string.
     *
     * @param chinese the character to translate (only the first character
     *        is considered); may be {@code null}
     * @return the lower-case initial for a hanzi in the supported range;
     *         otherwise the first character itself (or the encoder's
     *         replacement if it is not encodable in GB2312); {@code ""} when
     *         the argument is {@code null} or blank
     */
    public static String getFirstLetter(String chinese) {
        if (chinese == null || chinese.trim().length() == 0) {
            return "";
        }

        // Re-read the GB2312 bytes as ISO-8859-1 so that every byte becomes
        // exactly one char whose numeric value is the byte value.
        String raw = conversionStr(chinese, "GB2312", "ISO8859-1");
        if (raw.length() <= 1) {
            // A single byte: not a double-byte hanzi, return it as is.
            return raw;
        }

        int sector = raw.charAt(0) - GB2312_BYTE_OFFSET;
        int position = raw.charAt(1) - GB2312_BYTE_OFFSET;
        int areaCode = sector * 100 + position;

        if (areaCode > 1600 && areaCode < 5590) {
            for (int i = 0; i < LETTERS.length; i++) {
                if (areaCode >= AREA_CODES[i] && areaCode < AREA_CODES[i + 1]) {
                    return LETTERS[i];
                }
            }
            // The intervals cover the whole guarded range, so this is not
            // expected to be reached; kept to mirror the fall-through value.
            return raw;
        }

        // Outside the pinyin-ordered range (symbols, other hanzi, multi-char
        // ASCII input): restore the original text and return its first char.
        return conversionStr(raw, "ISO8859-1", "GB2312").substring(0, 1);
    }

    /**
     * Encodes a string with one charset and decodes the resulting bytes with
     * another.
     *
     * <p>This is best-effort: if either charset is not supported by the
     * runtime, a message is printed to standard output and the input is
     * returned unchanged.
     *
     * @param str the string to convert
     * @param charsetName the charset used to encode {@code str} into bytes
     * @param toCharsetName the charset used to decode those bytes
     * @return the converted string, or {@code str} itself on failure
     */
    private static String conversionStr(String str, String charsetName, String toCharsetName) {
        try {
            str = new String(str.getBytes(charsetName), toCharsetName);
        } catch (UnsupportedEncodingException ex) {
            System.out.println("字符串编码转换异常:" + ex.getMessage());
        }
        return str;
    }

}
调用:
1 String s = ChineseInital.getAllFirstLetter("中华人民共和国"); 2 System.out.println(s); // => "zhrmghg"
(整理于网络)