PHP 实现获取汉字的拼音首字母
<?php
/**
 * Pinyin-initial helper for Chinese text.
 *
 * Modified by http://iulog.com @ 2013-05-07 (binary search fix).
 *
 * Behaviour of getInitials():
 *  - pure ASCII input is returned unchanged          abc123      => abc123
 *  - Chinese input yields upper-case pinyin initials 测试字符串   => CSZFC
 *  - mixed input keeps the ASCII part, upper-cased   我i我j       => WIWJ
 *
 * Only GB2312 level-1 hanzi (0xB0A1..0xD7F9) are ordered by pinyin and can be
 * mapped; every other non-ASCII character is dropped from the result.
 *
 * Usage:
 *   $py = new str2PY();
 *   $result = $py->getInitials('啊吧才的饿飞就好i就看了吗你哦平去人是他uv我想一在');
 */
class str2PY
{
    /**
     * First GB2312 code of each pinyin initial, in ascending order.
     * A code is encoded as (lead byte * 1000 + trail byte); since a trail byte
     * is always < 1000 this keeps the natural GB2312 ordering.
     * No hanzi starts with I, U or V, so those letters have no entry.
     */
    private $_pinyins = array(
        176161 => 'A',
        176197 => 'B',
        178193 => 'C',
        180238 => 'D',
        182234 => 'E',
        183162 => 'F',
        184193 => 'G',
        185254 => 'H',
        187247 => 'J',
        191166 => 'K',
        192172 => 'L',
        194232 => 'M',
        196195 => 'N',
        197182 => 'O',
        197190 => 'P',
        198218 => 'Q',
        200187 => 'R',
        200246 => 'S',
        203250 => 'T',
        205218 => 'W',
        206244 => 'X',
        209185 => 'Y',
        212209 => 'Z',
    );

    /**
     * Last GB2312 level-1 hanzi (0xD7F9) in the same lead*1000+trail encoding.
     * Codes above it are level-2 hanzi, which are sorted by radical rather
     * than by pinyin, so they cannot be mapped with the table above.
     */
    private $_lastCode = 215249;

    /** Normalised (lower-case) name of the input charset. */
    private $_charset = null;

    /**
     * Constructor.
     *
     * @param string $charset Encoding of the strings passed to getInitials():
     *                        'utf-8' (default, case-insensitive, 'utf8' also
     *                        accepted) or 'gb2312'.
     */
    public function __construct($charset = 'utf-8')
    {
        $charset = strtolower(trim((string) $charset));
        if ($charset === 'utf8') {
            $charset = 'utf-8';
        }
        $this->_charset = $charset;
    }

    /**
     * Split a GB2312 byte string into units: one byte for a single-byte
     * character, two bytes for a double-byte character.
     *
     * @param string $str GB2312-encoded bytes
     * @return array list of 1- or 2-byte strings (a dangling lead byte at the
     *               very end is returned as a 1-byte unit)
     */
    private function _cutWord($str)
    {
        $str = (string) $str;
        $words = array();
        $length = strlen($str);
        $pos = 0;
        // Single forward pass; the unit width is decided by the lead byte only.
        while ($pos < $length) {
            $width = $this->_isAscii($str[$pos]) ? 1 : 2;
            $words[] = substr($str, $pos, $width);
            $pos += $width;
        }
        return $words;
    }

    /**
     * Tell whether the first byte of $char is a single-byte character in a
     * GB2312 stream (byte value below 160; GB2312 lead bytes start at 0xA1).
     *
     * @param string $char
     * @return bool
     */
    private function _isAscii($char)
    {
        return (ord(substr($char, 0, 1)) < 160);
    }

    /**
     * Tell whether the whole string is plain 7-bit ASCII.
     *
     * @param string $str
     * @return bool true when no byte >= 0x80 occurs anywhere in $str
     */
    private function _isAsciis($str)
    {
        return preg_match('/[\x80-\xff]/', $str) !== 1;
    }

    /**
     * Convert a UTF-8 string to GB2312 one character at a time, so that a
     * character that has no GB2312 equivalent only drops itself instead of
     * failing the conversion of the whole string.
     *
     * Note: iconv() may still raise a notice for such a character.
     *
     * @param string $str UTF-8 text
     * @return string GB2312 bytes; '' when $str is not valid UTF-8
     */
    private function _utf8ToGb2312($str)
    {
        $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            return '';
        }
        $converted = '';
        foreach ($chars as $char) {
            if (strlen($char) === 1) {
                // ASCII is identical in both encodings.
                $converted .= $char;
                continue;
            }
            $gb = iconv('UTF-8', 'GB2312', $char);
            if ($gb !== false) {
                $converted .= $gb;
            }
        }
        return $converted;
    }

    /**
     * Return the pinyin initials of a string.
     *
     * Pure ASCII input is returned untouched. Otherwise every mappable hanzi
     * is replaced by its initial, single-byte characters are kept, unmappable
     * characters are dropped and the result is upper-cased.
     *
     * @param string $str text in the charset given to the constructor
     * @return string
     */
    public function getInitials($str)
    {
        if (!is_string($str)) {
            if (!is_scalar($str)) {
                return '';
            }
            $str = (string) $str;
        }
        // Compare against '' explicitly: empty() would also swallow "0".
        if ($str === '') {
            return '';
        }
        if ($this->_isAsciis($str)) {
            return $str;
        }
        if ($this->_charset === 'utf-8') {
            $str = $this->_utf8ToGb2312($str);
        }

        $result = '';
        foreach ($this->_cutWord($str) as $word) {
            if ($this->_isAscii($word)) {
                $result .= $word;
                continue;
            }
            if (strlen($word) < 2) {
                // Truncated double-byte character at the end of the input.
                continue;
            }
            $code = ord($word[0]) * 1000 + ord($word[1]);
            $key = $this->_search($code);
            if ($key !== -1) {
                $result .= $this->_pinyins[$key];
            }
        }
        return strtoupper($result);
    }

    /**
     * Map an ASCII code to an output character: digits and upper-case letters
     * as they are, lower-case letters upper-cased, anything else to '-'.
     * Currently not called from within this class.
     *
     * @param int $ascii
     * @return string
     */
    private function _getChar($ascii)
    {
        if ($ascii >= 48 && $ascii <= 57) {
            return chr($ascii); /* digit */
        } elseif ($ascii >= 65 && $ascii <= 90) {
            return chr($ascii); /* A-Z */
        } elseif ($ascii >= 97 && $ascii <= 122) {
            return chr($ascii - 32); /* a-z */
        } else {
            return '-'; /* anything else */
        }
    }

    /**
     * Binary-search the pinyin table for the range a GB2312 code falls into.
     *
     * @param int $code lead byte * 1000 + trail byte
     * @return int the key of $_pinyins whose range contains $code, or -1 when
     *             $code lies outside the level-1 hanzi area
     */
    private function _search($code)
    {
        $keys = array_keys($this->_pinyins);
        if ($code < $keys[0] || $code > $this->_lastCode) {
            return -1;
        }
        $low = 0;
        $high = count($keys) - 1;
        // Find the last key that is <= $code; the upper midpoint guarantees
        // progress when $low is moved up to $mid.
        while ($low < $high) {
            $mid = (int) (($low + $high + 1) / 2);
            if ($keys[$mid] <= $code) {
                $low = $mid;
            } else {
                $high = $mid - 1;
            }
        }
        return $keys[$low];
    }
}