PHP切割汉字
<?php
/*
 * A collection of helpers for truncating strings that contain Chinese
 * (multi-byte) characters. A UTF-8 character occupies 1 to 4 bytes.
 */

/*-------------------------- Method 1: double-byte (GBK/GB2312 style) substring --------------------------*/

/**
 * Byte-oriented substring that never splits a double-byte character.
 *
 * Any byte greater than 0xA0 is treated as the first byte of a two-byte
 * character. The string is always scanned from byte 0 so that character
 * boundaries stay aligned; only characters that begin at or after $start
 * are copied into the result.
 *
 * @param string $str   Source string.
 * @param int    $start Byte offset of the first character to return.
 * @param int    $len   Number of bytes to cover; a two-byte character that
 *                      begins inside the window is kept whole, so the result
 *                      may be one byte longer.
 * @return string
 */
function msubstr($str, $start, $len)
{
    $tmpstr = "";
    // Never scan past the end of the input.
    $end = min($start + $len, strlen($str));

    for ($i = 0; $i < $end; $i++) {
        $charStart = $i;
        if (ord(substr($str, $i, 1)) > 0xa0) {
            // Lead byte of a two-byte character: take both bytes together.
            $piece = substr($str, $i, 2);
            $i++;
        } else {
            $piece = substr($str, $i, 1);
        }

        // Characters in front of $start are only walked over, not copied.
        if ($charStart >= $start) {
            $tmpstr .= $piece;
        }
    }

    return $tmpstr;
}

/*-------------------------- Method 2: UTF-8 prefix by display width --------------------------*/

/**
 * Return a prefix of a UTF-8 string that fits into $len width units.
 *
 * A single-byte character costs one unit and a multi-byte character costs
 * two. A multi-byte character is only kept when both of its units fit.
 *
 * @param string $str UTF-8 encoded source string.
 * @param int    $len Width budget in units.
 * @return string
 */
function utf_substr($str, $len)
{
    $new_str = [];
    $offset = 0;
    $total = strlen($str);

    for ($i = 0; $i < $len && $offset < $total; $i++) {
        $lead = ord(substr($str, $offset, 1));

        if ($lead > 127) {
            $i++; // a multi-byte character uses up a second unit
            if ($i < $len) {
                // Sequence length is taken from the lead byte.
                if ($lead >= 0xF0) {
                    $width = 4;
                } elseif ($lead >= 0xE0) {
                    $width = 3;
                } elseif ($lead >= 0xC0) {
                    $width = 2;
                } else {
                    $width = 1; // stray continuation byte
                }
                $new_str[] = substr($str, $offset, $width);
                $offset += $width;
            }
        } else {
            $new_str[] = substr($str, $offset, 1);
            $offset++;
        }
    }

    return implode('', $new_str);
}

/*-------------------------- Method 3 (UTF-8): regex split with ellipsis --------------------------*/

/**
 * Truncate a UTF-8 string to roughly $length width units, appending "...".
 *
 * The string is split into characters with a byte-level regular expression;
 * bytes that do not form a valid sequence are dropped. A single-byte
 * character counts as one unit and a multi-byte character as two. The cut
 * happens after the first character that pushes the width above
 * $length - 3, and "..." is appended only if characters were actually
 * left out.
 *
 * @param string $string UTF-8 encoded source string.
 * @param int    $length Width budget, including room for the ellipsis.
 * @return string
 */
function cutstr($string, $length)
{
    // Single quotes: the \x.. escapes are interpreted by PCRE (byte mode).
    $pattern = '/[\x01-\x7f]'
        . '|[\xc2-\xdf][\x80-\xbf]'
        . '|\xe0[\xa0-\xbf][\x80-\xbf]'
        . '|[\xe1-\xef][\x80-\xbf][\x80-\xbf]'
        . '|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]'
        . '|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/';
    preg_match_all($pattern, $string, $info);

    $chars = $info[0];
    $lastIndex = count($chars) - 1;
    $wordscut = "";
    $j = 0;

    foreach ($chars as $index => $char) {
        $wordscut .= $char;
        $j += ord($char) > 127 ? 2 : 1;
        if ($j > $length - 3) {
            // Only mark the string as shortened when something follows.
            return $index < $lastIndex ? $wordscut . "..." : $wordscut;
        }
    }

    return $wordscut;
}

$string = "312哈哈,这个组合很难切割哦";
echo cutstr($string, 10);

/*-------------------------- Legacy snippet once used to extract the third character --------------------------*/
// $name1 = mysql_result($my_rst,0,"name");
// $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
// $name = $r[0];
// if($name == ""){
//     $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
//         '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
//         '$1',$name1);
// }

/*-------------------------- Method 4 (UTF-8): weighted character count --------------------------*/

/**
 * Truncate a UTF-8 string to $cutlength "full-width" characters.
 *
 * Multi-byte characters and uppercase ASCII letters each count as 1;
 * every other single-byte character counts as 0.5. "..." is appended when
 * part of the input was cut off.
 *
 * @param string    $sourcestr UTF-8 encoded source string.
 * @param int|float $cutlength Maximum weighted length.
 * @return string
 */
function cut_str($sourcestr, $cutlength)
{
    $returnstr = '';
    $i = 0; // byte offset into $sourcestr
    $n = 0; // weighted length collected so far
    $str_length = strlen($sourcestr);

    while ($n < $cutlength && $i < $str_length) {
        $ascnum = ord(substr($sourcestr, $i, 1));

        if ($ascnum >= 240) {
            $bytes = 4;
            $weight = 1;
        } elseif ($ascnum >= 224) {
            $bytes = 3;
            $weight = 1;
        } elseif ($ascnum >= 192) {
            $bytes = 2;
            $weight = 1;
        } elseif ($ascnum >= 65 && $ascnum <= 90) {
            // Uppercase letters are visually wide, so they count as a full character.
            $bytes = 1;
            $weight = 1;
        } else {
            // Lowercase letters, digits and half-width punctuation are about half as wide.
            $bytes = 1;
            $weight = 0.5;
        }

        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;
        $n += $weight;
    }

    // Unconsumed bytes mean the string really was shortened.
    if ($i < $str_length) {
        $returnstr .= "...";
    }

    return $returnstr;
}

/*-------------------------- Method 5 (UTF-8): entity-aware substring --------------------------*/

/**
 * UTF-8 substring that treats HTML entities as single characters.
 *
 * If $start points into the middle of a multi-byte character it is moved
 * back to that character's lead byte. When the byte length of $title does
 * not exceed $len, a plain substr() is returned.
 *
 * With $magic enabled, $len is measured in full-width characters: multi-byte
 * characters, numeric entities (&#NNN;) and uppercase ASCII letters weigh 2
 * half-units, everything else (including &lt; &gt; &amp; &quot;) weighs 1,
 * and scanning stops at $len * 2 half-units. A multi-byte character or
 * numeric entity that overshoots by one half-unit is removed again.
 * With $magic disabled, $len is a plain character count.
 *
 * @param string     $title Source string (UTF-8, may contain HTML entities).
 * @param int        $start Byte offset to start from.
 * @param int|string $len   Length as described above; "" means the byte
 *                          length of $title.
 * @param bool       $magic Use width-weighted counting.
 * @return string
 */
function FSubstr($title, $start, $len = "", $magic = true)
{
    $total = strlen($title);
    if ($len == "") {
        $len = $total;
    }

    if ($start != 0) {
        $startv = ord(substr($title, $start, 1));
        if ($startv >= 128 && $startv < 192) {
            // $start hit a continuation byte: walk back to the lead byte.
            for ($i = $start - 1; $i > 0; $i--) {
                if (ord(substr($title, $i, 1)) >= 192) {
                    break;
                }
            }
            $start = $i;
        }
    }

    if ($total <= $len) {
        return substr($title, $start, $len);
    }

    $alen = 0;    // narrow characters seen (magic mode)
    $blen = 0;    // wide characters seen (magic mode)
    $realnum = 0; // characters seen
    $length = 0;  // bytes to return

    for ($i = $start; $i < $total; $i += $cstep) {
        $cur = substr($title, $i, 1);
        $code = ord($cur);

        $cstep = 1;      // bytes consumed by this character
        $ctype = 0;      // 1: character is dropped again if it overshoots
        $counted = true; // stray continuation bytes are not counted
        $wide = false;   // wide characters weigh two half-units

        if ($cur === "&") {
            $four = substr($title, $i, 4);
            if ($four === "&lt;" || $four === "&gt;") {
                $cstep = 4;
            } elseif (substr($title, $i, 5) === "&amp;") {
                $cstep = 5;
            } elseif (substr($title, $i, 6) === "&quot;") {
                $cstep = 6;
            } elseif (preg_match('/^&#\d+;?/', substr($title, $i), $match)) {
                // Anchored so only an entity starting right here is taken.
                $cstep = strlen($match[0]);
                $wide = true;
                $ctype = 1;
            }
            // Otherwise a bare ampersand: counted as one narrow character.
        } elseif ($code >= 192) {
            // Lead byte of a multi-byte sequence.
            if ($code >= 252) {
                $cstep = 6;
            } elseif ($code >= 248) {
                $cstep = 5;
            } elseif ($code >= 240) {
                $cstep = 4;
            } elseif ($code >= 224) {
                $cstep = 3;
            } else {
                $cstep = 2;
            }
            $wide = true;
            $ctype = 1;
        } elseif ($code >= 128) {
            // Stray continuation byte: keep the byte but do not count it.
            $counted = false;
        } elseif ($code >= 65 && $code <= 90) {
            // Uppercase ASCII letters are weighted as wide characters.
            $wide = true;
        }

        $length += $cstep;
        if ($counted) {
            $realnum++;
            if ($magic) {
                if ($wide) {
                    $blen++;
                } else {
                    $alen++;
                }
            }
        }

        if ($magic) {
            $units = $blen * 2 + $alen;
            if ($units == $len * 2) {
                break;
            }
            if ($units == $len * 2 + 1) {
                // Overshot by half a unit: take the last wide character back out.
                if ($ctype == 1) {
                    $length -= $cstep;
                }
                break;
            }
        } elseif ($realnum == $len) {
            break;
        }
    }

    return substr($title, $start, $length);
}
/**
 * Character-based substring for UTF-8 text, implemented with one regex.
 *
 * The pattern skips up to $from characters from the beginning of $str,
 * captures up to the next $len characters, and discards the rest; the
 * captured group becomes the result.
 *
 * @param string $str  UTF-8 encoded source string.
 * @param int    $from Number of leading characters to skip.
 * @param int    $len  Maximum number of characters to return.
 * @return string|null Result of preg_replace().
 */
function utf8Substr($str, $from, $len)
{
    // One character: an ASCII byte, or a lead byte followed by continuation bytes.
    $oneChar = '(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)';

    $skipPart = '^' . $oneChar . '{0,' . $from . '}';
    $keepPart = '(' . $oneChar . '{0,' . $len . '})';
    $pattern = '#' . $skipPart . $keepPart . '.*#s';

    return preg_replace($pattern, '$1', $str);
}

// Demo: print the first 15 characters of a mixed Chinese/ASCII string.
$title = "你哈珀niad1纳斯达wop asdni你爱谁都没阿斯顿撒旦12ccs- sd";
$title = utf8Substr($title, 0, 15);
echo $title;
?>