PHP切割汉字

<?php
/*
 * A UTF-8 encoded character occupies 1 to 4 bytes; common Chinese characters take 3 bytes.
 */
/*--------------------------方法一截取中文字符串方法------------------------------*/
/**
 * Cut a byte range out of a double-byte encoded string without splitting a character.
 *
 * Every byte above 0xA0 is treated as the first byte of a two-byte character.
 * NOTE(review): that rule fits GB2312/EUC-CN style text, not UTF-8 — confirm with callers.
 *
 * @param string $str   Input string.
 * @param int    $start Byte offset where the result begins. If it falls inside a
 *                      two-byte character, the result begins at the next character.
 * @param int    $len   Number of bytes to cover, counted from $start. A two-byte
 *                      character that begins inside the range is returned whole.
 * @return string The selected part of $str.
 */
function msubstr($str, $start, $len)
{
    $tmpstr = "";
    // Never scan past the end of the input.
    $end = min($start + $len, strlen($str));
    // Always walk from byte 0 so that character boundaries are known at $start.
    for ($i = 0; $i < $end; $i++) {
        $width = ord(substr($str, $i, 1)) > 0xa0 ? 2 : 1;
        if ($i >= $start) {
            $tmpstr .= substr($str, $i, $width);
        }
        // Skip the trail byte of a two-byte character.
        $i += $width - 1;
    }
    return $tmpstr;
}


/*----------------------------第二种方法-----------------------------------*/
//截取的是UTF-8字符串
/**
 * Take the leading part of a UTF-8 string that fits into a given display width.
 *
 * An ASCII byte costs one unit of width; any non-ASCII character costs two.
 * A character that would exceed the width is dropped and the scan stops there.
 *
 * @param string $str UTF-8 encoded input.
 * @param int    $len Maximum width (ASCII = 1, non-ASCII character = 2).
 * @return string The leading characters of $str that fit into $len.
 */
function utf_substr($str, $len)
{
    $pieces = [];
    $offset = 0;
    $size = strlen($str);
    for ($i = 0; $i < $len && $offset < $size; $i++) {
        $lead = ord(substr($str, $offset, 1));
        $bytes = 1;
        if ($lead > 127) {
            // A non-ASCII character uses a second unit of width.
            $i++;
            if ($i >= $len) {
                break;
            }
            // The lead byte tells how many bytes the sequence occupies.
            if ($lead >= 0xF0) {
                $bytes = 4;
            } elseif ($lead >= 0xE0) {
                $bytes = 3;
            } elseif ($lead >= 0xC0) {
                $bytes = 2;
            }
        }
        $pieces[] = substr($str, $offset, $bytes);
        $offset += $bytes;
    }
    return implode('', $pieces);
}


/*-------------------------------------第三种方法(UTF-8)--------------------------------*/
/**
 * Shorten a UTF-8 string to a display width and mark the cut with "...".
 *
 * ASCII characters count as width 1, multi-byte characters as width 2.
 * A string whose total width does not exceed $length is returned without
 * an ellipsis. Otherwise characters are kept until the running width
 * exceeds $length - 3, and "..." is appended.
 * Bytes that do not form a sequence accepted by the pattern (for example NUL)
 * are dropped from the result.
 *
 * @param string $string UTF-8 encoded input.
 * @param int    $length Width limit (ASCII = 1, multi-byte character = 2).
 * @return string The original characters, or the shortened text followed by "...".
 */
function cutstr($string, $length)
{
    // Split into characters; a 2-byte character is a lead byte FOLLOWED BY a continuation byte.
    preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info);
    $chars = $info[0];

    // Nothing to cut when the whole string already fits.
    $totalWidth = 0;
    foreach ($chars as $char) {
        $totalWidth += ord($char) > 127 ? 2 : 1;
    }
    if ($totalWidth <= $length) {
        return join('', $chars);
    }

    $wordscut = "";
    $width = 0;
    foreach ($chars as $char) {
        $wordscut .= $char;
        $width += ord($char) > 127 ? 2 : 1;
        // Three units of width are reserved for the ellipsis.
        if ($width > $length - 3) {
            return $wordscut . "...";
        }
    }
    return $wordscut;
}

// Demo: shorten a mixed digit/Chinese string with a width limit of 10 and print the result.
$string = '312哈哈,这个组合很难切割哦';
print cutstr($string, 10);


/*---------------------------------下面是曾经用过的截取第三个的字符串的------------------------------*/
// $name1 = mysql_result($my_rst,0,"name");
// $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
// $name = $r[0];
// if($name == ""){
// $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
// '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
// '$1',$name1);
// }

/*--------------------------------------------第四种方法(UTF-8)---------------------------------------------*/
/**
 * Cut a UTF-8 string to a visual length and append "..." when text was removed.
 *
 * Visual length is counted as follows: a multi-byte character or an upper-case
 * ASCII letter counts 1; every other single byte (lower-case letters, digits,
 * half-width punctuation, ...) counts 0.5.
 *
 * @param string    $sourcestr UTF-8 encoded input.
 * @param int|float $cutlength Visual length to keep.
 * @return string The kept characters, followed by "..." only if part of the
 *                input was cut off.
 */
function cut_str($sourcestr, $cutlength)
{
    $returnstr = '';
    $i = 0; // byte offset into $sourcestr
    $n = 0; // visual length collected so far
    $str_length = strlen($sourcestr);
    while ($n < $cutlength && $i < $str_length) {
        $ascnum = ord(substr($sourcestr, $i, 1));
        if ($ascnum >= 240) {
            // Lead byte of a 4-byte sequence.
            $bytes = 4;
            $n++;
        } elseif ($ascnum >= 224) {
            // Lead byte of a 3-byte sequence.
            $bytes = 3;
            $n++;
        } elseif ($ascnum >= 192) {
            // Lead byte of a 2-byte sequence.
            $bytes = 2;
            $n++;
        } elseif ($ascnum >= 65 && $ascnum <= 90) {
            // Upper-case letters are counted as wide as a multi-byte character.
            $bytes = 1;
            $n++;
        } else {
            // Other single bytes are counted as half a wide character.
            $bytes = 1;
            $n = $n + 0.5;
        }
        $returnstr .= substr($sourcestr, $i, $bytes);
        $i += $bytes;
    }
    // Mark the cut only when unread bytes remain.
    if ($i < $str_length) {
        $returnstr = $returnstr . "...";
    }
    return $returnstr;
}


/*--------------------第五种方法(UTF-8)---------------------------------------------*/

/**
 * Cut part of a UTF-8 string that may contain HTML entities, without splitting
 * a multi-byte character or an entity.
 *
 * How characters are counted:
 *  - "&lt;", "&gt;", "&amp;", "&quot;" and a bare "&" each count as one narrow character;
 *  - a numeric entity ("&#123;", trailing ";" optional) and every multi-byte
 *    sequence count as one wide character;
 *  - with $magic, upper-case ASCII letters count as wide, all other ASCII as narrow;
 *  - a continuation byte met outside a sequence is kept but not counted.
 *
 * @param string     $title Input text.
 * @param int        $start Byte offset to start from. If it points at a
 *                          continuation byte it is moved back to the lead byte.
 * @param int|string $len   Budget. With $magic it is measured in wide characters
 *                          (a narrow character costs half). Without $magic it is a
 *                          plain character count. "" means the byte length of $title.
 * @param bool       $magic Selects width-based (true) or per-character (false) counting.
 * @return string The selected part of $title. If $title has no more than $len bytes,
 *                substr($title, $start, $len) is returned directly.
 */
function FSubstr($title, $start, $len = "", $magic = true)
{
    $size = strlen($title);
    if ($len == "") {
        $len = $size;
    }

    // If $start points into the middle of a sequence, move it back to the lead byte.
    if ($start != 0) {
        $startv = ord(substr($title, $start, 1));
        if ($startv >= 128 && $startv < 192) {
            for ($i = $start - 1; $i > 0; $i--) {
                if (ord(substr($title, $i, 1)) >= 192) {
                    break;
                }
            }
            $start = $i;
        }
    }

    if ($size <= $len) {
        return substr($title, $start, $len);
    }

    $alen = 0;    // narrow characters seen
    $blen = 0;    // wide characters seen
    $realnum = 0; // characters seen, regardless of width
    $length = 0;  // bytes to return

    for ($i = $start; $i < $size; $i++) {
        $cur = substr($title, $i, 1);
        $code = ord($cur);

        $cstep = 1;      // bytes occupied by the current character
        $ctype = 0;      // 1 = remove this character again if it overshoots the budget
        $wide = false;   // true = counts as a wide character
        $counted = true; // false = keep the byte but do not count it as a character

        if ($cur === "&") {
            if (substr($title, $i, 4) === "&lt;" || substr($title, $i, 4) === "&gt;") {
                $cstep = 4;
            } elseif (substr($title, $i, 5) === "&amp;") {
                $cstep = 5;
            } elseif (substr($title, $i, 6) === "&quot;") {
                $cstep = 6;
            } elseif (preg_match('/\G&#\d+;?/', $title, $match, 0, $i)) {
                // \G pins the match to the current offset, so an entity further
                // along in the string is never mistaken for one starting here.
                $cstep = strlen($match[0]);
                $wide = true;
                $ctype = 1;
            }
            // Otherwise this is a bare "&": one narrow, one-byte character.
        } elseif ($code >= 192) {
            // Lead byte: the value tells how long the sequence is.
            if ($code >= 252) {
                $cstep = 6;
            } elseif ($code >= 248) {
                $cstep = 5;
            } elseif ($code >= 240) {
                $cstep = 4;
            } elseif ($code >= 224) {
                $cstep = 3;
            } else {
                $cstep = 2;
            }
            $wide = true;
            $ctype = 1;
        } elseif ($code >= 128) {
            // Continuation byte outside a sequence: pass through uncounted.
            $counted = false;
        } elseif ($code >= 65 && $code <= 90) {
            // Upper-case letter: wide, but kept even when it overshoots ($ctype stays 0).
            $wide = true;
        }

        $length += $cstep;
        $i += $cstep - 1;
        if ($counted) {
            $realnum++;
            if ($wide) {
                $blen++;
            } else {
                $alen++;
            }
        }

        if ($magic) {
            $used = $blen * 2 + $alen; // width in half units
            if ($used == $len * 2) {
                break;
            }
            if ($used == $len * 2 + 1) {
                // Overshot by half a unit: drop the last character if it may be dropped.
                if ($ctype == 1) {
                    $length -= $cstep;
                }
                break;
            }
        } elseif ($realnum == $len) {
            break;
        }
    }

    return substr($title, $start, $length);
}
/**
 * Return $len characters of a UTF-8 string, starting at character index $from.
 *
 * A character is one ASCII byte, or a lead byte (0xC0-0xFF) followed by one or
 * more continuation bytes (0x80-0xBF). Counting stops at the first byte that
 * does not start such a character; anything from there on is ignored.
 *
 * @param string $str  UTF-8 encoded input.
 * @param int    $from Zero-based index of the first character to return;
 *                     negative values are treated as 0.
 * @param int    $len  Maximum number of characters to return; negative values
 *                     are treated as 0.
 * @return string The selected characters ("" when $from is past the end).
 */
function utf8Substr($str, $from, $len)
{
    // Keep the arguments out of the pattern text: no quantifier limit, no injected syntax.
    $from = max(0, (int) $from);
    $len = max(0, (int) $len);

    // \G makes every match start where the previous one ended, so the split
    // ends at the first malformed byte.
    if (!preg_match_all('#\G(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)#', $str, $found)) {
        return '';
    }

    return implode('', array_slice($found[0], $from, $len));
}

// Demo: keep the first 15 characters of a mixed Chinese/ASCII title and print them.
echo $title = utf8Substr('你哈珀niad1纳斯达wop asdni你爱谁都没阿斯顿撒旦12ccs- sd', 0, 15);

?>

 

posted @ 2017-12-27 16:56  雨落知音  阅读(1958)  评论(0)  编辑  收藏  举报