lua处理中英文混合字符串

我用的是下面这个方法
复制代码
-- Return how many bytes the UTF-8 character introduced by lead byte `char` occupies.
--
-- char: numeric value of a byte (as returned by string.byte), or nil.
-- Returns 0 when `char` is nil (i.e. the index was past the end of the string),
-- otherwise 1..4.
--
-- UTF-8 lead-byte ranges: 0xC0-0xDF -> 2 bytes, 0xE0-0xEF -> 3 bytes,
-- 0xF0 and above -> 4 bytes. The boundaries are inclusive: the previous strict
-- comparisons (> 240, > 225, > 192) under-sized the lead bytes 0xF0, 0xE0 and
-- 0xE1, which split such characters in two. No validation is performed; every
-- byte below 0xC0 (ASCII and continuation bytes) is reported as size 1.
function chsize(char)
    if not char then
        return 0
    elseif char >= 240 then
        return 4
    elseif char >= 224 then
        return 3
    elseif char >= 192 then
        return 2
    else
        return 1
    end
end

-- Count the characters in `str`, where each character's byte width is
-- whatever chsize reports for its first byte.
-- Returns 0 for an empty string.
function utf8len(str)
    local total = #str
    local count, pos = 0, 1
    while pos <= total do
        count = count + 1
        -- Jump to the first byte of the next character.
        pos = pos + chsize(string.byte(str, pos))
    end
    return count
end

-- Extract `numChars` characters from `str`, beginning at the `startChar`-th
-- character (1-based). Character widths come from chsize.
-- A `startChar` of 1 or less starts at the first byte; asking for more
-- characters than remain returns everything up to the end of the string.
function utf8sub(str,startChar,numChars)
   -- Walk forward over the characters that precede the requested start.
   local first = 1
   while startChar > 1 do
       startChar = startChar - 1
       first = first + chsize(string.byte(str, first))
   end

   -- Walk forward over the requested characters, stopping at the end of str.
   local byteLen = #str
   local stop = first
   while numChars > 0 and stop <= byteLen do
       numChars = numChars - 1
       stop = stop + chsize(string.byte(str, stop))
   end

   -- `stop` now points at the byte just after the last wanted character.
   return str:sub(first, stop - 1)
end

-- Fetch the string to process. sale_name and STYLE_DKM are not defined in this
-- file (presumably supplied by the host environment -- TODO confirm).
STR = sale_name(STYLE_DKM)
-- NOTE(review): the original comment called this "the number of characters",
-- but the expression stores utf8len(STR) minus one -- confirm which is intended.
LEN = utf8len(STR)-1    -- number of characters in the string
-- Return how many bytes the UTF-8 character introduced by lead byte `char` occupies.
--
-- char: numeric value of a byte (as returned by string.byte), or nil.
-- Returns 0 when `char` is nil (i.e. the index was past the end of the string),
-- otherwise 1..4.
--
-- UTF-8 lead-byte ranges: 0xC0-0xDF -> 2 bytes, 0xE0-0xEF -> 3 bytes,
-- 0xF0 and above -> 4 bytes. The boundaries are inclusive: the previous strict
-- comparisons (> 240, > 225, > 192) under-sized the lead bytes 0xF0, 0xE0 and
-- 0xE1, which split such characters in two. No validation is performed; every
-- byte below 0xC0 (ASCII and continuation bytes) is reported as size 1.
function chsize(char)
    if not char then
        return 0
    elseif char >= 240 then
        return 4
    elseif char >= 224 then
        return 3
    elseif char >= 192 then
        return 2
    else
        return 1
    end
end

-- Count the characters in `str`, where each character's byte width is
-- whatever chsize reports for its first byte.
-- Returns 0 for an empty string.
function utf8len(str)
    local total = #str
    local count, pos = 0, 1
    while pos <= total do
        count = count + 1
        -- Jump to the first byte of the next character.
        pos = pos + chsize(string.byte(str, pos))
    end
    return count
end

-- Extract `numChars` characters from `str`, beginning at the `startChar`-th
-- character (1-based). Character widths come from chsize.
-- A `startChar` of 1 or less starts at the first byte; asking for more
-- characters than remain returns everything up to the end of the string.
function utf8sub(str,startChar,numChars)
   -- Walk forward over the characters that precede the requested start.
   local first = 1
   while startChar > 1 do
       startChar = startChar - 1
       first = first + chsize(string.byte(str, first))
   end

   -- Walk forward over the requested characters, stopping at the end of str.
   local byteLen = #str
   local stop = first
   while numChars > 0 and stop <= byteLen do
       numChars = numChars - 1
       stop = stop + chsize(string.byte(str, stop))
   end

   -- `stop` now points at the byte just after the last wanted character.
   return str:sub(first, stop - 1)
end

-- Sample mixed Chinese/English string. It is overwritten by the assignment
-- below before anything reads it.
STR = "中英文混合zh123英文"


-- Replace the sample with the real value. sale_name and STYLE_DKM are not
-- defined in this file (presumably supplied by the host environment -- TODO confirm).
STR = sale_name(STYLE_DKM)
-- Length of STR in characters, as counted by utf8len.
LEN = utf8len(STR)
-- Take 3 characters starting at character LEN-2, i.e. the final three.
return utf8sub(STR,LEN-2,3)   -- last 3 characters

 
复制代码

 

还有一个方法也测试通过了,变量命名不错,而且也考虑了 if 判断的性能,不过我最终没有采用下面这个方法。

复制代码
-- Return the byte offset at which the `index`-th character of `str` begins.
-- An `index` of 0 or 1 both yield offset 1. Once the walk runs past the end of
-- the string the offset stops advancing, so an out-of-range `index` yields the
-- offset just past the last byte.
function SubStringGetTrueIndex(str, index)
    local charStart = 1   -- offset of the character currently being visited
    local nextStart = 1   -- offset of the character after it
    local visited = 0
    repeat
        charStart = nextStart
        nextStart = charStart + SubStringGetByteCount(str, charStart)
        visited = visited + 1
    until visited >= index
    return charStart
end

-- Return the number of bytes occupied by the character whose first byte sits
-- at byte offset `index` of `str`.
-- Yields 0 when there is no byte at `index`; 2, 3 or 4 for lead bytes in
-- 192-223, 224-239 and 240-247 respectively; and 1 for every other byte value.
function SubStringGetByteCount(str, index)
    local lead = string.byte(str, index)
    if lead == nil then
        return 0
    end
    if lead >= 240 and lead <= 247 then
        return 4
    end
    if lead >= 224 and lead <= 239 then
        return 3
    end
    if lead >= 192 and lead <= 223 then
        return 2
    end
    -- ASCII and anything outside the lead-byte ranges above counts as one byte.
    return 1
end

-- Slice a mixed Chinese/English string by character positions.
-- Returns the characters from `startIndex` through `endIndex` (both inclusive).
-- Returns nothing when `str` is not a string, or when either index is nil or
-- negative.
function SubString(str, startIndex, endIndex)
    -- Checks are evaluated left to right, so a later argument is only
    -- inspected once the earlier ones have passed.
    if type(str) ~= "string"
            or startIndex == nil or startIndex < 0
            or endIndex == nil or endIndex < 0 then
        return
    end

    local firstByte = SubStringGetTrueIndex(str, startIndex)
    -- The last wanted byte is the one just before the following character.
    local lastByte = SubStringGetTrueIndex(str, endIndex + 1) - 1
    return str:sub(firstByte, lastByte)
end

-- Sample mixed Chinese/English string; only its byte length is used below.
STR = "中英文混合zh123英文"
-- LEN is the byte length of the sample literal above (string.len counts bytes).
-- The result concatenates SubString(..., 0, 3) with SubString(..., LEN-4, LEN),
-- both taken from sale_name(STYLE_DKM); sale_name and STYLE_DKM are not defined
-- in this file (presumably supplied by the host environment -- TODO confirm).
-- NOTE(review): SubString takes character positions, yet LEN is a byte count of
-- a different string than the one being sliced -- confirm this is intended.
LEN = string.len(STR) return SubString(sale_name(STYLE_DKM), 0, 3)..SubString(sale_name(STYLE_DKM), LEN-4, LEN)
复制代码

 

posted @   曦花  阅读(128)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现
点击右上角即可分享
微信分享提示