lua处理中英文混合字符串

我用的是下面这个方法
--- Return the number of bytes in the UTF-8 sequence introduced by a lead byte.
-- Lead-byte ranges follow the UTF-8 bit patterns:
--   0xF0 (240) and above -> 4 bytes
--   0xE0 (224) .. 0xEF   -> 3 bytes
--   0xC0 (192) .. 0xDF   -> 2 bytes
--   anything lower       -> 1 byte (ASCII; stray continuation bytes are
--                           also stepped over one byte at a time)
-- @param char numeric byte value (as returned by string.byte), or nil
-- @return byte width of the sequence; 0 when char is nil
function chsize(char)
    if not char then
        -- string.byte returns nil past the end of the string.
        return 0
    elseif char >= 240 then
        -- Inclusive bound: 0xF0 itself starts a 4-byte sequence.
        return 4
    elseif char >= 224 then
        -- Inclusive bound: 0xE0 and 0xE1 start 3-byte sequences.
        return 3
    elseif char >= 192 then
        return 2
    else
        return 1
    end
end

--- Count the characters in a UTF-8 encoded string.
-- Steps through the string one character at a time, using chsize on each
-- lead byte to decide how many bytes to jump.
-- @param str UTF-8 encoded string
-- @return number of characters stepped over
function utf8len(str)
    local byteTotal = #str
    local pos, count = 1, 0
    while pos <= byteTotal do
        pos = pos + chsize(string.byte(str, pos))
        count = count + 1
    end
    return count
end

--- Extract a run of characters from a UTF-8 encoded string.
-- @param str       UTF-8 encoded string
-- @param startChar 1-based character position of the first character to keep
-- @param numChars  maximum number of characters to keep
-- @return the selected substring (shorter if str ends first)
function utf8sub(str,startChar,numChars)
   -- Walk forward over the characters that precede the requested start.
   local first = 1
   local toSkip = startChar
   while toSkip > 1 do
      first = first + chsize(string.byte(str, first))
      toSkip = toSkip - 1
   end

   -- Advance past up to numChars characters, stopping at the end of str.
   local pastEnd = first
   local remaining = numChars
   while remaining > 0 and pastEnd <= #str do
      pastEnd = pastEnd + chsize(string.byte(str, pastEnd))
      remaining = remaining - 1
   end

   return str:sub(first, pastEnd - 1)
end

-- Example usage. sale_name and STYLE_DKM are not defined in this snippet;
-- presumably they are supplied by the host environment.
STR = sale_name(STYLE_DKM)
LEN = utf8len(STR)-1    -- character count of STR, minus one
--- Return the number of bytes in the UTF-8 sequence introduced by a lead byte.
-- Lead-byte ranges follow the UTF-8 bit patterns:
--   0xF0 (240) and above -> 4 bytes
--   0xE0 (224) .. 0xEF   -> 3 bytes
--   0xC0 (192) .. 0xDF   -> 2 bytes
--   anything lower       -> 1 byte (ASCII; stray continuation bytes are
--                           also stepped over one byte at a time)
-- @param char numeric byte value (as returned by string.byte), or nil
-- @return byte width of the sequence; 0 when char is nil
function chsize(char)
    if not char then
        -- string.byte returns nil past the end of the string.
        return 0
    elseif char >= 240 then
        -- Inclusive bound: 0xF0 itself starts a 4-byte sequence.
        return 4
    elseif char >= 224 then
        -- Inclusive bound: 0xE0 and 0xE1 start 3-byte sequences.
        return 3
    elseif char >= 192 then
        return 2
    else
        return 1
    end
end

--- Count the characters in a UTF-8 encoded string.
-- Steps through the string one character at a time, using chsize on each
-- lead byte to decide how many bytes to jump.
-- @param str UTF-8 encoded string
-- @return number of characters stepped over
function utf8len(str)
    local byteTotal = #str
    local pos, count = 1, 0
    while pos <= byteTotal do
        pos = pos + chsize(string.byte(str, pos))
        count = count + 1
    end
    return count
end

--- Extract a run of characters from a UTF-8 encoded string.
-- @param str       UTF-8 encoded string
-- @param startChar 1-based character position of the first character to keep
-- @param numChars  maximum number of characters to keep
-- @return the selected substring (shorter if str ends first)
function utf8sub(str,startChar,numChars)
   -- Walk forward over the characters that precede the requested start.
   local first = 1
   local toSkip = startChar
   while toSkip > 1 do
      first = first + chsize(string.byte(str, first))
      toSkip = toSkip - 1
   end

   -- Advance past up to numChars characters, stopping at the end of str.
   local pastEnd = first
   local remaining = numChars
   while remaining > 0 and pastEnd <= #str do
      pastEnd = pastEnd + chsize(string.byte(str, pastEnd))
      remaining = remaining - 1
   end

   return str:sub(first, pastEnd - 1)
end

-- Sample mixed Chinese/English string; it is overwritten by the
-- sale_name assignment below before being used.
STR = "中英文混合zh123英文"


-- sale_name and STYLE_DKM are not defined in this snippet; presumably they
-- are supplied by the host environment.
STR = sale_name(STYLE_DKM)
LEN = utf8len(STR)
return utf8sub(STR,LEN-2,3)   -- the last 3 characters

 

 

还有一个方法也测试通过了,变量命名不错,而且也考虑了 if 判断的性能,不过我没有使用下面这个方法:

--- Translate a character position into the byte index where it starts.
-- Always examines at least one character, so an index of 0 yields the same
-- result as an index of 1.
-- @param str   string to scan
-- @param index character position to locate
-- @return byte index at which that character begins
function SubStringGetTrueIndex(str, index)
    local bytePos = 1
    local charsSeen = 0
    while true do
        local width = SubStringGetByteCount(str, bytePos)
        bytePos = bytePos + width
        charsSeen = charsSeen + 1
        if charsSeen >= index then
            -- bytePos has already moved past the target; step back over it.
            return bytePos - width
        end
    end
end

--- Return how many bytes the character starting at a byte index occupies.
-- @param str   string to inspect
-- @param index byte index of the character's first byte
-- @return 0 when there is no byte at index; 2, 3 or 4 for the matching
--         multi-byte lead ranges; 1 for every other byte value
function SubStringGetByteCount(str, index)
    local lead = string.byte(str, index)
    if lead == nil then
        return 0
    end
    if lead >= 240 and lead <= 247 then
        return 4
    end
    if lead >= 224 and lead <= 239 then
        return 3
    end
    if lead >= 192 and lead <= 223 then
        return 2
    end
    -- 1..127, plus any value outside the ranges above, counts as one byte.
    return 1
end

--- Cut a substring out of a mixed Chinese/English string by character position.
-- @param str        string to cut from
-- @param startIndex character position of the first character to keep
-- @param endIndex   character position of the last character to keep
-- @return the substring, or nothing when str is not a string or either
--         index is nil or negative
function SubString(str, startIndex, endIndex)
    local invalid = type(str) ~= "string"
        or startIndex == nil or startIndex < 0
        or endIndex == nil or endIndex < 0
    if invalid then
        return
    end

    local firstByte = SubStringGetTrueIndex(str, startIndex)
    -- The byte just before the start of the character after endIndex.
    local lastByte = SubStringGetTrueIndex(str, endIndex + 1) - 1
    return string.sub(str, firstByte, lastByte)
end

-- Example usage: joins two SubString results taken from sale_name(STYLE_DKM).
-- sale_name and STYLE_DKM are not defined in this snippet.
-- NOTE(review): LEN is the byte length (string.len) of the literal STR, yet it
-- is passed as a character position into a different string, the result of
-- sale_name(STYLE_DKM) — confirm this is intended.
STR = "中英文混合zh123英文"
LEN = string.len(STR) return SubString(sale_name(STYLE_DKM), 0, 3)..SubString(sale_name(STYLE_DKM), LEN-4, LEN)

 

posted @ 2023-02-21 11:41  曦花  阅读(98)  评论(0)  编辑  收藏  举报