Lua 获取字符串长度

-- Split a UTF-8 encoded string into a table holding one character per entry.
-- @param s  string to split
-- @return   array of substrings, in order; each is one lead byte plus the
--           continuation bytes that directly follow it
function stringToTable(s)
    --[[
    UTF-8 encoding rules the pattern relies on:
    1. The first byte of a character is 0x00-0x7F (0-127) or 0xC2-0xF4 (194-244);
        UTF-8 is ASCII-compatible, so 0-127 are identical to ASCII.
    2. 0xC0, 0xC1 and 0xF5-0xFF (192, 193 and 245-255) never occur in UTF-8.
    3. 0x80-0xBF (128-191) only occur as the second or later byte of a
        multi-byte character (e.g. a Chinese character).
    The pattern only splits; it does not validate the encoding.
    ]]
    local utfCharPattern = "[%z\1-\127\194-\244][\128-\191]*"

    local chars = {}
    local count = 0
    for ch in string.gmatch(s, utfCharPattern) do
        count = count + 1
        chars[count] = ch
    end

    return chars
end

-- Display-style length of a UTF-8 string: a single-byte (ASCII) character
-- counts as 1 unit, any multi-byte character (treated as Chinese) counts as 2.
-- To count every character as 1 unit instead, use getNewUTFLen.
-- @param s  string to measure
-- @return   total number of units
function getUTFLen(s)
    local total = 0

    for _, ch in ipairs(stringToTable(s)) do
        -- more than one byte => treat as a Chinese (double-width) character
        total = total + (#ch > 1 and 2 or 1)
    end

    return total
end

-- Character count of a UTF-8 string: every character, single-byte or
-- multi-byte (Chinese, English, symbols), counts as exactly 1 unit.
-- @param s  string to measure
-- @return   number of characters
function getNewUTFLen(s)
    -- stringToTable produces one entry per character, so the entry count is
    -- the answer; no per-character byte-length check is needed.
    return #stringToTable(s)
end

-- Character count of a string mixing Chinese and ASCII characters
local str = "一二三@#[]【】789&*():"
print(getNewUTFLen(str))         -- 17


-- Dump each character of a sample string: index, text, byte length, first byte
local s = "①贰aA#}。"
local chars = stringToTable(s)
local lineFormat = "sTab index:%d,str:\"%s\",Len:%s,byte:%d"
for index, ch in ipairs(chars) do
    print(string.format(lineFormat, index, ch, #ch, string.byte(ch)))
end
print("#sTab = " .. #chars)
print("getUTFLen = " .. getUTFLen(s))

 

输出结果:

sTab index:1,str:"①",Len:3,byte:226
sTab index:2,str:"贰",Len:3,byte:232
sTab index:3,str:"a",Len:1,byte:97
sTab index:4,str:"A",Len:1,byte:65
sTab index:5,str:"#",Len:1,byte:35
sTab index:6,str:"}",Len:1,byte:125
sTab index:7,str:"。",Len:3,byte:227
#sTab = 7
getUTFLen = 10

 

posted @ 2018-10-17 15:09  Code~  阅读(3466)  评论(0编辑  收藏  举报