Lua string.sub截取UTF8 中英混合字符
-- Extract a substring from a UTF-8 string that may mix single-byte (ASCII)
-- and multi-byte (e.g. Chinese) characters. Indexes count characters, not
-- bytes, and both ends are inclusive.
--   str        : the UTF-8 encoded source string
--   startIndex : 1-based character index of the first character to keep;
--                a negative value counts back from the end (-1 = last char)
--   endIndex   : optional 1-based character index of the last character to
--                keep (negative counts from the end); when omitted the
--                substring runs to the end of the string
-- Returns the selected substring.
function SubStringUTF8(str, startIndex, endIndex)
    -- Turn a negative (from-the-end) character index into a from-the-start one.
    local function fromStart(charIndex)
        if charIndex < 0 then
            return SubStringGetTotalIndex(str) + charIndex + 1
        end
        return charIndex
    end

    local firstByte = SubStringGetTrueIndex(str, fromStart(startIndex))
    if endIndex == nil then
        return string.sub(str, firstByte)
    end

    -- The last byte of the final character is located by finding where the
    -- following character starts and stepping back one byte.
    local lastByte = SubStringGetTrueIndex(str, fromStart(endIndex) + 1) - 1
    return string.sub(str, firstByte, lastByte)
end
-- Count the characters (not bytes) in a mixed ASCII / multi-byte UTF-8 string.
--   str : the UTF-8 encoded string to measure
-- Returns the number of characters; 0 for an empty string.
function SubStringGetTotalIndex(str)
    local charCount = 0
    local bytePos = 1
    while true do
        local width = SubStringGetByteCount(str, bytePos)
        -- A width of 0 means bytePos is past the end of the string.
        if width == 0 then
            return charCount
        end
        bytePos = bytePos + width
        charCount = charCount + 1
    end
end
-- Convert a 1-based character index into the 1-based byte offset at which
-- that character starts in a UTF-8 string.
--   str   : the UTF-8 encoded string
--   index : 1-based character index; values <= 0 are treated as 1
-- Returns the byte offset of the character's first byte. When index lies
-- beyond the last character, returns the offset just past the end of the
-- scanned data (the first position where no byte exists).
function SubStringGetTrueIndex(str, index)
    local curIndex = 0
    local i = 1
    local lastCount = 1
    repeat
        lastCount = SubStringGetByteCount(str, i)
        i = i + lastCount
        curIndex = curIndex + 1
        -- Stop as soon as the end of the string is reached (lastCount == 0):
        -- further iterations could never move i, so continuing would only
        -- burn time proportional to index (and never finish for math.huge).
    until curIndex >= index or lastCount == 0
    -- i has already been advanced past the character just examined; step
    -- back by its width to get that character's starting byte.
    return i - lastCount
end
-- Report how many bytes the UTF-8 character starting at a given byte offset
-- occupies, judged solely from its leading byte.
--   str   : the UTF-8 encoded string
--   index : 1-based byte offset of the character's first byte
-- Returns 0 when there is no byte at that offset, 2/3/4 for the matching
-- multi-byte lead-byte ranges, and 1 for every other byte value.
function SubStringGetByteCount(str, index)
    local lead = string.byte(str, index)
    if lead == nil then
        return 0
    end
    if lead >= 240 and lead <= 247 then
        return 4
    end
    if lead >= 224 and lead <= 239 then
        return 3
    end
    if lead >= 192 and lead <= 223 then
        return 2
    end
    -- ASCII, plus any byte that is not a recognised multi-byte lead.
    return 1
end
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步