按照UTF-8编码格式,根据字节数截断文本存入数据表中
前提
在UTF-8编码中:一个常用中文字符占3个字节,一个全角字符占3个字节,一个英文字母占1个字节,一个数字占1个字节;位于辅助平面的字符(如部分生僻汉字、emoji)占4个字节
问题
待存入的文本包含中文以及其他未知字符,使得文本的字节数超出数据表字段的长度限制,导致存入数据表时抛出“输入值的长度过长”的异常
代码
/**
 * Truncates {@code text} so that its UTF-8 encoding occupies at most {@code limit} bytes.
 *
 * <p>The text is cut on code point boundaries only: a character is either kept whole or
 * dropped, so a multi-byte character (including a surrogate pair such as an emoji) is never
 * split. The longest prefix that fits within the limit is returned.
 *
 * <p>An unpaired surrogate is counted as one byte, matching the single replacement byte the
 * JDK encoder emits for it in practice.
 *
 * @param text  the text to truncate; {@code null} and the empty string are returned unchanged
 * @param limit the maximum number of UTF-8 bytes allowed; must be greater than 0
 * @return the longest prefix of {@code text} whose UTF-8 length does not exceed {@code limit}
 *         (possibly the empty string when even the first character does not fit)
 * @throws IllegalArgumentException if {@code limit} is not positive and {@code text} is non-empty
 * @throws Exception declared only for source compatibility with existing callers; this
 *                   implementation does not throw any checked exception
 */
public static String truncTextByUtf8BytesLengthLimit(String text, int limit) throws Exception{
    // Nothing to truncate. Whitespace-only text is deliberately NOT short-circuited here,
    // because it can exceed the byte limit just like any other text.
    if (text == null || text.isEmpty()) {
        return text;
    }
    if (limit <= 0) {
        throw new IllegalArgumentException("The 'limit' parameter must greater than 0");
    }

    final int length = text.length();
    int usedBytes = 0;
    int end = 0; // exclusive char index of the prefix that still fits

    while (end < length) {
        final int codePoint = text.codePointAt(end);

        // UTF-8 width of this code point, computed without allocating or re-encoding.
        final int width;
        if (codePoint < 0x80) {
            width = 1;
        } else if (codePoint < 0x800) {
            width = 2;
        } else if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE) {
            // Unpaired surrogate: not encodable, counted as the one-byte replacement.
            width = 1;
        } else if (codePoint < 0x10000) {
            width = 3;
        } else {
            width = 4;
        }

        if (usedBytes + width > limit) {
            break; // adding this character would overflow the limit
        }
        usedBytes += width;
        // Advance by 1 or 2 chars so surrogate pairs are consumed as a unit.
        end += Character.charCount(codePoint);
    }

    return end == length ? text : text.substring(0, end);
}
测试结果
public static void main(String[] args) throws Exception{
    // Demo: truncate a mixed CJK / ASCII / full-width sample with an 11-byte limit and print it.
    final int byteLimit = 11;
    final String sample = "測試測a試測試測試什麼abc123,,~!@3@!&(你好試測試測試測試什132141gegsfsfs試測試測試測試什sgsgsgs";
    System.out.println(truncTextByUtf8BytesLengthLimit(sample, byteLimit));
}