按照UTF-8编码格式,根据字节数截断文本存入数据表中

前提

在UTF-8编码中:一个常用中文字符占3个字节,一个全角字符占3个字节,一个英文字符占1个字节,一个数字占1个字节;部分生僻汉字和emoji等增补平面字符占4个字节(在Java字符串中表现为一对代理字符)

问题

待存入的文本包含中文及其他多字节字符,其UTF-8字节数可能超出数据表字段的长度限制,导致写入数据表时抛出"输入值的长度过长"的异常

代码

	/**
	 * Returns the longest prefix of {@code text} whose UTF-8 encoding occupies at most
	 * {@code limit} bytes.
	 *
	 * <p>The text is walked one Unicode code point at a time, so a surrogate pair
	 * (emoji, supplementary CJK, ...) is either kept whole or dropped whole; the result
	 * never ends in a dangling high surrogate. Byte widths are computed arithmetically
	 * instead of re-encoding the accumulated prefix, which keeps the scan linear.
	 *
	 * <p>Only {@code null} and the empty string are returned without inspection.
	 * Whitespace-only text is measured like any other text, so the result can never
	 * exceed {@code limit} bytes.
	 *
	 * @param text  the text to truncate; may be {@code null} or empty, in which case it is
	 *              returned as-is (before {@code limit} is validated)
	 * @param limit maximum number of UTF-8 bytes allowed in the result; must be positive
	 * @return {@code text} itself when it already fits, otherwise the longest prefix that fits
	 *         (possibly the empty string when even the first character is too wide)
	 * @throws IllegalArgumentException if {@code limit} is not greater than 0
	 * @throws Exception never thrown by this implementation; the clause is retained only so
	 *                   that existing callers keep compiling
	 */
	public static String truncTextByUtf8BytesLengthLimit(String text, int limit) throws Exception{
		if (text == null || text.isEmpty()) {
			return text;
		}
		if (limit <= 0) {
			throw new IllegalArgumentException("The 'limit' parameter must greater than 0");
		}
		final int length = text.length();
		int usedBytes = 0;
		int index = 0;
		while (index < length) {
			final int codePoint = text.codePointAt(index);
			final int width;
			if (codePoint < 0x80) {
				width = 1;
			} else if (codePoint < 0x800) {
				width = 2;
			} else if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
				width = 4;
			} else if (Character.isSurrogate((char) codePoint)) {
				// Unpaired surrogate: String.getBytes substitutes a single '?' byte for it.
				width = 1;
			} else {
				width = 3;
			}
			// Compare against the remaining budget (not usedBytes + width) to avoid int overflow.
			if (width > limit - usedBytes) {
				return text.substring(0, index);
			}
			usedBytes += width;
			index += Character.charCount(codePoint);
		}
		// Everything fits within the limit.
		return text;
	}

测试结果

	/**
	 * Demo entry point: truncates a mixed CJK / full-width / ASCII sample string to
	 * 11 UTF-8 bytes and prints the truncated text to standard output.
	 */
	public static void main(String[] args) throws Exception{
		final int byteLimit = 11;
		final String sample = "測試測a試測試測試什麼abc123,,~!@3@!&(你好試測試測試測試什132141gegsfsfs試測試測試測試什sgsgsgs";
		System.out.println(truncTextByUtf8BytesLengthLimit(sample, byteLimit));
	}

posted @ 2022-07-06 14:37  229  阅读(80)  评论(0)  编辑  收藏  举报