GBK-UTF-8-Unicode编码字节问题
import java.io.UnsupportedEncodingException;

/**
 * Prints how many bytes a single English character and a single Chinese
 * character occupy when a Java {@code String} is encoded with various charsets.
 *
 * <p>The numbers are the lengths of the arrays returned by
 * {@link String#getBytes(String)}. For charsets whose encoder emits a
 * byte-order mark (the JDK documents this for {@code "UTF-16"}), that mark is
 * counted as part of the reported length.
 *
 * @author NEU-2015
 */
public class DE {

    /**
     * Charset names to probe, in output order.
     * NOTE(review): "UNICODE" is not one of the standard charset names; whether
     * it resolves depends on the alias table of the running JDK. When a name is
     * not resolvable, the error line of {@link #printByteLength(String)} is
     * printed instead of the byte counts.
     */
    private static final String[] CHARSET_NAMES = {
        "UTF-8", "UTF-16", "UTF-16BE", "UTF-16LE",
        "UTF-32", "UTF-32BE", "UTF-32LE",
        "UNICODE", "GBK", "GB2312", "GB18030",
        "ISO8859-1", "BIG5", "ASCII"
    };

    /**
     * Prints the byte lengths for every charset in {@link #CHARSET_NAMES}.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        for (String charsetName : CHARSET_NAMES) {
            printByteLength(charsetName);
        }
    }

    /**
     * Prints two lines to standard output: the encoded length of {@code "a"}
     * and of {@code "啊"} in the given charset, followed by an empty line.
     * If the charset name cannot be resolved, a single error line naming the
     * charset is printed instead.
     *
     * @param charsetName name of the charset to encode with
     */
    public static void printByteLength(String charsetName) {
        String en = "a";  // a single English character
        String zh = "啊"; // a single Chinese character
        try {
            // Resolve both lengths before printing so a rejected charset
            // produces only the error line and no partial output.
            int enLength = byteLength(en, charsetName);
            int zhLength = byteLength(zh, charsetName);
            System.out.println(charsetName + "编码英文字符所占字节数:" + enLength);
            System.out.println(charsetName + "编码中文字符所占字节数:" + zhLength);
            System.out.println();
        } catch (UnsupportedEncodingException e) {
            // Name the offending charset; with 14 candidates an anonymous
            // message does not tell the reader which one failed.
            System.out.println(charsetName + ":非法编码格式!");
        }
    }

    /** Returns the number of bytes {@code text} occupies when encoded with {@code charsetName}. */
    private static int byteLength(String text, String charsetName)
            throws UnsupportedEncodingException {
        return text.getBytes(charsetName).length;
    }
}
对于GBK编码标准,英文占用1个字节,中文占用2个字节
对于UTF-8编码标准,英文占用1个字节,中文占用3个字节