JavaScript:将 wchar_t 宽字符(UTF-16 码元)转化为字节数组(每个码元拆成高、低两个字节)
String.prototype.charCodeAt
String.fromCharCode()
/**
 * Splits the string into its UTF-16 code units and emits every unit as two
 * numbers: the bits above the low byte first, then the low byte.
 *
 * @returns {number[]} Flat array with two entries per code unit
 *   (e.g. "a" -> [0, 97]).
 */
String.prototype.toUtfArray = function() {
  var bytes = [];
  for (var i = 0; i < this.length; i++) {
    var unit = this.charCodeAt(i);
    bytes.push(unit >> 8, unit & 0xff);
  }
  return bytes;
};

/**
 * Rebuilds a string from an array of (high, low) byte pairs, the inverse of
 * String.prototype.toUtfArray.
 *
 * @param {ArrayLike<number>} a - Byte values, two per UTF-16 code unit. An
 *   odd-length input is treated as if it had a leading 0. The input itself
 *   is never modified.
 * @returns {string} The string whose code units are (high << 8) | low for
 *   each consecutive pair.
 */
String.fromUtfArray = function(a) {
  // Work on a copy so that padding does not mutate the caller's array.
  var bytes = Array.prototype.slice.call(a);

  // The length must be even; otherwise left-pad with a 0 byte.
  if (bytes.length % 2 === 1) {
    bytes.unshift(0);
  }

  var chars = [];
  for (var i = 0; i < bytes.length; i += 2) {
    chars.push(String.fromCharCode((bytes[i] << 8) | bytes[i + 1]));
  }
  return chars.join('');
};
test:
// Round-trip demo: encode a string to its byte array, print it, then decode
// it back and print the result.
var s1 = "你好a";
var a = s1.toUtfArray(); // [79, 96, 89, 125, 0, 97]
console.log(a.toString());
var s2 = String.fromUtfArray(a);
console.log(s2); // "你好a"
* UTF-8 变长
字符 UTF-8编码 Byte 1 Byte 2 Byte 3
A 01000001
Ö 11000011 10010110
中 11100100 10111000 10101101
---------------------------------------------------------
Binary Hex Comments
0xxxxxxx 0x00..0x7F Only byte of a 1-byte character encoding
10xxxxxx 0x80..0xBF Continuation bytes (1-3 continuation bytes)
110xxxxx 0xC0..0xDF First byte of a 2-byte character encoding
1110xxxx 0xE0..0xEF First byte of a 3-byte character encoding
11110xxx 0xF0..0xF7 First byte of a 4-byte character encoding