JavaScript:将 wchar_t 宽字符转化为 ASCII 字符码(字节)数组

String.prototype.charCodeAt

String.fromCharCode()

/**
 * Serializes this string into a flat array of byte values.
 *
 * Every UTF-16 code unit of the string contributes two entries:
 * its high byte followed by its low byte.
 *
 * @returns {number[]} Byte values, two per UTF-16 code unit.
 */
String.prototype.toUtfArray = function() {
	var bytes = [];
	this.split('').forEach(function(ch) {
		var unit = ch.charCodeAt(0);
		// High byte first, then low byte.
		bytes.push(unit >> 8, unit & 0x0ff);
	});
	return bytes;
};
/**
 * Builds a string from a flat array of byte values, reading them in
 * (high byte, low byte) pairs, one pair per UTF-16 code unit.
 *
 * If the input has an odd number of entries, a leading 0 is assumed so
 * that the first entry becomes the low byte of the first code unit.
 * The input array is never modified.
 *
 * @param {number[]} a - Byte values, normally two per code unit.
 * @returns {string} The string made of the decoded code units.
 */
String.fromUtfArray = function(a) {
	var bytes = a;
	// The length must be even; otherwise pad with a leading 0.
	// Pad a private copy so the caller's array is left untouched.
	if (a.length % 2 === 1) {
		bytes = Array.prototype.slice.call(a);
		bytes.unshift(0);
	}
	var units = [];
	for (var i = 0; i < bytes.length; i += 2) {
		// Combine the high byte and the low byte into one 16-bit code unit.
		units.push((bytes[i] << 8) | bytes[i + 1]);
	}
	return units.map(function(c) {
		return String.fromCharCode(c);
	}).join('');
};

  

test:

// Round-trip demo: encode a string to bytes, print them, then decode it back.
var s1 = "你好a";
// Two bytes per UTF-16 code unit, high byte first.
var a = s1.toUtfArray(); // [79, 96, 89, 125, 0, 97]
console.log(a.join(',')); // 79,96,89,125,0,97
var s2 = String.fromUtfArray(a);
console.log(s2); // "你好a"

 

* UTF-8 是变长编码(每个字符占 1~4 个字节)

字符            UTF-8编码    Byte 1                   Byte 2             Byte 3

A                                   01000001                                                                                          

Ö                                  11000011            10010110       

中                                 11100100            10111000      10101101

---------------------------------------------------------

  Binary    Hex                 Comments
0xxxxxxx   0x00..0x7F   Only byte of a 1-byte character encoding
10xxxxxx   0x80..0xBF   Continuation bytes (1-3 continuation bytes)
110xxxxx    0xC0..0xDF   First byte of a 2-byte character encoding
1110xxxx    0xE0..0xEF   First byte of a 3-byte character encoding
11110xxx    0xF0..0xF7   First byte of a 4-byte character encoding

  

posted @ 2018-07-22 08:45  zhanghui_ming  阅读(179)  评论(0)  编辑  收藏  举报