unicode 与 utf8 互转
/**
 * Remove leading zeros from a digit string while always keeping one digit,
 * so that a zero value is rendered as '0' rather than ''.
 *
 * @param {string} digits - Non-empty string of digits.
 * @returns {string} The same digits without redundant leading zeros.
 */
function stripLeadingZeros(digits) {
  return digits.replace(/^0+(?=.)/, '');
}

/**
 * Encode a code point above 0x7f as lower-case UTF-8 hex.
 *
 * @param {number} codePoint - Integer in the range 0x80..0x10ffff.
 * @returns {string} Two to four bytes written as hex, e.g. 'e4b8ad'.
 */
function encodeCodePoint(codePoint) {
  let byteCount = 4;
  if (codePoint <= 0x7ff) {
    byteCount = 2;
  } else if (codePoint <= 0xffff) {
    byteCount = 3;
  }

  const bytes = [];
  let remaining = codePoint;
  // Continuation bytes carry six payload bits each, lowest bits last.
  for (let i = 1; i < byteCount; i++) {
    bytes.unshift(0x80 | (remaining & 0x3f));
    remaining >>= 6;
  }
  // Lead byte marker: 0xc0, 0xe0 or 0xf0 for 2, 3 or 4 bytes.
  const leadMarker = (0xff << (8 - byteCount)) & 0xff;
  bytes.unshift(leadMarker | remaining);

  return bytes.map((byte) => byte.toString(16).padStart(2, '0')).join('');
}

/**
 * Check whether a value is a non-empty string of hexadecimal digits.
 *
 * @param {*} val - Value to test.
 * @returns {boolean} True only for strings matching [0-9a-fA-F]+.
 */
function isHex(val) {
  return typeof val === 'string' && /^[0-9a-fA-F]+$/.test(val);
}

/**
 * Check whether a value is a non-empty string of binary digits.
 *
 * @param {*} val - Value to test.
 * @returns {boolean} True only for strings matching [01]+.
 */
function isBinary(val) {
  return typeof val === 'string' && /^[01]+$/.test(val);
}

/**
 * Convert a binary digit string to lower-case hex without leading zeros.
 *
 * @param {string} binary - String of 0/1 digits.
 * @returns {string|undefined} Hex digits ('0' for a zero value), or
 *   undefined when the input is not a binary string.
 */
function binaryToHex(binary) {
  if (!isBinary(binary)) return;
  // Pad on the left so the digits split evenly into 4-bit groups.
  const width = Math.ceil(binary.length / 4) * 4;
  const nibbles = binary.padStart(width, '0').match(/[01]{4}/g);
  const hex = nibbles.map((bits) => Number.parseInt(bits, 2).toString(16)).join('');
  return stripLeadingZeros(hex);
}

/**
 * Convert a hex digit string to binary without leading zeros.
 *
 * @param {string} hex - String of hexadecimal digits (either case).
 * @returns {string|undefined} Binary digits ('0' for a zero value), or
 *   undefined when the input is not a hex string.
 */
function hexToBinary(hex) {
  if (!isHex(hex)) return;
  const bits = Array.from(hex, (digit) => Number.parseInt(digit, 16).toString(2).padStart(4, '0')).join('');
  return stripLeadingZeros(bits);
}

/**
 * Compare two unsigned numbers given as hex (or binary) digit strings.
 *
 * A string consisting only of 0 and 1 is interpreted as binary, so a hex
 * value such as '10' is read as two, not sixteen. The comparison ignores
 * letter case and leading zeros.
 *
 * @param {string} a - First value.
 * @param {string} b - Second value.
 * @returns {number|undefined} 1 if a > b, -1 if a < b, 0 if equal, or
 *   undefined when either argument is not a digit string.
 */
function hexCompare(a, b) {
  const leftHex = isBinary(a) ? binaryToHex(a) : a;
  const rightHex = isBinary(b) ? binaryToHex(b) : b;
  if (!isHex(leftHex) || !isHex(rightHex)) return;

  const left = stripLeadingZeros(leftHex.toLowerCase());
  const right = stripLeadingZeros(rightHex.toLowerCase());
  if (left.length !== right.length) return left.length > right.length ? 1 : -1;
  if (left === right) return 0;
  // Equal-length lower-case hex strings order the same way as their values,
  // because '0'-'9' sort before 'a'-'f' in a string comparison.
  return left > right ? 1 : -1;
}

/**
 * Convert a Unicode code point to its UTF-8 byte sequence, as hex.
 *
 * A string consisting only of 0 and 1 is interpreted as binary. Values up to
 * 7f are returned lower-cased and otherwise unchanged. Surrogate code points
 * (d800-dfff) are not rejected; they are encoded like any other value.
 *
 * @param {string} unicode - Code point as hex or binary digits.
 * @returns {string|undefined} Lower-case hex of the UTF-8 bytes, or undefined
 *   when the input is not a digit string or exceeds 10ffff.
 */
function unicodeToUtf8(unicode) {
  const hex = isBinary(unicode) ? binaryToHex(unicode) : unicode;
  if (!isHex(hex)) return;

  // Compare numerically: feeding the hex back into hexCompare would let a
  // value such as '100' be re-read as binary.
  const codePoint = Number.parseInt(hex, 16);
  if (codePoint > 0x10ffff) return;
  if (codePoint <= 0x7f) return hex.toLowerCase();
  return encodeCodePoint(codePoint);
}

/**
 * Convert one UTF-8 encoded character, given as hex, to its code point.
 *
 * A string consisting only of 0 and 1 is interpreted as binary. Values up to
 * 7f are returned lower-cased and otherwise unchanged. Sequences of two to
 * four bytes are accepted when the lead byte announces exactly that many
 * bytes and every following byte is a continuation byte (10xxxxxx).
 * NOTE(review): overlong encodings and values above 10ffff that are still
 * structurally valid (up to f7bfbfbf) are decoded rather than rejected.
 *
 * @param {string} utf8 - UTF-8 bytes as hex or binary digits.
 * @returns {string|undefined} Lower-case hex of the code point, or undefined
 *   when the input is not a digit string or not a well-formed sequence.
 */
function utf8ToUnicode(utf8) {
  const hex = isBinary(utf8) ? binaryToHex(utf8) : utf8;
  if (!isHex(hex)) return;

  const lowered = hex.toLowerCase();
  const digits = stripLeadingZeros(lowered);
  if (digits.length <= 2 && Number.parseInt(digits, 16) <= 0x7f) return lowered;

  // A multi-byte sequence starts with a byte >= c0, so it always has an even
  // number of significant hex digits.
  if (digits.length % 2 !== 0) return;
  const bytes = digits.match(/../g).map((pair) => Number.parseInt(pair, 16));
  const byteCount = bytes.length;
  if (byteCount < 2 || byteCount > 4) return;

  // The lead byte must be 110xxxxx, 1110xxxx or 11110xxx to match the length.
  const leadMarker = (0xff << (8 - byteCount)) & 0xff;
  const leadMask = (0xff << (7 - byteCount)) & 0xff;
  const lead = bytes[0];
  if ((lead & leadMask) !== leadMarker) return;

  let codePoint = lead & ~leadMask & 0xff;
  for (const byte of bytes.slice(1)) {
    if ((byte & 0xc0) !== 0x80) return;
    codePoint = (codePoint << 6) | (byte & 0x3f);
  }
  return codePoint.toString(16);
}

/**
 * Describe the first character (code point) of a string in several notations.
 *
 * @param {string} text - Source text; only its first code point is used.
 * @returns {{binary: string, hex: string, unicode: string, utf8: string}}
 *   The code point in binary and hex, the same hex as `unicode`, and the
 *   UTF-8 bytes as lower-case hex. For an empty string every field is 'NaN'.
 */
function formatData(text) {
  const first = text.codePointAt(0);
  // An empty string has no first code point; NaN keeps every field 'NaN'.
  const code = first === undefined ? NaN : first;
  const binary = code.toString(2);
  const hex = code.toString(16);
  const unicode = hex;
  const utf8 = code > 0x7f ? encodeCodePoint(code) : hex;

  return { binary, hex, unicode, utf8 };
}

export { isHex, isBinary, binaryToHex, hexToBinary, hexCompare, unicodeToUtf8, utf8ToUnicode, formatData };