lib.utf.js_Javascript教程-查字典教程网
lib.utf.js
lib.utf.js
发布时间:2016-12-30 来源:查字典编辑
摘要:/*UTF-8UTF-16convertionlibrary.*/*Copyright(C)1999MasanaoIzumo*2007MaB...

/* UTF-8 <=> UTF-16 convertion library.

*

/* Copyright (C) 1999 Masanao Izumo <iz@onicos.co.jp>

* 2007 Ma Bingyao <andot@ujn.edu.cn>

* Version: 2.1

* LastModified: Feb 25, 2007

* This library is free. You can redistribute it and/or modify it.

*//*

* Interfaces:

* utf8 = utf16to8(utf16);

* utf16 = utf16to8(utf8);

*/function utf16to8(str) {

if (str.match(/^[x00-x7f]*$/) != null) {

return str;

}

var out, i, j, len, c, c2;

out = [];

len = str.length;

for (i = 0, j = 0; i < len; i++, j++) {

c = str.charCodeAt(i);

if (c <= 0x7f) {

out[j] = str.charAt(i);

}

else if (c <= 0x7ff) {

out[j] = String.fromCharCode(0xc0 | (c >>> 6),

0x80 | (c & 0x3f));

}

else if (c < 0xd800 || c > 0xdfff) {

out[j] = String.fromCharCode(0xe0 | (c >>> 12),

0x80 | ((c >>> 6) & 0x3f),

0x80 | (c & 0x3f));

}

else {

if (++i < len) {

c2 = str.charCodeAt(i);

if (c <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff) {

c = ((c & 0x03ff) << 10 | (c2 & 0x03ff)) + 0x010000;

if (0x010000 <= c && c <= 0x10ffff) {

out[j] = String.fromCharCode(0xf0 | ((c >>> 18) & 0x3f),

0x80 | ((c >>> 12) & 0x3f),

0x80 | ((c >>> 6) & 0x3f),

0x80 | (c & 0x3f));

}

else {

out[j] = '?';

}

}

else {

i--;

out[j] = '?';

}

}

else {

i--;

out[j] = '?';

}

}

}

return out.join('');

}function utf8to16(str) {

if ((str.match(/^[x00-x7f]*$/) != null) ||

(str.match(/^[x00-xff]*$/) == null)) {

return str;

}

var out, i, j, len, c, c2, c3, c4, s;out = [];

len = str.length;

i = j = 0;

while (i < len) {

c = str.charCodeAt(i++);

switch (c >> 4) {

case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:

// 0xxx xxxx

out[j++] = str.charAt(i - 1);

break;

case 12: case 13:

// 110x xxxx 10xx xxxx

c2 = str.charCodeAt(i++);

out[j++] = String.fromCharCode(((c & 0x1f) << 6) |

(c2 & 0x3f));

break;

case 14:

// 1110 xxxx 10xx xxxx 10xx xxxx

c2 = str.charCodeAt(i++);

c3 = str.charCodeAt(i++);

out[j++] = String.fromCharCode(((c & 0x0f) << 12) |

((c2 & 0x3f) << 6) |

(c3 & 0x3f));

break;

case 15:

switch (c & 0xf) {

case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:

// 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx

c2 = str.charCodeAt(i++);

c3 = str.charCodeAt(i++);

c4 = str.charCodeAt(i++);

s = ((c & 0x07) << 18) |

((c2 & 0x3f) << 12) |

((c3 & 0x3f) << 6) |

(c4 & 0x3f) - 0x10000;

if (0 <= s && s <= 0xfffff) {

out[j] = String.fromCharCode(((s >>> 10) & 0x03ff) | 0xd800,

(s & 0x03ff) | 0xdc00);

}

else {

out[j] = '?';

}

break;

case 8: case 9: case 10: case 11:

// 1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx

i+=4;

out[j] = '?';

break;

case 12: case 13:

// 1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx

i+=5;

out[j] = '?';

break;

}

}

j++;

}

return out.join('');

}

相关阅读
推荐文章
猜你喜欢
附近的人在看
推荐阅读
拓展阅读
  • 大家都在看
  • 小编推荐
  • 猜你喜欢
  • 最新Javascript教程学习
    热门Javascript教程学习
    编程开发子分类