java读取文件字符集示例方法
java读取文件字符集示例方法
发布时间:2016-12-28 来源:查字典编辑
摘要:复制代码 代码如下: public static String getCharset(File file) { String charset = "GBK"; by...

复制代码 代码如下:

/**
 * Guesses the character encoding of a file from its leading bytes.
 *
 * <p>Detection proceeds in two steps:
 * <ol>
 *   <li>If the file starts with a byte-order mark, the matching encoding is
 *       returned: {@code FF FE} gives "UTF-16LE", {@code FE FF} gives
 *       "UTF-16BE", and {@code EF BB BF} gives "UTF-8".</li>
 *   <li>Otherwise the bytes are scanned from the start of the file; the first
 *       well-formed three-byte UTF-8 sequence makes the result "UTF-8".</li>
 * </ol>
 * In every other case, including an empty file, a {@code null} argument, or an
 * I/O failure, the default "GBK" is returned. This is a heuristic, not a
 * validator: a BOM-less UTF-8 file containing only ASCII and two-byte sequences
 * is reported as "GBK".
 *
 * @param file the file to inspect; may be {@code null}
 * @return one of "GBK", "UTF-8", "UTF-16LE" or "UTF-16BE"
 */
public static String getCharset(File file) {
    String charset = "GBK";
    if (file == null) {
        return charset;
    }

    // try-with-resources closes the stream on every exit path, including the
    // early returns below and any exception thrown while reading.
    try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file))) {
        byte[] first3Bytes = new byte[3];

        // The read limit must cover the bytes consumed before reset().
        bis.mark(first3Bytes.length);
        int read = bis.read(first3Bytes, 0, first3Bytes.length);
        if (read == -1) {
            return charset;
        }

        // Bytes beyond what was read stay zero, so a short file cannot
        // accidentally match a BOM.
        if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE) {
            return "UTF-16LE";
        }
        if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF) {
            return "UTF-16BE";
        }
        if (first3Bytes[0] == (byte) 0xEF
                && first3Bytes[1] == (byte) 0xBB
                && first3Bytes[2] == (byte) 0xBF) {
            return "UTF-8";
        }

        // No BOM: rewind and inspect the content from the first byte.
        bis.reset();
        if (containsUtf8ThreeByteSequence(bis)) {
            charset = "UTF-8";
        }
    } catch (Exception e) {
        // Best effort: on any failure report it and fall back to the default.
        e.printStackTrace();
    }
    return charset;
}

/**
 * Scans the stream until the first byte pattern that settles the guess.
 *
 * @return {@code true} if a well-formed three-byte UTF-8 sequence is found
 *         before anything that contradicts UTF-8; {@code false} otherwise,
 *         including when the stream ends first
 */
private static boolean containsUtf8ThreeByteSequence(BufferedInputStream in)
        throws java.io.IOException {
    int current;
    while ((current = in.read()) != -1) {
        if (current >= 0xF0) {
            // Lead bytes of 0xF0 and above are treated as non-UTF-8.
            return false;
        }
        if (isContinuationByte(current)) {
            // A continuation byte with no lead byte also counts as GBK.
            return false;
        }
        if (0xC0 <= current && current <= 0xDF) {
            // Two-byte form (0xC0-0xDF)(0x80-0xBF): such pairs can also occur
            // in GB encodings, so a valid pair is inconclusive; keep scanning.
            // This may still misjudge, but the probability is small.
            if (!isContinuationByte(in.read())) {
                return false;
            }
        } else if (0xE0 <= current) {
            // Three-byte form (0xE0-0xEF) followed by two continuation bytes
            // is taken as decisive evidence of UTF-8.
            return isContinuationByte(in.read()) && isContinuationByte(in.read());
        }
    }
    return false;
}

/** Returns whether {@code value} is in the UTF-8 continuation range 0x80-0xBF. */
private static boolean isContinuationByte(int value) {
    return 0x80 <= value && value <= 0xBF;
}

推荐文章
猜你喜欢
附近的人在看
推荐阅读
拓展阅读
相关阅读
网友关注
最新Java学习
热门Java学习
编程开发子分类