转自博文《Java文件编码格式转换》:
默认被转换的格式为GBK,转换成的格式为UTF-8
import info.monitorenter.cpdetector.CharsetPrinter;
import java.io.BufferedReader;import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
public class EncodeFormatTransfer {
public static String DefaultSrcEncodeFormat = "GBK"; public static String DefaultDestEncodeFormat = "UTF-8"; public static String UnsupportedEncodingExceptionError = "编码格式错误!"; public static String FileNotFoundExceptionError = "文件不存在!"; public static String IOExceptionError = "文件读写错误!"; public static String IsUtf8File = "文件是UTF-8编码格式!"; public static String IsNotUtf8File = "文件不是UTF-8编码格式!"; public static String readFile(String path,String encodeFormat){ if((encodeFormat==null || encodeFormat.equals(""))){ if(isUTF8File(path)) encodeFormat = DefaultDestEncodeFormat; else encodeFormat = DefaultSrcEncodeFormat; } try { String context = ""; InputStreamReader isr; isr = new InputStreamReader(new FileInputStream(path),encodeFormat); BufferedReader br=new BufferedReader(isr); String line; while((line=br.readLine())!=null){ context += line + "\r\n"; System.out.println(line); } br.close();
return context; } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block System.out.println(UnsupportedEncodingExceptionError); e.printStackTrace(); } catch (FileNotFoundException e) { // TODO Auto-generated catch block System.out.println(FileNotFoundExceptionError); e.printStackTrace(); }catch (IOException e) { // TODO Auto-generated catch block System.out.println(IOExceptionError); e.printStackTrace(); }; return "";
}
/*public static boolean isUTF8File(String path){ try { File file = new File(path); CharsetPrinter detector = new CharsetPrinter(); String charset = detector.guessEncoding(file); InputStream in = new java.io.FileInputStream(file); byte[] b = new byte[3]; in.read(b); in.close();
System.out.println(b[0] + " " + b[1] + " " + b[2]);
if (b[0] == 0xEF && b[1] == 0xBB && b[2] == 0XBF){
System.out.println(IsUtf8File);
return true;
}
if (b[0] == -17 && b[1] == -69 && b[2] == -65){
System.out.println(IsUtf8File);
return true;
} } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println(FileNotFoundExceptionError); }catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println(IOExceptionError); }
System.out.println(IsNotUtf8File); return false;
}*/
public static boolean isUTF8File(String path){ try { File file = new File(path); CharsetPrinter detector = new CharsetPrinter(); String charset = detector.guessEncoding(file); if(charset.equalsIgnoreCase(DefaultDestEncodeFormat)){ System.out.println(IsUtf8File); return true;
} } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println(FileNotFoundExceptionError); }catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); System.out.println(IOExceptionError); }
System.out.println(IsNotUtf8File); return false;
}
public static String transfer(String context,String encodeFormat) { if(encodeFormat==null || encodeFormat.equals("")) encodeFormat = DefaultDestEncodeFormat; try { byte[] content = context.getBytes(); String result = new String(content,encodeFormat); return result; } catch (UnsupportedEncodingException e) { // TODO Auto-generated catch block System.out.println(UnsupportedEncodingExceptionError); e.printStackTrace(); } return "";
}
public static void writeFile(String context,String path,String destEncode){ File file = new File(path); if(file.exists()) file.delete(); BufferedWriter writer; try { FileOutputStream fos = new FileOutputStream(path,true); writer = new BufferedWriter(new OutputStreamWriter(fos, destEncode)); writer.append(context); writer.close(); } catch (IOException e) { System.out.println(IOExceptionError); e.printStackTrace(); }
}
public static void writeFile(String context,String path){ File file = new File(path); if(file.exists()) file.delete(); Writer writer; try { writer = new FileWriter(file, true); writer.append(context); writer.close(); } catch (IOException e) { System.out.println(IOExceptionError); e.printStackTrace(); }
}
public static void transfer(String srcPath,String destPath,String srcEncode,String destEncode){ if(destPath==null || destPath.equals("")) destPath = srcPath; String context = readFile(srcPath,srcEncode); context = transfer(context,destEncode); writeFile(context,destPath,destEncode);
}
public static void transfer(String srcPath,String destPath,String destEncode){ if(isUTF8File(srcPath)){ transfer(srcPath,destPath,DefaultDestEncodeFormat,destEncode); }else{ transfer(srcPath,destPath,DefaultSrcEncodeFormat,destEncode); }
}
public static void main(String args[]){ String path1 = "f:/Notepad1.java"; String path2 = "f:/Notepad2.java"; transfer(path1,path2,"UTF-8"); transfer(path1,path2,"UTF-8","UTF-8");
} }
java读取文件,处理过程中,可能因为文件的编码问题导致了中文乱码。有时需要将UTF-8的改为ANSI的编码。以下代码就可以判断文件是什么编码方式。
主要jar包:cpdetector.jar
下载地址: http://cpdetector.sourceforge.net/
同时还需jchardet-1.0.jar这个包,否则detector.add(cpdetector.io.JChardetFacade.getInstance()); 会报错;
下载地址: http://www.jarfinder.com/index.php/jars/versionInfo/40297
还有一个antlr.jar,不然运行过程中detector.add(new ParsingDetector(false));会报错;
下载地址: http://www.java2s.com/Code/Jar/ABC/Downloadantlrjar.htm
主要jar包:cpdetector.jar
下载地址: http://cpdetector.sourceforge.net/
同时还需jchardet-1.0.jar这个包,否则detector.add(cpdetector.io.JChardetFacade.getInstance()); 会报错;
下载地址: http://www.jarfinder.com/index.php/jars/versionInfo/40297
还有一个antlr.jar,不然运行过程中detector.add(new ParsingDetector(false));会报错;
下载地址: http://www.java2s.com/Code/Jar/ABC/Downloadantlrjar.htm