用java转换文件的字符集

中文乱码真的是一个让人很头疼的问题，有了这个方法应该能缓解这种头疼。它用递归方式查找文件，并直接在原文件上修改，请小心使用（在本地测试时效果有点诡异，没有达到预期效果）。
  1 package com.hy.util;
  2 
  3 import info.monitorenter.cpdetector.io.*;
  4 
  5 import java.io.BufferedReader;
  6 import java.io.File;
  7 import java.io.FileInputStream;
  8 import java.io.FileNotFoundException;
  9 import java.io.FileOutputStream;
 10 import java.io.IOException;
 11 import java.io.InputStreamReader;
 12 import java.io.OutputStreamWriter;
 13 
 14 public class CharacterChange{
 15 
 16     public static void main(String[] args) throws FileNotFoundException, IOException {
 17 
 18         // 封装目录,需要修改文件格式的路径
 19         File srcFolder = new File("F:\\test");
 20 
 21         String newCharater = "GBK";
 22 
 23         getAllJavaFilePaths(srcFolder, newCharater);
 24     }
 25 
 26     private static void getAllJavaFilePaths(File srcFolder, String newCharater) throws IOException {
 27 
 28         // 获取该目录下所有的文件或者文件夹的File数组
 29         File[] fileArray = srcFolder.listFiles();
 30 
 31         // 遍历该File数组，得到每一个File对象
 32         for (File file : fileArray) {
 33 
 34             // 继续判断是否以特定文件结尾,不是的话继续调用getAllJavaFilePaths()方法
 35             if (file.isDirectory()) {
 36                 getAllJavaFilePaths(file, newCharater);
 37             } else {
 38                 if (file.getName().endsWith(".sql")) {
 39                     try {
 40                         FileInputStream fis = new FileInputStream(file);
 41                         //oldcCharacter 获取特定的字符集
 42                         String oldcCharacter = getChartsetName(file);
 43                         InputStreamReader isr = new InputStreamReader(fis, oldcCharacter);
 44                         BufferedReader br = new BufferedReader(isr);
 45                         String str = null;
 46                         // 创建StringBuffer字符串缓存区
 47                         StringBuffer sb = new StringBuffer();
 48                         // 通过readLine()方法遍历读取文件
 49                         while ((str = br.readLine()) != null) {
 50                             // 使用readLine()方法无法进行换行,需要手动在原本输出的字符串后面加"\n"或"\r"
 51                             str += "\n";
 52                             sb.append(str);
 53                         }
 54                         String fileSource = sb.toString();
 55                         // 以GBK格式写入文件,file.getAbsolutePath()即该文件的绝对路径,false代表不追加直接覆盖,true代表追加文件
 56                         FileOutputStream fos = new FileOutputStream(file.getAbsolutePath(), false);
 57                         OutputStreamWriter osw = new OutputStreamWriter(fos, newCharater);
 58                         try {
 59                             osw.write(fileSource);
 60                             System.out.println(
 61                                     "将：" + oldcCharacter + " 的文件：" + file.getAbsolutePath() + "修改字符集为：" + newCharater);
 62                         } finally {
 63                             osw.flush();
 64                             osw.close();
 65                             fos.close();
 66                             br.close();
 67                             isr.close();
 68                             fis.close();
 69                         }
 70                     } catch (Exception e) {
 71                     }
 72                 } else {
 73                     System.err.println("该文件以忽略：" + file.getAbsolutePath());
 74                 }
 75             }
 76         }
 77     }
 78 
 79     public static String getChartsetName(File file) {
 80         String chartsetName = null;
 81         // 获取文件编码格式
 82         CodepageDetectorProxy detector = CodepageDetectorProxy.getInstance();
 83         detector.add(new ParsingDetector(true));
 84         detector.add(JChardetFacade.getInstance());
 85         detector.add(ASCIIDetector.getInstance());
 86         detector.add(UnicodeDetector.getInstance());
 87         java.nio.charset.Charset charset = null;
 88         try {
 89             if (file != null) {
 90                 charset = detector.detectCodepage(file.toURL());
 91             }
 92         } catch (Exception ex) {
 93             ex.printStackTrace();
 94         }
 95         if (charset != null) {
 96             chartsetName = charset.name();
 97         } else {
 98             chartsetName = "未知的编码";
 99         }
100         return chartsetName;
101     }
102  /*
103   *  <dependency>
104              <groupId>net.sourceforge.jchardet</groupId>
105              <artifactId>jchardet</artifactId>
106              <version>1.0</version>
107          </dependency>
108          <dependency>
109              <groupId>antlr</groupId>
110              <artifactId>antlr</artifactId>
111              <version>2.7.7</version>
112          </dependency>
113     */
114 }
posted @ 2019-08-23 17:46 篮球是圆的阅读(1884) 评论(0) 收藏举报
刷新页面返回顶部
篮球是圆的

用java转换文件的字符集

公告