导入依赖
<!-- Maven dependencies required by the code samples below. -->
<dependencies>
<!-- JUnit 4: supplies the @Test annotation used by the test methods. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.12</version>
</dependency>
<!-- Apache Commons IO: FileUtils, FilenameUtils and the file filters used to walk directories. -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
</dependencies>
获取字符集工具类
package www.taopanfeng.top.utils;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
/**
 * File helpers; currently a single heuristic that guesses the character set of a text file.
 *
 * @author TaoPanfeng
 * @version 1.0
 * @date 2020-03-13 11:52
 */
public class MyFileUtils
{
    /** Number of leading bytes inspected when looking for a byte-order mark. */
    private static final int BOM_PROBE_LENGTH = 3;

    /**
     * Guesses the character set of the file at {@code path}.
     *
     * <p>The first three bytes are checked for a UTF-16LE, UTF-16BE or UTF-8 byte-order mark.
     * When none is present the file is scanned from the beginning and reported as
     * {@code "UTF-8"} as soon as a well-formed three-byte UTF-8 sequence is met. In every
     * other case (empty file, plain ASCII, any byte pattern that stops the scan, or a failure
     * while reading) the default {@code "GBK"} is returned.
     *
     * <p>This is a best-effort heuristic: failures are printed to standard error and never
     * propagated to the caller.
     *
     * @param path path of the file to inspect
     * @return one of {@code "GBK"}, {@code "UTF-8"}, {@code "UTF-16LE"}, {@code "UTF-16BE"}
     * @author TaoPanfeng
     * @date 2020-03-13 11:52
     */
    public static String charset(String path)
    {
        String charset = "GBK";
        // try-with-resources closes the stream on every path, including read failures.
        try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(path)))
        {
            byte[] first3Bytes = new byte[BOM_PROBE_LENGTH];
            // The read limit must cover the bytes consumed before reset() is called.
            bis.mark(BOM_PROBE_LENGTH);
            int read = bis.read(first3Bytes, 0, BOM_PROBE_LENGTH);
            if (read == -1)
            {
                return charset; // empty file: keep the default
            }
            String fromBom = charsetFromBom(first3Bytes);
            if (fromBom != null)
            {
                return fromBom;
            }
            // No byte-order mark: rewind and look at the content itself.
            bis.reset();
            if (hasThreeByteUtf8Sequence(bis))
            {
                charset = "UTF-8";
            }
        } catch (Exception e)
        {
            // Deliberately broad: callers rely on getting the default instead of an exception.
            e.printStackTrace();
        }
        return charset;
    }

    /** Returns the charset announced by a byte-order mark in {@code head}, or null if there is none. */
    private static String charsetFromBom(byte[] head)
    {
        if (head[0] == (byte) 0xFF && head[1] == (byte) 0xFE)
        {
            return "UTF-16LE";
        }
        if (head[0] == (byte) 0xFE && head[1] == (byte) 0xFF)
        {
            return "UTF-16BE";
        }
        if (head[0] == (byte) 0xEF && head[1] == (byte) 0xBB && head[2] == (byte) 0xBF)
        {
            return "UTF-8";
        }
        return null;
    }

    /**
     * Scans {@code in} and reports whether the first decisive byte pattern is a well-formed
     * three-byte UTF-8 sequence (lead byte 0xE0-0xEF followed by two continuation bytes).
     */
    private static boolean hasThreeByteUtf8Sequence(BufferedInputStream in) throws java.io.IOException
    {
        int current;
        while ((current = in.read()) != -1)
        {
            if (current >= 0xF0)
            {
                return false;
            }
            if (isContinuationByte(current))
            {
                // A continuation byte with no lead byte cannot be UTF-8; treated as GBK.
                return false;
            }
            if (0xC0 <= current && current <= 0xDF)
            {
                // A two-byte pattern is also possible in GBK, so it decides nothing: keep scanning.
                if (!isContinuationByte(in.read()))
                {
                    return false;
                }
            } else if (0xE0 <= current && current <= 0xEF)
            {
                // May still misfire on GBK data, but the probability is small.
                return isContinuationByte(in.read()) && isContinuationByte(in.read());
            }
        }
        return false;
    }

    /** Tells whether {@code value} lies in the UTF-8 continuation range 0x80-0xBF (false for -1 / end of stream). */
    private static boolean isContinuationByte(int value)
    {
        return 0x80 <= value && value <= 0xBF;
    }
}
在测试类中新建测试方法
/**
 * Prints empty test-method stubs named t01 through t49, one per line, so they can be
 * pasted into a test class.
 */
@Test
public void t00() throws Exception
{
    for (int index = 1; index < 50; index++)
    {
        // Single-digit indices are left-padded with one zero to keep the names aligned.
        String padding = index < 10 ? "0" : "";
        String stub = " @Test public void t" + padding + index + "()throws Exception{}";
        System.out.println(stub);
    }
}
(过程1)测试输出到文件
/**
 * Process 1: appends every non-empty file found under {@code D:/test/} (recursively) to
 * {@code D:/test/result.md}, each as a Markdown heading followed by a fenced code block.
 *
 * <p>The report file sits inside the scanned directory, so it is skipped explicitly;
 * otherwise every re-run would read the previous report and append it to itself.
 * A file that cannot be read or written is reported on standard error and the run continues.
 */
@Test
public void t01() throws Exception
{
    File resultFile = new File("D:/test/result.md");
    Collection<File> files = FileUtils.listFiles(new File("D:/test/"),
            EmptyFileFilter.NOT_EMPTY,
            DirectoryFileFilter.INSTANCE);
    for (File file : files)
    {
        if (file.equals(resultFile))
        {
            continue; // never feed the report back into itself
        }
        String name = file.getName();
        try
        {
            // Decode with the detected charset so non-UTF-8 sources are not garbled.
            String content = FileUtils.readFileToString(file, MyFileUtils.charset(file.getAbsolutePath()));
            StringBuilder sb = new StringBuilder();
            sb.append("# ").append(name).append("\n");
            sb.append("```\n");
            sb.append(content).append("\n");
            sb.append("```\n\n");
            FileUtils.write(resultFile, sb.toString(), "utf8", true);
        } catch (IOException e)
        {
            e.printStackTrace();
        }
    }
}
(过程2)打印后缀名
/**
 * Process 2: walks the listed project directories, prints the absolute path of every
 * non-empty {@code md} or {@code css} file, and finally prints the set of all file
 * extensions that were seen.
 */
@Test
public void t02() throws Exception
{
    String mainPath = "D:/Everything/SVN/workspace/0303编码实现/trunk/Source/";
    String[] names = {"airflow", "data-service-security-enhance", "data-service-security-enhance-portal", "eip"};
    HashSet<String> extensions = new HashSet<>();
    for (String projectName : names)
    {
        File projectDir = new File(mainPath + projectName);
        Collection<File> projectFiles = FileUtils.listFiles(projectDir,
                EmptyFileFilter.NOT_EMPTY,
                DirectoryFileFilter.INSTANCE);
        for (File candidate : projectFiles)
        {
            String extension = FilenameUtils.getExtension(candidate.getName());
            boolean ofInterest = "md".equals(extension) || "css".equals(extension);
            if (ofInterest)
            {
                System.out.println(candidate.getAbsolutePath());
            }
            extensions.add(extension);
        }
        System.out.println(projectName + "done...");
    }
    System.out.println(extensions);
    // Sample output:
    // airflowdone...
    //data-service-security-enhancedone...
    //data-service-security-enhance-portaldone...
    //eipdone...
    //[, css, FDC, log, py, iml, js, conf, pid, eot, lst, sql, 20190723, java, ico, sh, xml, md, json,
    // yml, jar, woff2, html, class, map, zip, jpg, types, original, svg, gitignore, ttf, png, war, sample,
    // pack, woff, txt, 1, 2, meta, vm, name, cmd, idx, properties]
}
(结果)最终实现
/**
 * Final result: walks the listed project directories and appends every non-empty source
 * file with a whitelisted extension to {@code D:/test/result.md}, each as a Markdown
 * heading followed by a code block fenced with the file's extension.
 *
 * <p>A file that cannot be read or written is reported on standard error and the run continues.
 */
@Test
public void t03() throws Exception
{
    // Whitelisted extensions. Entries must not carry surrounding whitespace: the extension
    // of each file is trimmed before the lookup, so a padded entry could never match.
    ArrayList<String> suffix_list = new ArrayList<>(Arrays.asList("css", "py", "js", "conf", "sql", "java", "sh", "xml", "json", "yml", "html", "properties"));
    String mainPath = "D:/Everything/SVN/workspace/0303编码实现/trunk/Source/";
    String[] names = {"airflow", "data-service-security-enhance", "data-service-security-enhance-portal", "eip"};
    File resultFile = new File("D:/test/result.md");
    for (String projectName : names)
    {
        Collection<File> files = FileUtils.listFiles(new File(mainPath + projectName),
                EmptyFileFilter.NOT_EMPTY,
                DirectoryFileFilter.INSTANCE);
        for (File file : files)
        {
            String suffix = FilenameUtils.getExtension(file.getName()).trim();
            if (!suffix_list.contains(suffix))
            {
                continue;
            }
            String name = file.getName();
            try
            {
                // Decode with the detected charset so non-UTF-8 sources are not garbled.
                String content = FileUtils.readFileToString(file, MyFileUtils.charset(file.getAbsolutePath()));
                StringBuilder sb = new StringBuilder();
                sb.append("# ").append(name).append("\n");
                sb.append("```").append(suffix).append("\n");
                sb.append(content).append("\n");
                sb.append("```\n\n");
                FileUtils.write(resultFile, sb.toString(), "utf8", true);
            } catch (IOException e)
            {
                e.printStackTrace();
            }
        }
    }
}