把文件夹的文件根据后缀名过滤,并以Markdown格式汇总到一个文件

导入依赖

    <dependencies>
        <!-- JUnit 4: provides the @Test annotation used by the t00..t03 methods below -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <!-- Apache Commons IO: provides FileUtils, FilenameUtils and the file filters used below -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.5</version>
        </dependency>
    </dependencies>

获取字符集工具类

package www.taopanfeng.top.utils;

import java.io.BufferedInputStream;
import java.io.FileInputStream;

/**
 * @author TaoPanfeng
 * @version 1.0
 * @description
 * @date 2020-03-13 11:52
 */
public class MyFileUtils
{
    /** Number of leading bytes inspected when looking for a byte-order mark. */
    private static final int BOM_LENGTH = 3;

    /**
     * Guesses the character set of a file (for example UTF-8 or GBK).
     *
     * <p>The first three bytes are checked for a UTF-16LE, UTF-16BE or UTF-8
     * byte-order mark. When none is present the content is scanned from the
     * start: the first well-formed three-byte UTF-8 sequence makes the result
     * "UTF-8"; any byte that cannot be part of such text ends the scan and the
     * default "GBK" is kept. Two-byte sequences are skipped without deciding,
     * because the same byte pairs can also occur in GB-encoded text.
     *
     * <p>This is a heuristic, not a guarantee. If the file cannot be opened or
     * read, the stack trace is printed and "GBK" is returned.
     *
     * @param path path of the file to inspect
     * @return "UTF-16LE", "UTF-16BE", "UTF-8", or the default "GBK"
     */
    public static String charset(String path)
    {
        String charset = "GBK";
        byte[] first3Bytes = new byte[BOM_LENGTH];
        // try-with-resources closes the stream on every exit path, including exceptions.
        try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(path)))
        {
            // The read limit must cover the bytes consumed before reset() is called.
            bis.mark(BOM_LENGTH);
            int read = bis.read(first3Bytes, 0, BOM_LENGTH);
            if (read == -1)
            {
                return charset; // empty file: keep the default (ANSI/GBK)
            }
            if (first3Bytes[0] == (byte) 0xFF && first3Bytes[1] == (byte) 0xFE)
            {
                return "UTF-16LE"; // "Unicode" (little endian) BOM
            }
            if (first3Bytes[0] == (byte) 0xFE && first3Bytes[1] == (byte) 0xFF)
            {
                return "UTF-16BE"; // "Unicode big endian" BOM
            }
            if (first3Bytes[0] == (byte) 0xEF && first3Bytes[1] == (byte) 0xBB
                    && first3Bytes[2] == (byte) 0xBF)
            {
                return "UTF-8"; // UTF-8 BOM
            }

            // No BOM: rewind and scan the content from the first byte.
            bis.reset();
            while ((read = bis.read()) != -1)
            {
                if (read >= 0xF0)
                {
                    break;
                }
                if (isContinuationByte(read))
                {
                    break; // a lone 0x80-0xBF byte is treated as GBK
                }
                if (0xC0 <= read && read <= 0xDF)
                {
                    // Two-byte form (0xC0-0xDF)(0x80-0xBF) may also be GB-encoded: keep scanning.
                    if (!isContinuationByte(bis.read()))
                    {
                        break;
                    }
                } else if (0xE0 <= read && read <= 0xEF)
                {
                    // Three-byte form; can still be a false positive, but the chance is small.
                    if (isContinuationByte(bis.read()) && isContinuationByte(bis.read()))
                    {
                        charset = "UTF-8";
                    }
                    break;
                }
            }
        } catch (Exception e)
        {
            // Deliberate best effort: report the problem and fall back to the default charset.
            e.printStackTrace();
        }
        return charset;
    }

    /** Returns whether {@code b} lies in the UTF-8 continuation-byte range 0x80-0xBF. */
    private static boolean isContinuationByte(int b)
    {
        return 0x80 <= b && b <= 0xBF;
    }
}

在测试类中新建测试方法(t00 用于批量打印 t01~t49 的空测试方法骨架,方便复制粘贴)

/**
 * Prints empty JUnit test-method stubs named t01 through t49 to standard
 * output, one per line, so they can be pasted into a test class.
 */
@Test
public void t00() throws Exception
{
    int index = 1;
    while (index <= 49)
    {
        // Single-digit numbers are zero-padded to two characters.
        StringBuilder number = new StringBuilder();
        if (index < 10)
        {
            number.append("0");
        }
        number.append(index);

        System.out.println(" @Test public void t" + number + "()throws Exception{}");
        index++;
    }
}

(过程1)测试输出到文件

    /**
     * Step 1: appends every non-empty file found under D:/test/ (searched
     * recursively) to D:/test/result.md as a Markdown section consisting of a
     * heading with the file name followed by the file content in a fenced
     * code block.
     *
     * <p>The report file lives inside the scanned directory, so it is skipped
     * explicitly; otherwise a second run would append the previous report
     * into itself.
     */
    @Test
    public void t01() throws Exception
    {
        File resultFile = new File("D:/test/result.md");
        Collection<File> files = FileUtils.listFiles(new File("D:/test/"),
                EmptyFileFilter.NOT_EMPTY,
                DirectoryFileFilter.INSTANCE);

        for (File file : files)
        {
            if (file.equals(resultFile))
            {
                continue; // never feed the report back into itself
            }
            String name = file.getName();
            try
            {
                // Decode each file with its detected charset; the report itself is always UTF-8.
                String content = FileUtils.readFileToString(file, MyFileUtils.charset(file.getAbsolutePath()));
                StringBuilder sb = new StringBuilder();
                sb.append("# ").append(name).append("\n");
                sb.append("```\n");
                sb.append(content).append("\n");
                sb.append("```\n\n");
                FileUtils.write(resultFile, sb.toString(), "utf8", true);
            } catch (IOException e)
            {
                // Best effort: report the unreadable file and continue with the next one.
                e.printStackTrace();
            }
        }
    }

(过程2)打印后缀名

    /**
     * Step 2: walks each listed project directory recursively, collects the
     * distinct file extensions of all non-empty files, and prints the set.
     * The absolute path of every "md" or "css" file is printed along the way.
     */
    @Test
    public void t02() throws Exception
    {
        String mainPath = "D:/Everything/SVN/workspace/0303编码实现/trunk/Source/";
        String[] names = {"airflow", "data-service-security-enhance", "data-service-security-enhance-portal", "eip"};
        HashSet<String> extensions = new HashSet<>();
        for (String projectName : names)
        {
            File projectDir = new File(mainPath + projectName);
            Collection<File> files = FileUtils.listFiles(projectDir,
                    EmptyFileFilter.NOT_EMPTY,
                    DirectoryFileFilter.INSTANCE);

            for (File file : files)
            {
                String extension = FilenameUtils.getExtension(file.getName());
                boolean worthShowing = "md".equals(extension) || "css".equals(extension);
                if (worthShowing)
                {
                    System.out.println(file.getAbsolutePath());
                }
                extensions.add(extension);
            }
            System.out.println(projectName + "done...");
        }

        System.out.println(extensions);

        // Sample output from one run:
        // airflowdone...
        //data-service-security-enhancedone...
        //data-service-security-enhance-portaldone...
        //eipdone...
        //[, css, FDC, log, py, iml, js, conf, pid, eot, lst, sql, 20190723, java, ico, sh, xml, md, json,
        // yml, jar, woff2, html, class, map, zip, jpg, types, original, svg, gitignore, ttf, png, war, sample,
        // pack, woff, txt, 1, 2, meta, vm, name, cmd, idx, properties]
    }

(结果)最终实现

    /**
     * Final result: walks each listed project directory recursively and, for
     * every non-empty file whose extension is in the whitelist, appends a
     * Markdown section to D:/test/result.md: a heading with the file name
     * followed by the file content in a code fence tagged with the extension.
     *
     * <p>The whitelist entries must not contain surrounding whitespace: the
     * extension being looked up is trimmed, so an entry such as " js" would
     * never match and those files would be silently left out.
     */
    @Test
    public void t03() throws Exception
    {
        //[css, py, js, conf, sql,java, sh, xml, json, yml, html ,properties]
        HashSet<String> suffixes = new HashSet<>(Arrays.asList("css", "py", "js", "conf", "sql", "java", "sh", "xml", "json", "yml", "html", "properties"));

        File resultFile = new File("D:/test/result.md");
        String mainPath = "D:/Everything/SVN/workspace/0303编码实现/trunk/Source/";
        String[] names = {"airflow", "data-service-security-enhance", "data-service-security-enhance-portal", "eip"};
        for (String projectName : names)
        {
            Collection<File> files = FileUtils.listFiles(new File(mainPath + projectName),
                    EmptyFileFilter.NOT_EMPTY,
                    DirectoryFileFilter.INSTANCE);

            for (File file : files)
            {
                String suffix = FilenameUtils.getExtension(file.getName()).trim();
                if (!suffixes.contains(suffix))
                {
                    continue; // not a source/config type we want in the report
                }
                String name = file.getName();
                try
                {
                    // Decode each file with its detected charset; the report itself is always UTF-8.
                    String content = FileUtils.readFileToString(file, MyFileUtils.charset(file.getAbsolutePath()));
                    StringBuilder sb = new StringBuilder();
                    sb.append("# ").append(name).append("\n");
                    sb.append("```").append(suffix).append("\n");
                    sb.append(content).append("\n");
                    sb.append("```\n\n");
                    FileUtils.write(resultFile, sb.toString(), "utf8", true);
                } catch (IOException e)
                {
                    // Best effort: report the unreadable file and continue with the next one.
                    e.printStackTrace();
                }
            }
            //System.out.println(projectName + " done...");
        }
    }
posted on 2020-03-13 13:20  陶攀峰  阅读(194)  评论(0)  编辑  收藏  举报

顶部 底部