解析一个文件夹所有文件的中文，并输出到某一文本文档中

/**
 * Collects every run of Chinese characters found in the files under a folder
 * (searched recursively) and appends each run, one per line, to a single
 * output text file.
 *
 * <p>Naming note: throughout this class {@code srcFile} is the OUTPUT file that
 * receives the extracted text, while {@code resFile} is the INPUT file or
 * directory being scanned. The names are kept for caller compatibility.
 *
 * <p>JDK types that the original code did not use are written fully qualified
 * so that this class does not require extra import lines.
 */
public class DoGetChinese {

    /**
     * Charset used both to decode scanned files and to encode the output file.
     * Pinned explicitly so results do not depend on the platform default.
     * Malformed input bytes are replaced rather than rejected.
     */
    private static final java.nio.charset.Charset TEXT_CHARSET =
            java.nio.charset.StandardCharsets.UTF_8;

    /**
     * Matches one or more consecutive characters in the range U+4E00 to U+9FA5
     * (the commonly used CJK unified ideographs). Compiled once and reused.
     */
    private static final Pattern CHINESE_RUN = Pattern.compile("([\u4e00-\u9fa5]+)");

    /**
     * Command-line entry point.
     *
     * @param args optional: {@code args[0]} is the output text file and
     *             {@code args[1]} is the file or directory to scan; when
     *             omitted, the original hard-coded paths are used
     */
    public static void main(String[] args) {
        String src = args != null && args.length > 0 ? args[0] : "D:/ab.txt";
        String res = args != null && args.length > 1 ? args[1] : "D:\\static";
        File srcFile = new File(src);
        File resFile = new File(res);
        System.out.println(dowork(srcFile, resFile));
    }

    /**
     * Processes {@code resFile}: directories are walked recursively, regular
     * files are scanned and their Chinese text appended to {@code srcFile}.
     *
     * @param srcFile output file that receives the extracted text (appended to)
     * @param resFile input file or directory to scan
     * @return {@code true} if every file was processed without an I/O error,
     *         {@code false} if at least one file failed
     */
    public static boolean dowork(File srcFile, File resFile) {
        if (resFile.isDirectory()) {
            boolean allSucceeded = true;
            for (File child : scanFile(resFile)) {
                // "&=" always evaluates the right-hand side, so one failing
                // file does not stop the remaining files from being processed.
                allSucceeded &= dowork(srcFile, child);
            }
            return allSucceeded;
        }

        // Reading the output file while appending to it could grow it without
        // bound, so the output file is skipped if it lies in the scanned tree.
        if (resFile.getAbsoluteFile().equals(srcFile.getAbsoluteFile())) {
            return true;
        }

        System.out.println("开始解析 "+resFile.getName()+"文件");
        return extractChinese(srcFile, resFile);
    }

    /**
     * Lists the direct children of a directory.
     *
     * @param file directory to list
     * @return the children, or an empty array when {@code file} is not a
     *         directory or cannot be read (never {@code null})
     */
    public static File[] scanFile(File file) {
        File[] children = file.listFiles();
        return children != null ? children : new File[0];
    }

    /**
     * Scans one file line by line and appends every run of Chinese characters
     * to the output file. I/O failures are reported on standard error and do
     * not propagate to the caller.
     *
     * @param srcfile output file that receives the extracted text (appended to)
     * @param resFile input file to scan
     */
    public static void analysefile(File srcfile, File resFile) {
        extractChinese(srcfile, resFile);
    }

    /**
     * Does the actual work for {@link #analysefile(File, File)} and reports
     * whether it succeeded.
     *
     * @return {@code true} on success, {@code false} if reading or writing failed
     */
    private static boolean extractChinese(File srcfile, File resFile) {
        try (BufferedReader reader = new BufferedReader(new java.io.InputStreamReader(
                     new java.io.FileInputStream(resFile), TEXT_CHARSET));
             PrintWriter writer = new PrintWriter(new java.io.OutputStreamWriter(
                     new java.io.FileOutputStream(srcfile, true), TEXT_CHARSET))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String run : getChinese(line)) {
                    writer.println(run);
                }
            }
            // PrintWriter never throws on write errors; it has to be asked.
            if (writer.checkError()) {
                System.err.println("Failed to write results to " + srcfile);
                return false;
            }
            return true;
        } catch (java.io.IOException e) {
            System.err.println("Failed to process " + resFile + ": " + e);
            return false;
        }
    }

    /**
     * Finds every maximal run of consecutive Chinese characters in a string.
     *
     * @param paramValue text to search; {@code null} is treated as empty
     * @return the runs in order of appearance; empty if there are none
     */
    public static List<String> getChinese(String paramValue) {
        List<String> result = new ArrayList<>();
        if (paramValue == null) {
            return result;
        }
        Matcher matcher = CHINESE_RUN.matcher(paramValue);
        while (matcher.find()) {
            result.add(matcher.group(0));
        }
        return result;
    }
}

posted @ 2017-09-22 15:39 空空的小冷 阅读(302) 评论(0) 编辑 收藏 举报

刷新页面返回顶部

空空的小冷

解析一个文件夹所有文件的中文，并输出到某一文本文档中

公告