Java: word-frequency statistics over the words in 7 files — remove stop words, strip punctuation from the beginning and end of each word, and write the result to WordCount.txt sorted by frequency in descending order

Version 1 (exploratory)

package experiment6.exp4;
/*
Count the frequency of every word in the 7 files
Lincoln, Abraham - The Writings of Abraham Lincoln Volume 1.txt ~ Lincoln, Abraham - The Writings of Abraham Lincoln Volume 7.txt.
Requirements:
- remove stop words;
- strip punctuation from the beginning and end of each word;
- sort by frequency in descending order;
- write the result to the file WordCount.txt.
A HashMap can be used to record the word counts.
The file can be written with:
import java.io.PrintWriter;
PrintWriter pw = new PrintWriter("data/wordcount.txt");
pw.write();
*/
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;

public class WordFrequencyStatistics {
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        Map<String, Integer> map = new TreeMap<>();
        Set<String> setStopWords = new HashSet<>();/* The set elements are Strings, so equals and hashCode need no overriding. */
        /* Read the data of the input files into the list. */
        /* Analyze the file-name pattern so the files can be read in a loop. */
        String filesDirectory = "D:\\ecloud\\textbooks\\java\\experiment_doc\\dataExperiment6";
        String filenamesPre = filesDirectory + "\\Lincoln, Abraham - The Writings of Abraham Lincoln Volume ";
        int no = 1;
        String filenamesPos = ".txt";
        Scanner scanner = null;
        /* Read the stop words: */
        File fileStopWord = new File(filesDirectory + "\\stopwords" + filenamesPos);
        try {
            scanner = new Scanner(fileStopWord);
            while (scanner.hasNextLine()) {
                setStopWords.add(scanner.nextLine());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        /* Read and process the files one by one (7 files here). */
        for (; no < 8; no++) {
            File file = new File(filenamesPre + no + filenamesPos);
            // File file2 = new File("Lincoln, Abraham - The Writings of Abraham Lincoln Volume 2.txt");
            try {
                scanner = new Scanner(file);
                while (scanner.hasNext()) {
                    String s = scanner.next();
                    char tmpCharPos = s.charAt(s.length() - 1);
                    char tmpCharPre = s.charAt(0);
                    /* The input text is fairly irregular; without regular expressions some special cases
                     * cannot be parsed cleanly, so correct word parsing is the main thing to improve here (to be optimized). */
                    /* Drop a leading/trailing character that is neither a letter nor a digit (a trailing '.' is stripped as well). */
                    if (!Character.isAlphabetic(tmpCharPos) && (!(Character.isDigit(tmpCharPos) && tmpCharPos != '.')) && s.length() > 1) {
                        s = s.substring(0, s.length() - 1);
                    }//endIf1
                    if (!Character.isAlphabetic(tmpCharPre) && (!(Character.isDigit(tmpCharPre) && tmpCharPre != '.')) && s.length() > 1) {
                        s = s.substring(1);
                    }//endIf2
                    /* Ignore case: the provided stop words are all lower case, so lower-casing the candidate word
                     * before the lookup is enough (if necessary, the stop words could also be lower-cased on input). */
                    if (!setStopWords.contains(s.toLowerCase())) {
                        list.add(s);
                    }//endIf3
                }// at this point the file's words have been read into the list
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }//endCatch
        }//endFor
        /* Count the word frequencies with the map: */
        for (String x : list) {
            if (!map.containsKey(x)) {
                map.put(x, 1);
            } else
                map.put(x, map.get(x) + 1);
        }
        /* Inspect the result: */
        System.out.println("observation");
        //Collections.sort(list, new Comparator<Integer>());
        /* One of the two usual ways to iterate over a map: */
        /* for (Map.Entry<String, Integer> x : map.entrySet()) {
            System.out.println(x);
        }*/
        List<Tuple> listTuples = new ArrayList<>();
        for (String x : map.keySet()) {
            listTuples.add(new Tuple(x, map.get(x)));
        }
        Collections.sort(listTuples, new Comparator<Tuple>() {
            @Override
            public int compare(Tuple o1, Tuple o2) {
                return o2.getValue() - o1.getValue();
            }
        });
        //System.out.println(listTuples);
        for (Tuple x : listTuples) {
            System.out.println(x);
        }
    }//endMain
}

// Tuple.java (a separate file in the same package)
package experiment6.exp4;
public class Tuple {
    String string;
    int num;
    public Tuple(String string, int num) {
        this.string = string;
        this.num = num;
    }
    public String getKey() {
        return string;
    }
    public int getValue() {
        return num;
    }
    @Override
    public String toString() {
        return getKey() + "\t" + getValue();
    }
}
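
The comments in version 1 admit that stripping only one leading and one trailing character is fragile and that a regular expression would parse words more robustly. A minimal sketch of that idea (the helper name trimEdges and the POSIX character class are my own choices, not part of the original exercise):

import java.util.regex.Pattern;

public class EdgeTrimDemo {
    // Strip any run of non-alphanumeric characters from both ends of a token.
    private static final Pattern EDGE = Pattern.compile("^[^\\p{Alnum}]+|[^\\p{Alnum}]+$");

    static String trimEdges(String token) {
        return EDGE.matcher(token).replaceAll("");
    }

    public static void main(String[] args) {
        System.out.println(trimEdges("\"Lincoln,\""));  // Lincoln
        System.out.println(trimEdges("--1861."));       // 1861
    }
}

Such a helper could replace the two charAt checks in the read loop above, and it also handles tokens wrapped in several punctuation marks at once.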

Version 2

package experiment6.exp4;
/*
Count the frequency of every word in the 7 files
Lincoln, Abraham - The Writings of Abraham Lincoln Volume 1.txt ~ Lincoln, Abraham - The Writings of Abraham Lincoln Volume 7.txt.
Requirements:
- remove stop words;
- strip punctuation from the beginning and end of each word;
- sort by frequency in descending order;
- write the result to the file WordCount.txt.
A HashMap can be used to record the word counts.
The file can be written with:
import java.io.PrintWriter;
PrintWriter pw = new PrintWriter("data/wordcount.txt");
pw.write();
*/
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.*;

public class WordFrequencyStatistics {
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        Map<String, Integer> map = new TreeMap<>();
        Set<String> setStopWords = new HashSet<>();/* The set elements are Strings, so equals and hashCode need no overriding. */
        /* Read the data of the input files into the list. */
        /* Analyze the file-name pattern so the files can be read in a loop. */
        String filesDirectory = "D:\\ecloud\\textbooks\\java\\experiment_doc\\dataExperiment6";
        String filenamesPre = filesDirectory + "\\Lincoln, Abraham - The Writings of Abraham Lincoln Volume ";
        int no = 1;
        String filenamesPos = ".txt";
        Scanner scanner = null;
        /* Read the stop words: */
        File fileStopWord = new File(filesDirectory + "\\stopwords" + filenamesPos);
        try {
            scanner = new Scanner(fileStopWord);
            while (scanner.hasNextLine()) {
                setStopWords.add(scanner.nextLine());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        /* Read and process the files one by one (7 files here). */
        for (; no < 8; no++) {
            File file = new File(filenamesPre + no + filenamesPos);
            // File file2 = new File("Lincoln, Abraham - The Writings of Abraham Lincoln Volume 2.txt");
            try {
                scanner = new Scanner(file);
                while (scanner.hasNextLine()) {
                    String s = scanner.nextLine();
                    StringTokenizer tokenizer = new StringTokenizer(s, " #*-,.!:;\"$()[]\\&?");// word delimiters
                    String str;
                    while (tokenizer.hasMoreElements()) {
                        str = (String) tokenizer.nextElement();
                        /* Ignore case: the provided stop words are all lower case, so lower-casing the candidate word
                         * before the lookup is enough (if necessary, the stop words could also be lower-cased on input). */
                        if (!setStopWords.contains(str.toLowerCase())) {
                            list.add(str);
                        }//endIf
                    }
                }// at this point the file's words have been read into the list
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }//endCatch
        }//endFor
        /* Count the word frequencies with the map: */
        for (String x : list) {
            if (!map.containsKey(x)) {
                map.put(x, 1);
            } else
                map.put(x, map.get(x) + 1);
        }
        /* Inspect the result: */
        System.out.println("observation");
        //Collections.sort(list, new Comparator<Integer>());
        /* One of the two usual ways to iterate over a map: */
        /* for (Map.Entry<String, Integer> x : map.entrySet()) {
            System.out.println(x);
        }*/
        List<Tuple> listTuples = new ArrayList<>();
        for (String x : map.keySet()) {
            listTuples.add(new Tuple(x, map.get(x)));
        }
        Collections.sort(listTuples, new Comparator<Tuple>() {
            @Override
            public int compare(Tuple o1, Tuple o2) {
                return o2.getValue() - o1.getValue();
            }
        });
        //System.out.println(listTuples);
        /* for (Tuple x : listTuples) {
            System.out.println(x);
        }*/
        /* Write the result to the file (close the writer so the buffered output is actually flushed). */
        try {
            PrintWriter pw = new PrintWriter(filesDirectory + "/data/wordcount.txt");
            for (Tuple x : listTuples) {
                pw.write(x.toString());
            }
            pw.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
    }//endMain
}

// Tuple.java (a separate file in the same package)
package experiment6.exp4;
public class Tuple {
    String string;
    int num;
    public Tuple(String string, int num) {
        this.string = string;
        this.num = num;
    }
    public String getKey() {
        return string;
    }
    public int getValue() {
        return num;
    }
    @Override
    public String toString() {
        return getKey() + "\t" + getValue() + "\n";
    }
}
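
Version 2 only produces output if the PrintWriter's buffer is flushed before the program ends, which is why the writer is closed explicitly above. A try-with-resources block is the tidier way to guarantee this; a small sketch reusing the filesDirectory and listTuples variables from the listing above (the output path is the same assumption as in the original):

// Sketch: try-with-resources closes (and thus flushes) the writer automatically,
// even if an exception is thrown while writing.
try (PrintWriter pw = new PrintWriter(filesDirectory + "/data/wordcount.txt")) {
    for (Tuple x : listTuples) {
        pw.write(x.toString());
    }
} catch (FileNotFoundException e) {
    e.printStackTrace();
}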

Version 3 (mainly an alternative way of filtering out stop words)

package experiment6.exp4;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.*;

public class WordFrequencyStatistics {
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        Map<String, Integer> map = new TreeMap<>();
        Set<String> stopWordsSet = new HashSet<>();/* The set elements are Strings, so equals and hashCode need no overriding. */
        /* Read the data of the input files into the list. */
        /* Analyze the file-name pattern so the files can be read in a loop. */
        String filesDirectory = "D:\\ecloud\\textbooks\\java\\experiment_doc\\dataExperiment6";
        String filenamesPre = filesDirectory + "\\Lincoln, Abraham - The Writings of Abraham Lincoln Volume ";
        int no = 1;
        String filenamesPos = ".txt";
        Scanner scanner;
        /* Read the stop words: */
        File fileStopWord = new File(filesDirectory + "\\stopwords" + filenamesPos);
        try {
            scanner = new Scanner(fileStopWord);
            while (scanner.hasNextLine()) {
                stopWordsSet.add(scanner.nextLine());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        /* Read and process the files one by one (7 files here). */
        for (; no < 8; no++) {
            File file = new File(filenamesPre + no + filenamesPos);
            // File file2 = new File("Lincoln, Abraham - The Writings of Abraham Lincoln Volume 2.txt");
            try {
                scanner = new Scanner(file);
                while (scanner.hasNextLine()) {
                    String s = scanner.nextLine();
                    StringTokenizer tokenizer = new StringTokenizer(s, " #*-,.!:;\"$()[]\\&?");// word delimiters
                    String str;
                    /* Filter out stop words and add the remaining words to the list. */
                    while (tokenizer.hasMoreElements()) {// all words of the current line are held by the tokenizer
                        str = (String) tokenizer.nextElement();// fetch the words one by one
                        /* Filtering out stop words, method 1: */
                        /* Ignore case: the provided stop words are all lower case, so lower-casing the candidate word
                         * before the lookup is enough (if necessary, the stop words could also be lower-cased on input). */
                        if (!stopWordsSet.contains(str.toLowerCase())) {
                            list.add(str);
                        }//endIf
                        /* Method 2 (not recommended: far more expensive than method 1): */
                        // boolean isEliminate = false;
                        // for (String x : stopWordsSet) {
                        //     if (str.compareToIgnoreCase(x) == 0) {
                        //         isEliminate = true;
                        //         break;
                        //     }//endIf
                        // }//endFor
                        // if (isEliminate == false)
                        //     list.add(str);
                    }//endWhile
                }// at this point the file's words have been read into the list
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }//endCatch
        }//endFor
        /* Count the word frequencies with the map: */
        for (String x : list) {
            if (!map.containsKey(x)) {
                map.put(x, 1);
            } else
                map.put(x, map.get(x) + 1);
        }
        /* Inspect the result: */
        System.out.println("observation");
        //Collections.sort(list, new Comparator<Integer>());
        /* One of the two usual ways to iterate over a map: */
        /* for (Map.Entry<String, Integer> x : map.entrySet()) {
            System.out.println(x);
        }*/
        List<Tuple> listTuples = new ArrayList<>();
        for (String x : map.keySet()) {
            listTuples.add(new Tuple(x, map.get(x)));
        }
        Collections.sort(listTuples, new Comparator<Tuple>() {
            @Override
            public int compare(Tuple o1, Tuple o2) {
                return o2.getValue() - o1.getValue();
            }
        });
        /* Print the result */
        //System.out.println(listTuples);
        for (Tuple x : listTuples) {
            System.out.println(x);
        }
        /* Write the result to the file */
        // PrintWriter pw = null;
        // try {
        //     pw = new PrintWriter(filesDirectory + "/data/wordcount.txt");
        // } catch (FileNotFoundException e) {
        //     e.printStackTrace();
        // }
        // for (Tuple x : listTuples) {
        //     pw.write(x.toString());
        // }//endFor
    }//endMain
}
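
Every version counts frequencies with an explicit containsKey/put branch. Since Java 8 the same loop can be written in one line with Map.merge (or getOrDefault); a minimal sketch using the list and map variables from the listing above:

// Sketch: count word frequencies without the explicit containsKey branch.
for (String x : list) {
    map.merge(x, 1, Integer::sum);  // insert 1, or add 1 to the existing count
    // equivalent: map.put(x, map.getOrDefault(x, 0) + 1);
}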

Version 4 (building the output string with a StringBuffer)

package experiment6.exp4;
/*
Count the frequency of every word in the 7 files
Lincoln, Abraham - The Writings of Abraham Lincoln Volume 1.txt ~ Lincoln, Abraham - The Writings of Abraham Lincoln Volume 7.txt.
Requirements:
- remove stop words;
- strip punctuation from the beginning and end of each word;
- sort by frequency in descending order;
- write the result to the file WordCount.txt.
A HashMap can be used to record the word counts.
The file can be written with:
import java.io.PrintWriter;
PrintWriter pw = new PrintWriter("data/wordcount.txt");
pw.write();
*/
import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.*;

public class WordFrequencyStatistics {
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        Map<String, Integer> map = new TreeMap<>();
        Set<String> stopWordsSet = new HashSet<>();/* The set elements are Strings, so equals and hashCode need no overriding. */
        /* Read the data of the input files into the list. */
        /* Analyze the file-name pattern so the files can be read in a loop. */
        String filesDirectory = "D:\\ecloud\\textbooks\\java\\experiment_doc\\dataExperiment6";
        String filenamesPre = filesDirectory + "\\Lincoln, Abraham - The Writings of Abraham Lincoln Volume ";
        int no = 1;
        String filenamesPos = ".txt";
        Scanner scanner;
        /* Read the stop words: */
        File fileStopWord = new File(filesDirectory + "\\stopwords" + filenamesPos);
        try {
            scanner = new Scanner(fileStopWord);
            while (scanner.hasNextLine()) {
                stopWordsSet.add(scanner.nextLine());
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        /* Read and process the files one by one (7 files here). */
        for (; no < 8; no++) {
            /* The File object is declared inside the for loop: although the variable is called "file"
             * on every iteration, each iteration creates a distinct File instance via new. */
            File file = new File(filenamesPre + no + filenamesPos);
            // File file2 = new File("Lincoln, Abraham - The Writings of Abraham Lincoln Volume 2.txt");
            try {
                scanner = new Scanner(file);
                while (scanner.hasNextLine()) {
                    String s = scanner.nextLine();
                    StringTokenizer tokenizer = new StringTokenizer(s, " #*-,.!:;\"$()[]\\&?");// word delimiters
                    String str;
                    /* Filter out stop words and add the remaining words to the list. */
                    while (tokenizer.hasMoreElements()) {// all words of the current line are held by the tokenizer
                        str = (String) tokenizer.nextElement();// fetch the words one by one
                        /* Filtering out stop words, method 1: */
                        /* Ignore case: the provided stop words are all lower case, so lower-casing the candidate word
                         * before the lookup is enough (if necessary, the stop words could also be lower-cased on input). */
                        if (!stopWordsSet.contains(str.toLowerCase())) {
                            list.add(str);
                        }//endIf
                        /* Method 2 (not recommended: far more expensive than method 1): */
                        // boolean isEliminate = false;
                        // for (String x : stopWordsSet) {
                        //     if (str.compareToIgnoreCase(x) == 0) {
                        //         isEliminate = true;
                        //         break;
                        //     }//endIf
                        // }//endFor
                        // if (isEliminate == false)
                        //     list.add(str);
                    }//endWhile
                }// at this point the file's words have been read into the list
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }//endCatch
        }//endFor
        /* Count the word frequencies with the map: */
        for (String x : list) {
            if (!map.containsKey(x)) {
                map.put(x, 1);
            } else
                map.put(x, map.get(x) + 1);
        }
        List<Tuple> listTuples = new ArrayList<>();
        for (String x : map.keySet()) {
            listTuples.add(new Tuple(x, map.get(x)));
        }
        Collections.sort(listTuples, new Comparator<Tuple>() {
            @Override
            public int compare(Tuple o1, Tuple o2) {
                return o2.getValue() - o1.getValue();
            }
        });
        /* Print the result */
        // //System.out.println(listTuples);
        // for (Tuple x : listTuples) {
        //     System.out.println(x);
        // }
        /* Write the result to the file */
        /* Create the output file */
        PrintWriter pw = null;
        try {
            //pw = new PrintWriter(filesDirectory + "/data/wordcount.txt");
            pw = new PrintWriter("C://users//xuchaoxin//desktop//wordcount.txt");
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        /* Writing approach 1: */
        // for (Tuple x : listTuples) {
        //     pw.write(x.toString());
        // }//endFor
        /* Writing approach 2: collect the whole output in a StringBuffer first. */
        StringBuffer sb = new StringBuffer();
        for (Tuple x : listTuples) {
            sb.append(x.toString());
        }
        //System.out.println(sb);// this actually prints sb.toString()
        if (pw != null) {
            pw.write(sb.toString());
            pw.close();// flush the buffer so the content actually reaches the file
        }
    }//endMain
}
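
The Tuple class and the anonymous Comparator exist only to sort the counts in descending order. The same ordering can be obtained directly from the map's entry set, which removes the helper class and the subtraction-based comparator. A sketch, assuming the map variable from the listing above (StringBuilder is the unsynchronized counterpart of StringBuffer and is sufficient in this single-threaded program):

// Sketch: sort the entries by descending count without a helper class.
List<Map.Entry<String, Integer>> entries = new ArrayList<>(map.entrySet());
entries.sort(Map.Entry.<String, Integer>comparingByValue(Comparator.reverseOrder()));
StringBuilder sb = new StringBuilder();
for (Map.Entry<String, Integer> e : entries) {
    sb.append(e.getKey()).append('\t').append(e.getValue()).append('\n');
}
// sb.toString() can then be written out with the PrintWriter exactly as in version 4.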