统计

用户需求:英语的 26 个字母的频率在一本小说中是如何分布的?某类型文章中常出现的单词是什么?某作家最常用的词汇是什么?《哈利波特》中最常用的短语是什么,等等。

要求:输出单个文件中的前 N 个最常出现的英语单词,并将结果输出到文本文件中

package wordcont;


import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.TreeSet;
import wordcont.WordEntity;
 
/**
 * Prints word-frequency reports for a text file to standard output.
 *
 * <p>The two reports tokenize differently: {@link #displayWordCount(String)}
 * lower-cases the text and splits on whitespace only, while
 * {@link #displayFrequencyWord(String)} keeps the original case and splits on
 * the characters {@code , . !}, space and newline.
 */
public class WordCont {

    /**
     * Counts the whitespace-separated, lower-cased tokens of a file and prints
     * each token with its count in dictionary order, one {@code token=count}
     * entry per line.
     *
     * <p>I/O failures are reported with {@code printStackTrace()} and are not
     * rethrown.
     *
     * @param fileName path of the text file to read
     */
    public void displayWordCount(String fileName){
        // try-with-resources closes the reader on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(fileName))) {
            // A TreeMap keeps the keys sorted, which yields the dictionary order.
            TreeMap<String,Integer> tm = new TreeMap<String,Integer>();

            String line;
            while ((line = reader.readLine()) != null) {
                String[] parts = line.toLowerCase().split("\\s+");
                for (String part : parts) {
                    String word = part.trim();
                    // Blank lines and leading whitespace make split() return "",
                    // which is not a word and must not be counted.
                    if (word.isEmpty()) {
                        continue;
                    }
                    Integer seen = tm.get(word);
                    tm.put(word, seen == null ? 1 : seen + 1);
                }
            }

            System.out.println("按字典序输出为:");
            for (Map.Entry<String,Integer> entry : tm.entrySet()) {
                System.out.println(entry);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the most frequent word(s) of a file: every word whose count equals
     * the count of the first element of the sorted {@link WordEntity} set.
     *
     * <p>Tokens are separated by {@code , . !}, space and newline; case is
     * preserved. For a file without any word only the heading is printed.
     * I/O failures are reported as a short message on standard output and are
     * not rethrown.
     *
     * @param fileName path of the text file to read
     */
    public void displayFrequencyWord(String fileName){
        // try-with-resources closes the reader on every exit path.
        try (BufferedReader br = new BufferedReader(new FileReader(fileName))) {
            StringBuilder text = new StringBuilder();
            String s;
            while ((s = br.readLine()) != null) {
                // readLine() strips the line terminator; put a newline back so
                // the last word of one line is not glued to the first word of
                // the next ('\n' is one of the tokenizer delimiters below).
                text.append(s).append('\n');
            }

            Map<String,Integer> map = new HashMap<String, Integer>();
            StringTokenizer st = new StringTokenizer(text.toString(),",.! \n");
            while (st.hasMoreTokens()) {
                String word = st.nextToken().trim();
                // A token made only of characters that trim() removes (e.g. a
                // tab) becomes empty and is not a word.
                if (word.isEmpty()) {
                    continue;
                }
                Integer seen = map.get(word);
                map.put(word, seen == null ? 1 : seen + 1);
            }

            // The TreeSet orders its elements with WordEntity.compareTo.
            Set<WordEntity> set = new TreeSet<WordEntity>();
            for (Map.Entry<String,Integer> entry : map.entrySet()) {
                set.add(new WordEntity(entry.getKey(), entry.getValue()));
            }

            System.out.println("出现频率最高的单词:");
            if (set.isEmpty()) {
                // Nothing to rank; calling next() on the iterator would throw.
                return;
            }

            int topCount = set.iterator().next().getCount();
            for (WordEntity w : set) {
                // Print only the words tied with the first-ranked entry.
                if (w.getCount() == topCount) {
                    System.out.println(w.getKey() + " 出现的次数为: "+ w.getCount());
                }
            }
        } catch (FileNotFoundException e) {
            System.out.println("文件未找到~!");
        } catch (IOException e) {
            System.out.println("文件读异常~!");
        }

    }

}

package wordcont;
 
import wordcont.WordEntity;
 
/**
 * A word together with its occurrence count.
 *
 * <p>The natural ordering puts higher counts first and breaks ties by the
 * word itself in ascending {@link String#compareTo(String)} order.
 * {@link #equals(Object)} and {@link #hashCode()} use the same two fields.
 */
public class WordEntity implements Comparable<WordEntity>{
    /**
     * Compares by count in descending order, then by key in ascending order.
     *
     * <p>Sorted collections such as {@code TreeSet} call this method to order
     * their elements. Swapping the two arguments of {@code Integer.compare}
     * would switch the count ordering to ascending.
     *
     * @param o the entity to compare with
     * @return a negative value if this entity sorts before {@code o}, zero if
     *         both have the same count and key, a positive value otherwise
     * @throws NullPointerException if the key or count of either entity is
     *         {@code null} (e.g. an instance made with the no-arg constructor)
     */
    @Override
    public int compareTo(WordEntity o) {
        // Integer.compare avoids the overflow that "a - b" has for operands of
        // opposite sign and large magnitude.
        int byCount = Integer.compare(o.count.intValue(), count.intValue());
        return (byCount == 0 ? key.compareTo(o.key) : byCount);
    }

    private String key;
    private Integer count;

    /**
     * Creates an entity for the given word and count.
     *
     * @param key   the word
     * @param count how many times the word occurred
     */
    public WordEntity ( String key,Integer count) {
        this.key = key;
        this.count = count;
    }

    /** Creates an entity whose key and count are both {@code null}. */
    public WordEntity(){

    }

    /**
     * Two entities are equal when both key and count are equal.
     *
     * @param other the object to compare with
     * @return {@code true} if {@code other} is a {@code WordEntity} with the
     *         same key and count
     */
    @Override
    public boolean equals(Object other) {
        if (this == other) {
            return true;
        }
        if (!(other instanceof WordEntity)) {
            return false;
        }
        WordEntity that = (WordEntity) other;
        return java.util.Objects.equals(key, that.key)
                && java.util.Objects.equals(count, that.count);
    }

    /** @return a hash derived from key and count, matching {@link #equals(Object)} */
    @Override
    public int hashCode() {
        return java.util.Objects.hash(key, count);
    }

    /** @return the key, a fixed Chinese label, and the count as one string */
    @Override
    public String toString() {
        return key + " 出现的次数为:" + count;
    }

    /** @return the word */
    public String getKey() {
        return key;
    }

    /** @return the occurrence count */
    public Integer getCount() {
        return count;
    }
}
package wordcont;
 
import java.util.Scanner;
 
import wordcont.WordCont;
 
public class Main {

    /**
     * Prompts for a file path on standard input, then prints both word
     * reports produced by {@code WordCont} for that file.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        System.out.println("输入文件路径:\n");

        // Read one line from the console and strip surrounding whitespace.
        Scanner console = new Scanner(System.in);
        String path = console.nextLine().trim();

        WordCont counter = new WordCont();
        counter.displayWordCount(path);
        counter.displayFrequencyWord(path);
    }

}

 

posted @ 2018-10-14 20:57  ZZKZS  阅读(182)  评论(0编辑  收藏  举报
/*鼠标跟随效果*/