单词统计
1统计字母出现的概率(不分大小写)
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.NumberFormat;

/**
 * Reports how often each English letter occurs in a text file, ignoring case.
 *
 * <p>For every letter from {@code a} to {@code z} the program prints
 * {@code letter=count(frequency),} on a single line, where {@code frequency} is the
 * letter's share of all counted letters, rendered with two decimals.
 */
public class a {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "f:/piao.txt";

    /** Number of letters in the English alphabet. */
    private static final int ALPHABET_SIZE = 26;

    /** Renders a relative frequency with exactly two decimals. */
    private static final NumberFormat nf = new DecimalFormat("0.00");

    /**
     * Counts the letters of a text file and prints the per-letter statistics.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse,
     *             otherwise {@value #DEFAULT_PATH} is read
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        int[] letterCounts = countLetters(path);

        int totalCount = 0;
        for (int count : letterCounts) {
            totalCount += count;
        }

        StringBuilder report = new StringBuilder();
        for (int index = 0; index < ALPHABET_SIZE; index++) {
            // Floating-point division: integer division would truncate the ratio before
            // it is rounded. A text without any letter reports 0 instead of dividing by zero.
            double frequency = totalCount == 0 ? 0.0 : (double) letterCounts[index] / totalCount;
            report.append((char) ('a' + index))
                  .append('=')
                  .append(letterCounts[index])
                  .append('(')
                  .append(nf.format(frequency))
                  .append("),");
        }
        System.out.print(report);
    }

    /**
     * Counts the ASCII letters of a file, folding upper case into lower case.
     *
     * <p>The count runs over decoded characters rather than raw bytes, so bytes that
     * belong to a multi-byte character can never be mistaken for a letter.
     *
     * @param path path of the text file to read
     * @return a 26-element array; element 0 holds the count for 'a', element 25 for 'z'
     * @throws IOException if the file cannot be opened or read
     */
    private static int[] countLetters(String path) throws IOException {
        int[] counts = new int[ALPHABET_SIZE];
        BufferedReader br = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                for (int pos = 0; pos < line.length(); pos++) {
                    char ch = line.charAt(pos);
                    if (ch >= 'a' && ch <= 'z') {
                        counts[ch - 'a']++;
                    } else if (ch >= 'A' && ch <= 'Z') {
                        counts[ch - 'A']++;
                    }
                }
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            br.close();
        }
        return counts;
    }
}
截图:
2统计单词出现的次数
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Counts how many times each word occurs in a text file and prints the words from the
 * most frequent to the least frequent, one {@code word:count} pair per line.
 *
 * <p>A word is a maximal run of the letters a-z after the text has been lower-cased;
 * every other character separates words.
 */
public class b {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "f:/飘c1.txt";

    /** Matches one or more characters that cannot be part of a (lower-cased) word. */
    private static final Pattern NON_LETTERS = Pattern.compile("[^a-z]+");

    /**
     * Counts the words of a text file and prints them by descending frequency.
     * Words with the same count are printed in alphabetical order.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse,
     *             otherwise the default file is read
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String [] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        List<Map.Entry<String, Integer>> ranking =
                new ArrayList<Map.Entry<String, Integer>>(countWords(path).entrySet());
        Collections.sort(ranking, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                // Highest count first; the alphabetical tie-break keeps the output stable.
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });

        for (Map.Entry<String, Integer> entry : ranking) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }

    /**
     * Reads a file line by line and tallies its words.
     *
     * <p>Each line is tokenised on its own, so the last word of one line is never glued
     * to the first word of the next.
     *
     * @param path path of the text file to read
     * @return a map from lower-cased word to its number of occurrences
     * @throws IOException if the file cannot be opened or read
     */
    private static Map<String, Integer> countWords(String path) throws IOException {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        BufferedReader br = new BufferedReader(new FileReader(path));
        try {
            String line;
            while ((line = br.readLine()) != null) {
                // Locale.ROOT keeps the case folding independent of the machine's locale.
                for (String word : NON_LETTERS.split(line.toLowerCase(Locale.ROOT))) {
                    if (word.isEmpty()) {
                        // split() yields an empty first token when the line starts with a separator.
                        continue;
                    }
                    Integer seen = counts.get(word);
                    counts.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Close the reader even when reading fails part-way through.
            br.close();
        }
        return counts;
    }
}
截图
3去除无用词
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;

/**
 * Counts the words of a text file, drops the stop words "a" and "the", and prints the
 * most frequent remaining words.
 *
 * <p>The program first prints every word that survived the stop-word filter, then reads
 * a number N from standard input and prints the N most frequent words with their counts.
 */
public class English_word {

    /** File analysed when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "D:\\Englis_letter.txt";

    /** One or more whitespace or punctuation characters; a whole run is a single separator. */
    private static final String SEPARATORS = "[\\s”“();,.?!]+";

    /** Words excluded from the ranking; compared case-insensitively. */
    private static final Set<String> STOP_WORDS = new HashSet<String>(Arrays.asList("a", "the"));

    /**
     * Runs the word count.
     *
     * @param args optional; {@code args[0]} is the path of the file to analyse,
     *             otherwise the default file is read
     * @throws FileNotFoundException if the file disappears or cannot be opened after the
     *                               existence check
     */
    public static void main(String[] args) throws FileNotFoundException {
        File file = new File((args != null && args.length > 0) ? args[0] : DEFAULT_PATH);
        if (!file.exists()) {
            // Report a missing file instead of failing with an exception.
            System.out.println("文件不存在");
            return;
        }

        List<Map.Entry<String, Integer>> ranking = new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> entry : countWords(file).entrySet()) {
            String word = entry.getKey();
            // Lower-casing makes "The" and "A" count as stop words too.
            if (!STOP_WORDS.contains(word.toLowerCase(Locale.ROOT))) {
                System.out.println(word);
                ranking.add(entry);
            }
        }

        Collections.sort(ranking, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                // Highest count first; the alphabetical tie-break keeps the output stable.
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });

        // Not closed on purpose: closing this scanner would close System.in as well.
        Scanner scan = new Scanner(System.in);
        int ms = scan.nextInt();
        // Never print more rows than there are words.
        int shown = Math.min(ms, ranking.size());
        for (int j = 0; j < shown; j++) {
            Map.Entry<String, Integer> entry = ranking.get(j);
            System.out.println(entry.getKey() + " 出现次数:" + entry.getValue());
        }
    }

    /**
     * Tallies the words of a file. Words are case-sensitive; empty tokens are ignored.
     *
     * @param file the text file to read
     * @return a map from word to its number of occurrences
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static Map<String, Integer> countWords(File file) throws FileNotFoundException {
        Map<String, Integer> counts = new HashMap<String, Integer>();
        Scanner x = new Scanner(file);
        try {
            while (x.hasNextLine()) {
                for (String word : x.nextLine().split(SEPARATORS)) {
                    if (word.isEmpty()) {
                        // split() yields an empty first token when the line starts with a separator.
                        continue;
                    }
                    Integer seen = counts.get(word);
                    counts.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Release the file handle even when reading fails.
            x.close();
        }
        return counts;
    }
}
4遍历文件统计
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;

/**
 * Counts the words of every {@code .txt} file in a directory, drops the stop words
 * "a" and "the", and prints the most frequent remaining words.
 *
 * <p>The program prints the path of each file it reads, then reads a number N from
 * standard input and prints the N most frequent words with their counts.
 */
public class test {

    /** Ranked words, most frequent first; filled by {@link #main(String[])}. */
    static String words[] = new String [100000];

    /** Occurrence count of the word at the same index of {@link #words}. */
    static int out_words[] = new int [100000];

    /** Number of valid entries in {@link #words} and {@link #out_words}. */
    static int i=0;

    /** Word tally accumulated over every file passed to {@link #English_words(File)}. */
    static HashMap<String, Integer> hashMap = new HashMap<String, Integer>();

    /** Directory scanned when no path is supplied on the command line. */
    private static final String DEFAULT_DIRECTORY = "d:/";

    /** One or more whitespace or punctuation characters; a whole run is a single separator. */
    private static final String SEPARATORS = "[\\s”“();,.?!]+";

    /** Words excluded from the ranking; compared case-insensitively. */
    private static final Set<String> STOP_WORDS = new HashSet<String>(Arrays.asList("a", "the"));

    /**
     * Adds the words of one file to {@link #hashMap}. Words are case-sensitive and empty
     * tokens are ignored. Prints a message and returns when the file does not exist.
     *
     * @param ms the text file to read
     * @throws FileNotFoundException if the file exists but cannot be opened
     */
    public static void English_words(File ms) throws FileNotFoundException {
        if (!ms.exists()) {
            // Report a missing file instead of failing with an exception.
            System.out.println("文件不存在");
            return;
        }
        Scanner x = new Scanner(ms);
        try {
            while (x.hasNextLine()) {
                for (String word : x.nextLine().split(SEPARATORS)) {
                    if (word.isEmpty()) {
                        // split() yields an empty first token when the line starts with a separator.
                        continue;
                    }
                    Integer seen = hashMap.get(word);
                    hashMap.put(word, seen == null ? 1 : seen + 1);
                }
            }
        } finally {
            // Release the file handle even when reading fails.
            x.close();
        }
    }

    /**
     * Scans a directory, tallies the words of its {@code .txt} files and prints the top N.
     *
     * @param args optional; {@code args[0]} is the directory to scan, otherwise
     *             {@value #DEFAULT_DIRECTORY} is scanned
     * @throws FileNotFoundException if a listed file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        File directory = new File((args != null && args.length > 0) ? args[0] : DEFAULT_DIRECTORY);
        File[] tempList = directory.listFiles();
        if (tempList == null) {
            // listFiles() returns null when the path is not a readable directory.
            System.out.println("文件不存在");
            return;
        }
        // listFiles() guarantees no order; sorting makes the run reproducible.
        Arrays.sort(tempList);
        for (File candidate : tempList) {
            // isFile() skips directories whose name happens to end in ".txt".
            if (candidate.isFile() && candidate.getName().endsWith(".txt")) {
                System.out.println("文 件:" + candidate);
                English_words(candidate);
            }
        }

        List<Map.Entry<String, Integer>> ranking = new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> entry : hashMap.entrySet()) {
            String word = entry.getKey();
            // Lower-casing makes "The" and "A" count as stop words too.
            if (word != null && !word.isEmpty() && !STOP_WORDS.contains(word.toLowerCase(Locale.ROOT))) {
                ranking.add(entry);
            }
        }
        Collections.sort(ranking, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                // Highest count first; the alphabetical tie-break keeps the output stable.
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        publishRanking(ranking);

        // Not closed on purpose: closing this scanner would close System.in as well.
        Scanner scan = new Scanner(System.in);
        int ms = scan.nextInt();
        // Never print more rows than there are words.
        int shown = Math.min(ms, i);
        for (int j = 0; j < shown; j++) {
            System.out.println(words[j]+" 出现次数:"+out_words[j]);
        }
    }

    /**
     * Copies the sorted ranking into {@link #words} / {@link #out_words} and sets
     * {@link #i} to its size, growing the arrays when the vocabulary does not fit.
     */
    private static void publishRanking(List<Map.Entry<String, Integer>> ranking) {
        int needed = ranking.size();
        if (needed > words.length || needed > out_words.length) {
            words = new String[needed];
            out_words = new int[needed];
        }
        // Restart from zero so a second run does not append to the previous ranking.
        i = 0;
        for (Map.Entry<String, Integer> entry : ranking) {
            words[i] = entry.getKey();
            out_words[i] = entry.getValue();
            i++;
        }
    }
}