单词统计
1. 统计字母出现的频率(不区分大小写)
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.text.NumberFormat;
import java.util.Locale;

/**
 * Prints how often each letter {@code a}-{@code z} occurs in a text file,
 * ignoring case, together with that letter's share of all counted letters.
 *
 * <p>Output is a single line of the form {@code a=12(0.08),b=3(0.02),...}.
 */
public class a {
    /** File analysed when no path is given on the command line. */
    private static final String DEFAULT_PATH = "f:/piao.txt";

    /** Number of letters tracked (a-z). */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Two-decimal formatter for the per-letter share. The symbols are pinned
     * to {@link Locale#ROOT} so the decimal separator is always '.'.
     */
    private static final NumberFormat nf =
            new DecimalFormat("0.00", DecimalFormatSymbols.getInstance(Locale.ROOT));

    /**
     * Reads the input file line by line and prints the letter statistics.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws IOException if the file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        int[] counts = new int[ALPHABET_SIZE];
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = br.readLine()) != null) {
                countLetters(line, counts);
            }
        }
        System.out.print(formatReport(counts));
    }

    /**
     * Adds the ASCII letters found in {@code text} to {@code counts}, folding
     * upper case onto lower case.
     *
     * <p>Characters are inspected as chars rather than as platform-encoded
     * bytes: in multi-byte encodings a trail byte can fall into the ASCII
     * letter range and would otherwise be miscounted as a letter.
     *
     * @param text   text to scan
     * @param counts 26-element accumulator, index 0 = 'a' ... index 25 = 'z'
     */
    static void countLetters(CharSequence text, int[] counts) {
        for (int i = 0; i < text.length(); i++) {
            char ch = text.charAt(i);
            if (ch >= 'a' && ch <= 'z') {
                counts[ch - 'a']++;
            } else if (ch >= 'A' && ch <= 'Z') {
                counts[ch - 'A']++;
            }
        }
    }

    /**
     * Renders the counts as {@code a=<count>(<share>),b=...}, where share is
     * the letter's fraction of all counted letters rounded to two decimals.
     *
     * @param counts 26-element array of per-letter counts
     * @return the formatted report; every share is {@code 0.00} when no
     *         letters were counted at all
     */
    static String formatReport(int[] counts) {
        int total = 0;
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            total += counts[i];
        }
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < ALPHABET_SIZE; i++) {
            // Floating-point division: integer division would truncate the
            // share, and would throw when total is zero.
            double share = (total == 0) ? 0.0 : (double) counts[i] / total;
            out.append((char) ('a' + i))
               .append('=')
               .append(counts[i])
               .append('(')
               .append(nf.format(share))
               .append("),");
        }
        return out.toString();
    }
}
截图:
2. 统计单词出现的次数
import java.io.BufferedReader;
import java.io.FileReader;
import java.util.*;

/**
 * Counts how often each English word occurs in a text file (case-insensitive)
 * and prints the words from most to least frequent as {@code word:count}.
 */
public class b {
    /** File analysed when no path is given on the command line. */
    private static final String DEFAULT_PATH = "f:/飘c1.txt";

    /**
     * Reads the input file and prints the word frequencies, highest first.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;
        Map<String, Integer> counts = new HashMap<String, Integer>();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            String line;
            // Count line by line: concatenating lines without a separator
            // would glue the last word of a line to the first of the next.
            while ((line = br.readLine()) != null) {
                countWords(line, counts);
            }
        }
        for (Map.Entry<String, Integer> entry : sortByCountDescending(counts)) {
            System.out.println(entry.getKey() + ":" + entry.getValue());
        }
    }

    /**
     * Splits {@code text} into lower-cased words made only of the letters
     * a-z and adds each occurrence to {@code counts}.
     *
     * @param text   text to scan
     * @param counts accumulator mapping word to number of occurrences
     */
    static void countWords(String text, Map<String, Integer> counts) {
        // Locale.ROOT keeps the case mapping independent of the default locale.
        String[] words = text.toLowerCase(Locale.ROOT).split("[^a-z]+");
        for (String word : words) {
            // split() yields a leading "" when the text starts with a delimiter.
            if (word.isEmpty()) {
                continue;
            }
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Returns the entries of {@code counts} ordered by descending count;
     * words with equal counts are ordered alphabetically so the output is
     * deterministic.
     *
     * @param counts map of word to number of occurrences
     * @return a new list of the map's entries in output order
     */
    static List<Map.Entry<String, Integer>> sortByCountDescending(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> list =
                new ArrayList<Map.Entry<String, Integer>>(counts.entrySet());
        Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return list;
    }
}
截图
3. 去除无用词
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

/**
 * Counts the words of a text file, drops the stop words "a" and "the", and
 * prints the N most frequent remaining words, where N is read from standard
 * input.
 */
public class English_word {
    /** File analysed when no path is given on the command line. */
    private static final String DEFAULT_PATH = "D:\\Englis_letter.txt";

    /** Single-character delimiters that separate words within a line. */
    private static final String DELIMITERS = "[\\s+\t”“();,.?!\n]";

    /**
     * Counts the words in the input file, lists every kept word, then reads a
     * number N from standard input and prints the N most frequent words.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws FileNotFoundException if the file disappears between the
     *         existence check and opening it
     */
    public static void main(String[] args) throws FileNotFoundException {
        File file = new File((args != null && args.length > 0) ? args[0] : DEFAULT_PATH);
        if (!file.exists()) {
            System.out.println("文件不存在");
            return;
        }

        Map<String, Integer> counts = new HashMap<String, Integer>();
        // try-with-resources releases the file handle once reading is done.
        try (Scanner x = new Scanner(file)) {
            while (x.hasNextLine()) {
                countWords(x.nextLine(), counts);
            }
        }

        List<Map.Entry<String, Integer>> ranked = rankWords(counts);
        for (Map.Entry<String, Integer> entry : ranked) {
            System.out.println(entry.getKey());
        }

        // Deliberately not closed: closing this Scanner would close System.in.
        Scanner scan = new Scanner(System.in);
        int ms = scan.nextInt();
        // Clamp the request so it never runs past the words actually found.
        int limit = Math.max(0, Math.min(ms, ranked.size()));
        for (int j = 0; j < limit; j++) {
            Map.Entry<String, Integer> entry = ranked.get(j);
            System.out.println(entry.getKey() + " 出现次数:" + entry.getValue());
        }
    }

    /**
     * Splits {@code line} on the delimiter set and adds every non-empty,
     * lower-cased token to {@code counts}.
     *
     * @param line   one line of input text
     * @param counts accumulator mapping word to number of occurrences
     */
    static void countWords(String line, Map<String, Integer> counts) {
        for (String token : line.split(DELIMITERS)) {
            // Adjacent delimiters (", ") produce empty tokens; they are not words.
            if (token.isEmpty()) {
                continue;
            }
            // Lower-case so "The" and "the" count as the same word.
            String word = token.toLowerCase(Locale.ROOT);
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
    }

    /**
     * Returns the non-stop-word entries of {@code counts} ordered by
     * descending count, ties broken alphabetically.
     *
     * @param counts map of word to number of occurrences
     * @return a new list holding the kept entries in output order
     */
    static List<Map.Entry<String, Integer>> rankWords(Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> ranked = new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            if (!isStopWord(entry.getKey())) {
                ranked.add(entry);
            }
        }
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });
        return ranked;
    }

    /** Returns true for tokens that must not appear in the ranking. */
    private static boolean isStopWord(String word) {
        return word == null
                || word.trim().isEmpty()
                || "a".equalsIgnoreCase(word)
                || "the".equalsIgnoreCase(word);
    }
}
4. 遍历文件统计
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Scanner;

/**
 * Counts the words of every {@code .txt} file in a directory, drops the stop
 * words "a" and "the", and prints the N most frequent remaining words, where
 * N is read from standard input.
 */
public class test {
    /** Ranked words, most frequent first; filled by {@link #rankWords()}. */
    static String words[] = new String[0];
    /** Occurrence counts parallel to {@link #words}. */
    static int out_words[] = new int[0];
    /** Number of valid entries in {@link #words} and {@link #out_words}. */
    static int i = 0;
    /** Word counts accumulated over every file passed to English_words. */
    static HashMap<String, Integer> hashMap = new HashMap<String, Integer>();

    /** Single-character delimiters that separate words within a line. */
    private static final String DELIMITERS = "[\\s+\t”“();,.?!\n]";

    /**
     * Adds the words of one file to {@link #hashMap}. Words are lower-cased
     * and empty tokens are skipped. Prints a message and returns when the
     * file does not exist.
     *
     * @param ms file to read
     * @throws FileNotFoundException if the file disappears between the
     *         existence check and opening it
     */
    public static void English_words(File ms) throws FileNotFoundException {
        if (!ms.exists()) {
            System.out.println("文件不存在");
            return;
        }
        // try-with-resources releases the file handle once reading is done.
        try (Scanner x = new Scanner(ms)) {
            while (x.hasNextLine()) {
                for (String token : x.nextLine().split(DELIMITERS)) {
                    // Adjacent delimiters produce empty tokens; not words.
                    if (token.isEmpty()) {
                        continue;
                    }
                    // Lower-case so "The" and "the" count as the same word.
                    String word = token.toLowerCase(Locale.ROOT);
                    Integer seen = hashMap.get(word);
                    hashMap.put(word, seen == null ? 1 : seen + 1);
                }
            }
        }
    }

    /**
     * Counts the words of every {@code .txt} file directly inside the target
     * directory, then reads a number N from standard input and prints the N
     * most frequent words.
     *
     * @param args optional; {@code args[0]} overrides the default directory
     * @throws FileNotFoundException if a listed file cannot be opened
     */
    public static void main(String[] args) throws FileNotFoundException {
        String path = (args != null && args.length > 0) ? args[0] : "d:/";
        // listFiles() returns null when the path is not a readable directory.
        File[] tempList = new File(path).listFiles();
        if (tempList == null) {
            System.out.println("文件不存在");
            return;
        }
        for (File candidate : tempList) {
            // Regular files only, and a real ".txt" extension.
            if (candidate.isFile() && candidate.getName().endsWith(".txt")) {
                System.out.println("文 件:" + candidate);
                English_words(candidate);
            }
        }

        rankWords();

        // Deliberately not closed: closing this Scanner would close System.in.
        Scanner scan = new Scanner(System.in);
        int ms = scan.nextInt();
        // Clamp the request so it never runs past the words actually found.
        int limit = Math.max(0, Math.min(ms, i));
        for (int j = 0; j < limit; j++) {
            System.out.println(words[j] + " 出现次数:" + out_words[j]);
        }
    }

    /**
     * Rebuilds {@link #words}, {@link #out_words} and {@link #i} from
     * {@link #hashMap}: stop words are dropped and the rest is ordered by
     * descending count, ties broken alphabetically. The arrays are sized to
     * the number of kept words, so there is no fixed capacity to overflow.
     */
    static void rankWords() {
        List<Map.Entry<String, Integer>> ranked = new ArrayList<Map.Entry<String, Integer>>();
        for (Map.Entry<String, Integer> entry : hashMap.entrySet()) {
            if (!isStopWord(entry.getKey())) {
                ranked.add(entry);
            }
        }
        Collections.sort(ranked, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> left, Map.Entry<String, Integer> right) {
                int byCount = right.getValue().compareTo(left.getValue());
                return byCount != 0 ? byCount : left.getKey().compareTo(right.getKey());
            }
        });

        int size = ranked.size();
        String[] rankedWords = new String[size];
        int[] rankedCounts = new int[size];
        for (int j = 0; j < size; j++) {
            rankedWords[j] = ranked.get(j).getKey();
            rankedCounts[j] = ranked.get(j).getValue();
        }
        words = rankedWords;
        out_words = rankedCounts;
        i = size;
    }

    /** Returns true for tokens that must not appear in the ranking. */
    private static boolean isStopWord(String word) {
        return word == null
                || word.trim().isEmpty()
                || "a".equalsIgnoreCase(word)
                || "the".equalsIgnoreCase(word);
    }
}
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步