Java版统计文件中的每个单词出现次数
关于正则表达式中 Pattern 和 Matcher 的用法,请参见转载博客:http://www.cnblogs.com/haodawang/p/5967219.html
代码实现:
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts how many times each word occurs in a text file and prints the
 * result in alphabetical order, followed by the total number of words and
 * the number of distinct words.
 *
 * <p>A "word" is a maximal run of ASCII letters and apostrophes; matching is
 * case-insensitive because the text is lower-cased before it is scanned.
 */
public class CountWorks {

    /** File read when no path is given on the command line. */
    private static final String DEFAULT_FILE = "short.txt";

    /**
     * One or more characters in the ranges a-z or A-Z (inclusive), or an
     * apostrophe. Compiled once because the pattern never changes.
     */
    private static final Pattern WORD = Pattern.compile("[a-zA-Z']+");

    /**
     * Program entry point.
     *
     * @param args optional; {@code args[0]} is the path of the file to
     *             analyse, defaulting to {@code short.txt} when absent
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE;

        String text;
        try {
            text = readText(path);
        } catch (FileNotFoundException e) {
            // Stop here: there is nothing to count, and continuing would
            // only fail later on a null reader.
            System.err.println("File not found: " + path);
            return;
        } catch (IOException e) {
            System.err.println("Failed to read " + path + ": " + e.getMessage());
            return;
        }

        Map<String, Integer> counts = countWords(text);

        int total = 0;
        Set<Map.Entry<String, Integer>> entries = counts.entrySet();
        for (Map.Entry<String, Integer> entry : entries) {
            System.out.printf("%s: %s\n", entry.getKey(), entry.getValue());
            total += entry.getValue();
        }
        System.out.println();
        System.out.println("total words : " + total);
        System.out.println("different words : " + counts.size());
    }

    /**
     * Reads the whole file into a single string.
     *
     * @param path path of the file to read
     * @return the file content, each line terminated by {@code '\n'}
     * @throws IOException if the file cannot be opened or read
     */
    private static String readText(String path) throws IOException {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the reader on every exit path.
        try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() drops the line terminator; put a separator back
                // so the last word of this line does not fuse with the first
                // word of the next one.
                text.append(line).append('\n');
            }
        }
        return text.toString();
    }

    /**
     * Tallies the occurrences of every word in {@code text}.
     *
     * @param text the text to scan
     * @return a map from lower-cased word to its number of occurrences,
     *         sorted by word
     */
    private static Map<String, Integer> countWords(String text) {
        Map<String, Integer> counts = new TreeMap<>();
        // Locale.ROOT keeps the lower-casing independent of the platform
        // locale (e.g. Turkish maps 'I' to a dotless i that WORD won't match).
        Matcher matcher = WORD.matcher(text.toLowerCase(Locale.ROOT));
        while (matcher.find()) {
            String word = matcher.group();
            Integer seen = counts.get(word);
            counts.put(word, seen == null ? 1 : seen + 1);
        }
        return counts;
    }
}