Java 多线程读取文件并统计词频 实例 出神入化的《ThreadPoolExecutor》


重在展示多线程ThreadPoolExecutor的使用,和线程同步器CountDownLatch,以及相关CAS的原子操作和线程安全的Map/队列。
ThreadPool主线程
 1 import java.io.BufferedWriter;
 2 import java.io.File;
 3 import java.io.FileWriter;
 4 import java.util.*;
 5 import java.util.concurrent.*;
 6 import java.util.concurrent.atomic.AtomicInteger;
 7 import java.util.concurrent.atomic.AtomicReferenceArray;
 8 
 9 /**
10  * ClassName: ThreadPool
11  * Description:
12  * date: 2021/1/16 18:24
13  *
14  * @author hewei
15  */
16 public class ThreadPool {
17     /**存储词的队列**/
18     private static ArrayBlockingQueue<String> oneWordQueue = new ArrayBlockingQueue<String>(Integer.MAX_VALUE / 4);
19     /**存储词的次数**/
20     private static ConcurrentHashMap<String, AtomicInteger> wordCount = new ConcurrentHashMap<String, AtomicInteger>();
21     /**判断中英文的正则**/
22     private static String[] englishOrChinaWordCompile = {"[a-zA-Z]+", "[\\u4e00-\\u9fa5]"};
23     private static long waitTimeSecond = 5;
24     /**已知会创建10个线程池**/
25     private static CountDownLatch countDownLatch = new CountDownLatch(10);
26     /**存储词的次数排序**/
27     private static AtomicReferenceArray wordCountArray=new AtomicReferenceArray<String>(Integer.MAX_VALUE/16);
28 
29     public static void main(String[] args) {
30         BlockingQueue<Runnable> blockingQueue = new LinkedBlockingDeque<>(10000);
31         ThreadPoolExecutor readFilePool = new ThreadPoolExecutor(5, 5,
32                 waitTimeSecond, TimeUnit.SECONDS, blockingQueue, new UserThreadFactory("Study-WordCountReadFile-"),
33                 new UserRejectHandler());
34         //executor.allowCoreThreadTimeOut(true);
35         Long beginTime = System.currentTimeMillis();
36         //读取D盘的文件
37         File file = new File("E:\\tmp");
38         if (file.isDirectory()) {
39             File[] files = file.listFiles();
40             // 定义文件类型
41             for (File one : files) {
42                 String fileName = one.getName();
43                 readFilePool.execute(new WordCountReadFileTask(countDownLatch, "E:\\tmp\\" + fileName, oneWordQueue, englishOrChinaWordCompile));
44             }
45         }
46         ThreadPoolExecutor sortWordPool = new ThreadPoolExecutor(5, 5,
47                 waitTimeSecond, TimeUnit.SECONDS, blockingQueue, new UserThreadFactory("Study-WordCount-"),
48                 new UserRejectHandler());
49         //executor1.allowCoreThreadTimeOut(true);
50         for (int i = 0; i < 5; i++) {
51             sortWordPool.execute(new WordCountTask(countDownLatch, wordCount, oneWordQueue, waitTimeSecond,null));
52         }
53         try {
54             countDownLatch.await();
55             readFilePool.shutdown();
56             sortWordPool.shutdown();
57             // 写出到文件
58             List<Map.Entry<String, AtomicInteger>> list = new ArrayList(wordCount.entrySet());
59             Comparator com = new Comparator<Map.Entry<String, AtomicInteger>>(){
60                 @Override
61                 public int compare(Map.Entry<String, AtomicInteger> o1, Map.Entry<String, AtomicInteger> o2) {
62                     return ((Integer)o2.getValue().get()).compareTo((Integer) o1.getValue().get());
63                 }
64             };
65             list.sort(com);
66             // 写出到文件
67             BufferedWriter bw = new BufferedWriter(new FileWriter("E:\\read.txt"));
68             for(int i=0;i<list.size();i++){
69                 if(i<10) {
70                     System.out.println("单词  " + list.get(i).getKey() + ",次数  " + list.get(i).getValue());
71                 }
72                 bw.write("单词  "+ list.get(i).getKey()+",次数  "+ list.get(i).getValue());
73                 bw.newLine();
74             }
75             bw.flush();
76             bw.close();
77         } catch (Exception e) {
78             e.printStackTrace();
79         }
80     }
81 }

 




UserThreadFactory
 1 import java.util.concurrent.ThreadFactory;
 2 import java.util.concurrent.atomic.AtomicInteger;
 3 
 4 /**
 5  * ClassName: UserThreadFactory
 6  * Description:自定义线程创建工厂
 7  * date: 2021/1/16 18:26
 8  *
 9  * @author hewei
10  */
11 public class UserThreadFactory implements ThreadFactory {
12     /**
13      * 自定义线程名称前缀
14      **/
15     private final String prefixName;
16     /**
17      * 线程计数器 从1开始
18      */
19     private final AtomicInteger threadNumber = new AtomicInteger(1);
20 
21     public UserThreadFactory(String prefixName) {
22         this.prefixName = prefixName;
23     }
24 
25     @Override
26     public Thread newThread(Runnable runnable) {
27         //创建线程
28         String name = prefixName + threadNumber.getAndIncrement();
29         return new WorkThread(runnable,name);
30     }
31     /**
32      *自定义工作线程,定义线程名称有助于对jvm问题排查
33      */
34     class WorkThread extends Thread {
35         /**
36          * 线程名称
37          */
38         private String name;
39 
40         /**
41          * @param target  执行的方法
42          * @param name 线程的名称
43          */
44         public WorkThread(Runnable target, String name) {
45             super(target);
46             super.setName(name);
47             this.name=name;
48             System.out.println("创建:"+name);
49         }
50 
51         @Override
52         public void run() {
53             try {
54                 /**
55                  * super.run()等同于target.run()
56                  */
57                 super.run();
58             } finally {
59                 System.out.println("结束线程:" + name);
60             }
61         }
62     }
63 }
WordCountReadFileTask
 1 import java.io.BufferedReader;
 2 import java.io.FileReader;
 3 import java.util.ArrayList;
 4 import java.util.List;
 5 import java.util.concurrent.ArrayBlockingQueue;
 6 import java.util.concurrent.CountDownLatch;
 7 import java.util.regex.Matcher;
 8 import java.util.regex.Pattern;
 9 
10 /**
11  * ClassName: WordCountTask
12  * Description:
13  * date: 2021/1/17 19:48
14  *
15  * @author hewei
16  */
17 public class WordCountReadFileTask implements Runnable {
18     private String filePathAndName;
19     private ArrayBlockingQueue<String> oneWordQueue;
20     private  String[] englishOrChinaWordCompile;
21     private  CountDownLatch countDownLatch;
22 
23     public WordCountReadFileTask(CountDownLatch countDownLatch,String filePathAndName, ArrayBlockingQueue<String> oneWordQueue, String[] englishOrChinaWordCompile) {
24         this.countDownLatch=countDownLatch;
25         this.filePathAndName = filePathAndName;
26         this.oneWordQueue = oneWordQueue;
27         this.englishOrChinaWordCompile = englishOrChinaWordCompile;
28     }
29 
30     @Override
31     public void run() {
32         try {
33             BufferedReader br = new BufferedReader(new FileReader(filePathAndName));
34             StringBuffer sb = new StringBuffer();
35             List<String> strList=new ArrayList<String>();
36             String line = "";
37             while((line=br.readLine())!=null){
38                 sb.append(line);
39                 /**
40                  * 为了保证不超过Integer.max_value
41                  */
42                 if(sb.length()>50000000) {
43                     strList.add(sb.toString());
44                     /**
45                      * 清空StringBuffer
46                      * 1.delete,从到到尾
47                      * 2.new 新的对象。但会丢弃老对象加速gc到来
48                      * 3.setlength=0,不符合这里的场景
49                      */
50                     sb.delete(0,sb.length());
51                 }
52             }
53             if(sb!=null){
54                 strList.add(sb.toString());
55             }
56             br.close();
57             for(String words:strList) {
58                 for (String oneCompile : englishOrChinaWordCompile) {
59                     //正则
60                     Pattern p = Pattern.compile(oneCompile);
61                     Matcher matcher = p.matcher(words);
62                     while (matcher.find()) {
63                         /**
64                          * 添加一个元素,如果队列满,则阻塞等待队列被消费腾出空间来
65                          */
66                         oneWordQueue.put(matcher.group());
67                     }
68                 }
69             }
70         } catch (Exception e) {
71             e.printStackTrace();
72         }finally {
73             countDownLatch.countDown();
74         }
75     }
76 }
WordCountTask
 1 import java.io.BufferedReader;
 2 import java.io.FileReader;
 3 import java.util.ArrayList;
 4 import java.util.Arrays;
 5 import java.util.List;
 6 import java.util.concurrent.ArrayBlockingQueue;
 7 import java.util.concurrent.ConcurrentHashMap;
 8 import java.util.concurrent.CountDownLatch;
 9 import java.util.concurrent.TimeUnit;
10 import java.util.concurrent.atomic.AtomicInteger;
11 import java.util.concurrent.atomic.AtomicReferenceArray;
12 import java.util.regex.Matcher;
13 import java.util.regex.Pattern;
14 
15 /**
16  * ClassName: WordCountTask
17  * Description:
18  * date: 2021/1/17 19:48
19  *
20  * @author hewei
21  */
22 public class WordCountTask implements Runnable {
23     private ArrayBlockingQueue<String> oneWordQueue;
24     private ConcurrentHashMap<String, AtomicInteger> wordCount;
25     private long waitTimeSecond;
26     private  CountDownLatch countDownLatch;
27     private static AtomicReferenceArray wordCountArray;
28 
29     public WordCountTask(CountDownLatch countDownLatch,ConcurrentHashMap<String,AtomicInteger> wordCount,
30                          ArrayBlockingQueue<String> oneWordQueue,long waitTimeSecond,AtomicReferenceArray wordCountArray) {
31         this.wordCountArray=wordCountArray;
32         this.countDownLatch=countDownLatch;
33         this.oneWordQueue = oneWordQueue;
34         this.wordCount=wordCount;
35         this.waitTimeSecond=waitTimeSecond;
36     }
37 
38     @Override
39     public void run() {
40         try {
41             String oneWord;
42             AtomicInteger nowCount;
43             while ((oneWord=oneWordQueue.poll(waitTimeSecond, TimeUnit.SECONDS))!=null) {
44                 /**
45                  * 循环从队列里取出元素,然后加入到map中
46                  * 在加入map过程中,代码块会有指令重排问题。所以每一步都需要加判断。
47                  * 所以需要每一个操作都要具有原子性。
48                  */
49                 if((nowCount=wordCount.get(oneWord))==null){
50                     nowCount=new AtomicInteger(1);
51                     AtomicInteger ifExistCount=wordCount.putIfAbsent(oneWord,nowCount);
52                     if(ifExistCount!=null) {
53                         ifExistCount.getAndIncrement();
54                     }
55                 }else{
56                     nowCount.getAndIncrement();
57                 }
58                 /**
59                  * 实时排序,该排序依赖线程安全
60                  * 略
61                  */
62             }
63         } catch (Exception e) {
64             e.printStackTrace();
65         }finally {
66             countDownLatch.countDown();
67         }
68     }
69 }

 

posted @ 2021-01-18 16:57  践行渐远的蜗牛  阅读(904)  评论(0编辑  收藏  举报