java 多线程对List中的数据进行操作

首先吐槽python的多线程是真的垃圾。。。

 

业务:

对文件里的近2万条数据进行处理,然后存回文件

 

0. 读取txt,逐行存入ArrayList

1. 把ArrayList以2000条为一组进行切割

2. 把每组(最多2000条)数据交给各自的线程任务处理

3. 把线程任务提交到线程池

4. 线程池运行完毕后,把结果存回txt

 

 

package edu.thu.xlore.unitId;

import java.io.*;
import java.util.ArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;

/**
 * Reads a text file line by line, processes the lines in parallel in fixed-size
 * chunks on a thread pool, and writes the processed lines back to the same file.
 *
 * <p>The file is read and written with {@link FileReader}/{@link FileWriter}, i.e. with
 * the platform default charset (unchanged from the original implementation).
 */
public class Test {

    /** Maximum number of lines handed to a single worker task. */
    private static final int CHUNK_SIZE = 2000;

    /** Number of worker threads in the pool. */
    private static final int POOL_SIZE = 10;

    /**
     * Processes every line of the given file and overwrites the file with the result.
     *
     * <p>Does nothing if the file does not exist. If reading fails, the error is printed
     * and the file is left untouched (it is never overwritten with partial data). If the
     * calling thread is interrupted while waiting for the workers, the pool is stopped,
     * the interrupt flag is restored and the file is left untouched.
     *
     * @param filePath path of the text file to process in place
     */
    public void unitFile(String filePath){

        System.out.println("unitFile start");

        File file = new File(filePath);
        if (!file.exists()) {
            return;
        }

        // Read the whole file first; abort on failure so the source file is not clobbered.
        ArrayList<String> lines;
        try {
            lines = readLines(file);
        } catch (IOException e) {
            e.printStackTrace();
            return;
        }

        System.out.println("unitFile: 读取完毕,数量:" + lines.size());

        ArrayList<ArrayList<String>> chunks = splitIntoChunks(lines);

        System.out.println("unitFile: 分割数量:" + chunks.size());

        long timeStart = System.currentTimeMillis();

        // Fixed-size pool; tasks beyond POOL_SIZE wait in the unbounded queue.
        ThreadPoolExecutor threadPoolExecutor = new ThreadPoolExecutor(
                POOL_SIZE, POOL_SIZE, 0, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<Runnable>());

        for (int i = 0; i < chunks.size(); i++) {
            threadPoolExecutor.execute(new UnitThread(chunks, i));
        }

        // shutdown() lets already-submitted tasks finish and rejects new ones;
        // shutdownNow() would instead try to stop running tasks and return the queued ones.
        threadPoolExecutor.shutdown();
        try {
            // Block until all tasks are done instead of spinning on isTerminated().
            while (!threadPoolExecutor.awaitTermination(1, TimeUnit.SECONDS)) {
                // keep waiting
            }
        } catch (InterruptedException e) {
            threadPoolExecutor.shutdownNow();
            Thread.currentThread().interrupt();
            return;
        }
        System.out.println("线程全部运行完毕");

        long timeEnd = System.currentTimeMillis();

        // Write the results back. Closing the BufferedWriter (via try-with-resources)
        // flushes its buffer; closing only the underlying FileWriter would drop the tail.
        try (BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(file))) {
            for (ArrayList<String> chunk : chunks) {
                System.out.println("共有数据: " + chunk.size());
                for (String line : chunk) {
                    bufferedWriter.write(line + "\n");
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        System.out.println("共用时:" + (timeEnd - timeStart) + "毫秒");
    }

    /** Reads all lines of {@code file} into a list, in file order. */
    private static ArrayList<String> readLines(File file) throws IOException {
        ArrayList<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        return lines;
    }

    /**
     * Splits {@code lines} into consecutive chunks of at most {@link #CHUNK_SIZE} elements.
     * An empty input yields a single empty chunk, matching the original behavior.
     */
    private static ArrayList<ArrayList<String>> splitIntoChunks(ArrayList<String> lines) {
        ArrayList<ArrayList<String>> chunks = new ArrayList<>();
        for (int start = 0; start < lines.size(); start += CHUNK_SIZE) {
            int end = Math.min(start + CHUNK_SIZE, lines.size());
            chunks.add(new ArrayList<String>(lines.subList(start, end)));
        }
        if (chunks.isEmpty()) {
            chunks.add(lines);
        }
        return chunks;
    }


    /**
     * Worker task that processes one chunk of the shared chunk list.
     *
     * <p>It extends {@link Thread} but in this class it is only handed to the executor as a
     * {@link Runnable}; the pool's own threads invoke {@link #run()}.
     */
    public class UnitThread extends Thread{
        private int pageIndex;
        private ArrayList<ArrayList<String>> arrayListArrayList;

        /**
         * @param arrayListArrayList shared list of chunks; the result is stored back into it,
         *                           so no return value is needed
         * @param pageIndex          index of the chunk this task owns
         */
        public UnitThread(ArrayList<ArrayList<String>> arrayListArrayList, int pageIndex){
            this.pageIndex = pageIndex;
            this.arrayListArrayList = arrayListArrayList;
        }

        @Override
        public void run(){
            System.out.println("线程" + pageIndex + "开始");
            ArrayList<String> unitedCategory = dealwithArrayList(arrayListArrayList.get(pageIndex));
            // Each task replaces only its own slot; the outer list's size never changes.
            arrayListArrayList.set(this.pageIndex, unitedCategory);
            System.out.println("线程" + pageIndex + "运行完毕");
        }

    }

    /**
     * Applies the per-line processing to every element of {@code arrayList}.
     *
     * @param arrayList input lines; not modified
     * @return a new list with one processed line per input line, in the same order
     */
    public ArrayList<String> dealwithArrayList(ArrayList<String> arrayList){
        ArrayList<String> reArrayList = new ArrayList<>(arrayList.size());

        // The actual per-line operation goes here.
        for (String line : arrayList) {
            reArrayList.add(line + "  已经处理完了");
        }

        return reArrayList;
    }

    public static void main(String[] args){
        Test test = new Test();
        String filePath = "C:\\Users\\Administrator\\Desktop\\test\\wordFile.txt";
        test.unitFile(filePath);
    }
}

 

运行结果:

unitFile start
unitFile: 读取完毕,数量:19399
unitFile: 分割数量:10
线程0开始
线程1开始
线程3开始
线程2开始
线程0运行完毕
线程1运行完毕
线程3运行完毕
线程4开始
线程2运行完毕
线程5开始
线程8开始
线程4运行完毕
线程8运行完毕
线程5运行完毕
线程9开始
线程6开始
线程9运行完毕
线程7开始
线程6运行完毕
线程7运行完毕
线程全部运行完毕
共有数据: 2000
共有数据: 2000
共有数据: 2000
共有数据: 2000
共有数据: 2000
共有数据: 2000
共有数据: 2000
共有数据: 2000
共有数据: 2000
共有数据: 1399
共用时:8毫秒

Process finished with exit code 0

posted @ 2020-09-28 16:43  不咬人的兔子  阅读(3324)  评论(1编辑  收藏  举报