package ccb.huge;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.FutureTask;

/**
 * Entry point for experiments that read large files with several threads.
 *
 * <p>Each numbered "mode" is a private helper. {@link #main(String[])} runs exactly one
 * of them; the other calls are left commented out so they can be switched in by hand.
 */
public class Entrance {

    /** File read by the Giraffe modes. */
    final static String filePath = "d:/ccb/test.txt";

    /** File that mode four (Monkey) splits across threads. */
    private static final String MONKEY_FILE_PATH = "d:/ccb/new.dat";

    /**
     * Runs the currently selected read mode.
     *
     * @param args unused
     * @throws InterruptedException declared for mode five (Koala), which sleeps and waits on futures
     */
    public static void main(String[] args) throws InterruptedException {

        File file = new File(filePath);

        /* Multi-thread mode one */
        // Giraffe3(file);

        /* Multi-thread mode two */
        // Giraffe2(file);

        /* Thread pool (mode three) */
        // Giraffe1(file);

        /* Multi-thread mode four */
        File monkeyFile = new File(MONKEY_FILE_PATH);
        Monkey(monkeyFile, monkeyFile.length());

        /* Multi-thread mode five */
        // Koala();
    }

    /** Starts three {@code GiraffeThread}s, kept in an array, that all read {@code file}. */
    private static void Giraffe3(File file) {
        GiraffeThread[] giraffe = new GiraffeThread[3];
        for (int i = 0; i < giraffe.length; i++) {
            giraffe[i] = new GiraffeThread(file);
            giraffe[i].start();
        }
    }

    /** Starts three individually declared {@code GiraffeThread}s that all read {@code file}. */
    private static void Giraffe2(File file) {
        Thread thread1 = new GiraffeThread(file);
        Thread thread2 = new GiraffeThread(file);
        Thread thread3 = new GiraffeThread(file);
        thread1.start();
        thread2.start();
        thread3.start();
    }

    /**
     * Runs two {@code GiraffeThread} instances as plain tasks on a two-thread pool.
     * The pool is shut down right after submission; already submitted tasks still run.
     */
    private static void Giraffe1(File file) {
        ExecutorService exec = Executors.newFixedThreadPool(2);
        for (int i = 0; i < 2; i++) {
            exec.execute(new GiraffeThread(file));
        }
        exec.shutdown();
    }

    /**
     * Splits {@code file} into equal byte ranges and starts one {@code MonkeyThread} per range.
     *
     * <p>The file is cut into six chunks of {@code fc / 6} bytes. If the length does not
     * divide evenly, one extra thread is started for the {@code fc % 6} trailing bytes;
     * when there is no remainder no extra thread is created.
     *
     * @param file file to read
     * @param fc   length of {@code file} in bytes
     * @throws IllegalArgumentException if {@code fc} does not fit the {@code int} offsets
     *                                  that {@code MonkeyThread} accepts
     */
    private static void Monkey(File file, long fc) {
        final int thTotal = 6; // number of equally sized chunks

        if (fc < 0 || fc > Integer.MAX_VALUE) {
            // MonkeyThread takes int offsets; fail loudly instead of wrapping silently.
            throw new IllegalArgumentException("file length out of int range: " + fc);
        }
        int fileLen = (int) fc;
        int mo = fileLen % thTotal;   // remainder bytes
        int size = fileLen / thTotal; // bytes per full chunk

        List<MonkeyThread> workers = new ArrayList<>();
        if (size > 0) {
            for (int i = 0; i < thTotal; i++) {
                workers.add(new MonkeyThread(file, i * size, mo, size, fileLen));
            }
        }
        if (mo > 0) {
            // The tail chunk is exactly mo bytes long, so pass mo as its size as well.
            workers.add(new MonkeyThread(file, thTotal * size, mo, mo, fileLen));
        }
        for (MonkeyThread worker : workers) {
            worker.start();
        }
    }

    /**
     * Times how long {@code KoalaThread} tasks take to read a fixed list of files on a
     * four-thread pool, and prints each task's time plus the running total.
     *
     * @throws InterruptedException if the calling thread is interrupted while sleeping
     *                              or while waiting for a task result
     */
    private static void Koala() throws InterruptedException {
        String[] fileList = {"d:/ccb/test.txt", "d:/ccb/ONL.dat.result", "d:/ccb/new.dat", "d:/ccb/sixno.txt",};

        ExecutorService exec = Executors.newFixedThreadPool(4);
        List<FutureTask<Long>> taskList = new ArrayList<>();
        long totalResult = 0; // long: task results are long, an int accumulator would narrow silently

        try {
            // NOTE(review): every file is submitted fileList.length times (the inner index is
            // not used otherwise) -- presumably to generate more load; confirm this is intended.
            for (int i = 0; i < fileList.length; i++) {
                for (int j = 0; j < fileList.length; j++) {
                    FutureTask<Long> ft = new FutureTask<Long>(new KoalaThread(new File(fileList[i])));
                    taskList.add(ft);
                    exec.submit(ft);
                }
            }
            System.out.println(
                    "主线程--" + Thread.currentThread().getId() +
                            "程结束!" + Thread.currentThread().getName());
            Thread.sleep(5000);

            // Collect the result of every task; get() blocks until that task has finished.
            for (FutureTask<Long> ft : taskList) {
                try {
                    long result = ft.get();
                    totalResult += result;
                    System.out.printf("线程执行时间:%s, 合计时间:%s\n", result, totalResult);
                    System.out.println("-----------------------------");
                    System.out.println(ft.toString());
                    System.out.println("-----------------------------");
                } catch (ExecutionException e) {
                    // One failed task must not stop the remaining results from being reported.
                    e.printStackTrace();
                }
            }
        } finally {
            // Always release the pool, including when this thread is interrupted.
            exec.shutdown();
        }
        System.out.println("所有线程执行完毕!共计耗时:" + totalResult);
    }

    /**
     * Computes the byte offsets at which a file of the given length is split between threads,
     * intended for positioning random-access reads.
     *
     * <p>The result has {@code threadTotal + 2} entries: entry {@code i} (for
     * {@code 0 <= i <= threadTotal}) is {@code i * (fileLength / threadTotal)}, and the last
     * entry is {@code fileLength}, i.e. the end of the remainder chunk.
     *
     * @param fileLength  file length in bytes
     * @param threadTotal number of equally sized chunks
     * @return the chunk start offsets followed by the end-of-file offset
     * @throws IllegalArgumentException if {@code threadTotal} is not positive or
     *                                  {@code fileLength} does not fit in an {@code int}
     */
    private static int[] getPerStart(long fileLength, int threadTotal) {
        if (threadTotal <= 0) {
            throw new IllegalArgumentException("threadTotal must be positive: " + threadTotal);
        }
        if (fileLength < 0 || fileLength > Integer.MAX_VALUE) {
            throw new IllegalArgumentException("file length out of int range: " + fileLength);
        }
        int mo = (int) (fileLength % threadTotal);                 // remainder bytes
        int start = (int) ((fileLength - mo) / threadTotal);       // bytes per full chunk

        int[] begin = new int[threadTotal + 2];
        begin[0] = 0;
        for (int i = 1; i <= threadTotal; i++) {
            begin[i] = i * start;
        }
        // End of the remainder chunk: previous offset plus the remainder (equals fileLength).
        begin[threadTotal + 1] = begin[threadTotal] + mo;
        System.out.println(String.format("总%d, mo=%d, list=%s", fileLength, mo, Arrays.toString(begin)));
        return begin;
    }

}

 

Entrance 中用到的各个线程类如下。第一个:KoalaThread

package ccb.huge;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;

/**
 * Koala project: measures how long a single thread needs to read one file from start to end.
 *
 * <p>While reading, lines containing the text {@code 12345} are collected; the collected
 * lines are not reported anywhere.
 */
public class KoalaThread implements Callable<Long> {

    private File file;

    public KoalaThread(File file) {
        this.file = file;
    }

    /**
     * Reads the whole file line by line and reports the elapsed wall-clock time.
     * Any exception raised while reading is printed and does not propagate.
     *
     * @return elapsed time in milliseconds, also when reading failed
     */
    @Override
    public Long call() throws Exception {
        final long startedAt = System.currentTimeMillis();
        final List<String> matches = new ArrayList<>();
        BufferedReader in = null;

        try {
            in = new BufferedReader(new FileReader(file));
            for (String row = in.readLine(); row != null; row = in.readLine()) {
                if (row.contains("12345")) {
                    matches.add(row);
                }
            }

            Thread worker = Thread.currentThread();
            System.out.printf("线程:%s -- %s\n", worker.getName(), worker.getId());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            closeAndReport(in);
        }

        return System.currentTimeMillis() - startedAt;
    }

    /** Closes the reader if one was opened; a failure to close is printed, not thrown. */
    private static void closeAndReport(BufferedReader in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

}

  另外一个:MonkeyThread

package ccb.huge;

import java.io.File;
import java.io.RandomAccessFile;

/**
 * Monkey project: a worker thread that reads one byte range of a file and prints it.
 *
 * <p>Several instances are meant to be started on the same file, each with its own start
 * offset. The range is cut on byte boundaries, so a multi-byte character that straddles
 * two ranges will not decode correctly in either of them.
 */
public class MonkeyThread extends Thread {

    private final File file;
    private final int start;
    private final int end;
    private final int size;
    private final int fileLen;
    private final int mo;

    /**
     * @param file    file to read
     * @param start   byte offset at which this thread starts reading
     * @param mo      remainder of the file length divided by the number of chunks; only reported
     * @param size    nominal chunk length in bytes
     * @param fileLen total file length in bytes
     */
    public MonkeyThread(File file, int start, int mo, int size, int fileLen) {
        this.file = file;
        this.start = start;
        this.end = start + size;
        this.size = size;
        this.fileLen = fileLen;
        this.mo = mo;
    }

    /**
     * Reads {@code size} bytes from {@code start}, or fewer when the file ends first, and
     * prints a header line followed by the bytes decoded with the platform default charset.
     * I/O failures are printed and end the thread without output.
     */
    @Override
    public void run() {
        // Bytes actually available to this chunk; long arithmetic avoids int overflow.
        long remaining = Math.max(0L, (long) fileLen - start);
        // A chunk ending exactly at end-of-file is a full chunk and is read completely.
        int toRead = (int) Math.min((long) size, remaining);

        byte[] chunk;
        // Every thread opens its own RandomAccessFile, so seek() only positions this
        // thread's file pointer and the read needs no lock.
        try (RandomAccessFile raf = new RandomAccessFile(file, "r")) {
            byte[] buff = new byte[toRead];
            raf.seek(start);
            // readFully keeps reading until toRead bytes arrived; read() may return fewer.
            raf.readFully(buff, 0, toRead);
            chunk = buff;
        } catch (Exception e) {
            e.printStackTrace();
            return;
        }

        // Lock only while printing so the output of different threads does not interleave.
        synchronized (file) {
            System.out.println(String.format("(%s)%s:第 %s 次读取,start=%s end=%s fileLen=%s,内容如下: ",
                    Thread.currentThread().getId(),
                    Thread.currentThread().getName(), 1, start, end, fileLen));
            if (toRead < size) {
                // Partial (tail) chunk: report the remainder as before.
                System.out.println("-----------mo=" + mo);
            }
            System.out.println(new String(chunk, 0, toRead));
            System.out.println();
        }
    }
}

  最后一个:GiraffeThread

package ccb.huge;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Giraffe project: a worker thread that reads a text file and prints it in batches of lines.
 *
 * <p>Several instances are meant to be started on the same {@link File}. Each one opens its
 * own reader and holds the monitor of the shared {@code File} object for the whole read, so
 * the output of one worker is not interleaved with that of another worker using the same
 * {@code File} instance.
 */
public class GiraffeThread extends Thread {

    /** Number of lines printed per batch. */
    private static final int BATCH_SIZE = 5;

    // File to read
    private final File file;

    public GiraffeThread(File file) {
        this.file = file;
    }

    /**
     * Reads the file line by line and prints the lines in batches of five, each batch
     * preceded by a header with the thread name, thread id and batch number. Lines left
     * over after the last full batch are printed as a final, shorter batch.
     * Any exception is printed and ends the run.
     */
    @Override
    public void run() {
        // Taken from the executing thread, which is a pool thread when run via an executor.
        long id = Thread.currentThread().getId();
        String name = Thread.currentThread().getName();

        try (BufferedReader buff = new BufferedReader(new FileReader(file))) {
            List<String> lines = new ArrayList<>();
            String line;
            int n = 0; // number of batches printed so far

            synchronized (file) {
                while ((line = buff.readLine()) != null) {
                    lines.add(line);
                    if (lines.size() == BATCH_SIZE) {
                        printBatch(name, id, ++n, lines);
                        lines.clear();
                    }
                }
                // Flush the tail: without this the last 1-4 lines would never be printed.
                if (!lines.isEmpty()) {
                    printBatch(name, id, ++n, lines);
                    lines.clear();
                }
            }
            System.out.println(name + "(" + id + ")  任务完毕!");

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Prints one batch header followed by the batch's lines. */
    private static void printBatch(String name, long id, int round, List<String> lines) {
        System.out.println(String.format("%s(%s)-- 读取次数:%s ", name, id, round));
        for (String tmpLine : lines) {
            System.out.println(tmpLine);
        }
    }
}

  

posted on 2021-10-31 00:03  jarod99  阅读(1825)  评论(0)  编辑  收藏  举报