package com.xxj; import java.io.*; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.util.HashSet; import java.util.Set; import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.atomic.AtomicLong; /** * ClassName:BigFileReader * Package:com.xxj * Description:描述信息 * * @Date:2022/9/18 12:49 * @author:alpha_Joker */ public class BigFileReader { private int threadSize; private String charset; private int bufferSize; private IHandle handle; private ExecutorService executorService; private long fileLength; private RandomAccessFile randomAccessFile; private Set<StartEndPair> startEndPairs; private CyclicBarrier cyclicBarrier; private AtomicLong counter = new AtomicLong(0); private BigFileReader(File file, IHandle handle, String charset, int bufferSize, int threadSize) { this.fileLength = file.length(); this.handle = handle; this.charset = charset; this.bufferSize = bufferSize; this.threadSize = threadSize; try { this.randomAccessFile = new RandomAccessFile(file, "r"); } catch (FileNotFoundException e) { e.printStackTrace(); } this.executorService = Executors.newFixedThreadPool(threadSize); startEndPairs = new HashSet<BigFileReader.StartEndPair>(); } private void calculateStartEnd(long start, long size) throws IOException { if (start > fileLength - 1) { return; } StartEndPair pair = new StartEndPair(); pair.start = start; long endPosition = start + size - 1; if (endPosition >= fileLength - 1) { pair.end = fileLength - 1; startEndPairs.add(pair); return; } randomAccessFile.seek(endPosition); byte tmp = (byte) randomAccessFile.read(); while (tmp != '\n' && tmp != '\r') { endPosition++; if (endPosition >= fileLength - 1) { endPosition = fileLength - 1; break; } randomAccessFile.seek(endPosition); tmp = (byte) randomAccessFile.read(); } pair.end = endPosition; startEndPairs.add(pair); calculateStartEnd(endPosition + 1, size); } public void shutdown() { try { this.randomAccessFile.close(); } catch (IOException e) { e.printStackTrace(); } this.executorService.shutdown(); } private void handle(byte[] bytes) throws UnsupportedEncodingException { String line = null; if (this.charset == null) { line = new String(bytes); } else { line = new String(bytes, charset); } if (line != null && !"".equals(line)) { this.handle.handle(line); counter.incrementAndGet(); } } //切片 private static class StartEndPair { public long start; public long end; @Override public String toString() { return "star=" + start + "end=" + end; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + (int) (end ^ (end >>> 32)); result = prime * result + (int) (start ^ (start >>> 32)); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; StartEndPair other = (StartEndPair) obj; if (end != other.end) return false; if (start != other.start) return false; return true; } } //读取 private class SliceReaderTask implements Runnable { private long start; private long sliceSize; private byte[] readBuff; public SliceReaderTask(StartEndPair pair) { this.start = pair.start; this.sliceSize = pair.end - pair.start + 1; this.readBuff = new byte[bufferSize]; } @Override public void run() { try { MappedByteBuffer mapBuffer = randomAccessFile.getChannel().map(FileChannel.MapMode.READ_ONLY, start, this.sliceSize); ByteArrayOutputStream bos = new ByteArrayOutputStream(); for (int offset = 0; offset < sliceSize; offset += bufferSize) { int readLength; if (offset + bufferSize <= sliceSize) { readLength = bufferSize; } else { readLength = (int) (sliceSize); } mapBuffer.get(readBuff, 0, readLength); for (int i = 0; i < readLength; i++) { byte tmp = readBuff[i]; if (tmp == '\n' || tmp == '\r') { handle(bos.toByteArray()); bos.reset(); } else { bos.write(tmp); } } } if (bos.size() > 0) { handle(bos.toByteArray()); } cyclicBarrier.await(); } catch (Exception e) { e.printStackTrace(); } } } public static class Builder { private int threadSize = 1; private String charset = null; private int bufferSize = 1024 * 1024; private IHandle handle; private File file; public Builder(String file, IHandle handle) { this.file = new File(file); if (!this.file.exists()) throw new IllegalArgumentException("文件不存在"); this.handle = handle; } public Builder withTreadSize(int size) { this.threadSize = size; return this; } public Builder withCharset(String charset) { this.charset = charset; return this; } public Builder withBufferSize(int bufferSize) { this.bufferSize = bufferSize; return this; } public BigFileReader build() { return new BigFileReader(this.file, this.handle, this.charset, this.bufferSize, this.threadSize); } } public void start() { long everySize = this.fileLength / this.threadSize; try { calculateStartEnd(0, everySize); } catch (Exception e) { e.printStackTrace(); return; } final long startTime = System.currentTimeMillis(); cyclicBarrier = new CyclicBarrier(startEndPairs.size(), new Runnable() { @Override public void run() { long time = System.currentTimeMillis() - startTime; System.out.println("time:"+time); System.out.println("line:"+counter.get()); } }); for (StartEndPair pair : startEndPairs) { System.out.println(pair); this.executorService.execute(new SliceReaderTask(pair)); } } }
package com.xxj; /** * ClassName:IHandle * Package:com.xxj * Description:描述信息 * * @Date:2022/9/18 12:48 * @author:alpha_Joker */ public interface IHandle { void handle(String line); }
package com.xxj; import cn.hutool.json.JSONArray; import cn.hutool.json.JSONObject; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; /** * ClassName:Test * Package:com.xxj * Description:描述信息 * * @Date:2022/9/18 15:11 * @author:alpha_Joker */ public class Test { public static void main(String[] args) { // fre BigFileReader.Builder builder =new BigFileReader.Builder("C:\\Users\\Administrator\\Desktop\\fre.txt", new IHandle() { @Override public void handle(String line) { // System.out.println(line); JSONObject jsonObject = new JSONObject(); String [] str = line.split("\\s+"); String[] _str = reverseArray(str); // System.out.println("数据:"+Arrays.toString(_str)); if (str.length ==1 && str[0].length() == 14) { jsonObject.putOpt("time", str[0]); System.out.println(jsonObject.toJSONString(0)); } if (_str.length == 11) { // jsonObject.putOpt("Iteam", str[0]); jsonObject.putOpt("Dtail", str[1]+" "+str[2]+" "+str[3]+" "+str[4]); jsonObject.putOpt("F", _str[5]); jsonObject.putOpt("max", _str[4]); jsonObject.putOpt("mem", _str[3]); jsonObject.putOpt("nem", _str[2]); jsonObject.putOpt("csf", _str[1]); jsonObject.putOpt("asd", _str[0]); System.out.println(jsonObject.toJSONString(0)); } if (_str.length == 10 || _str.length == 9) { // jsonObject.putOpt("Iteam", str[0]); if (_str.length == 10) jsonObject.putOpt("Dtail", str[1]+" "+str[2]+" "+str[3]); if (_str.length == 9) jsonObject.putOpt("Dtail", str[1]+" "+str[2]); jsonObject.putOpt("F", _str[5]); jsonObject.putOpt("max", _str[4]); jsonObject.putOpt("mem", _str[3]); jsonObject.putOpt("nem", _str[2]); jsonObject.putOpt("csf", _str[1]); jsonObject.putOpt("asd", _str[0]); System.out.println(jsonObject.toJSONString(0)); } // System.out.println(str.length+"--"+line.startsWith(" ")); } }); // builder.withThreadSize(10).withCharset("gbk").withBufferSize("1024*1024"); builder.withTreadSize(1).withCharset("utf-8").withBufferSize(1024*1024); BigFileReader bigFileReader = builder.build(); bigFileReader.start(); } /** * 使用Collections.reverse来倒置数组 * * @param strArray 原来的字符串数组 * @return */ public static String[] reverseArray(String[] strArray) { //定一个新的数组 String[] newArray = new String[strArray.length]; List<String> list = new ArrayList<>(); Collections.addAll(list, strArray); Collections.reverse(list); list.toArray(newArray); return newArray; } }