Hadoop FileSplit
/** A section of an input file. Returned by {@link * InputFormat#getSplits(JobContext)} and passed to * {@link InputFormat#createRecordReader(InputSplit,TaskAttemptContext)}. * * 文件的一部分,通过InputFormat#getSplits(JobContext)生成 * 作为参数生产RecordReader:InputFormat#createRecordReader(InputSplit,TaskAttemptContext) * 实现了InputSplit接口 */ @InterfaceAudience.Public @InterfaceStability.Stable public class FileSplit extends InputSplit implements Writable { private Path file; private long start; private long length; private String[] hosts; private SplitLocationInfo[] hostInfos; public FileSplit() {} /** Constructs a split with host information * * @param file the file name。 文件名称 * @param start the position of the first byte in the file to process。第一个byte的偏移量 * @param length the number of bytes in the file to process。 split的长度 * @param hosts the list of hosts containing the block, possibly null。 split所在的主机列表 */ public FileSplit(Path file, long start, long length, String[] hosts) { this.file = file; this.start = start; this.length = length; this.hosts = hosts; } /** Constructs a split with host and cached-blocks information * * @param file the file name。 文件名称 * @param start the position of the first byte in the file to process。第一个byte的偏移量 * @param length the number of bytes in the file to process split的长度 * @param hosts the list of hosts containing the block split所在的主机列表 * @param inMemoryHosts the list of hosts containing the block in memory 在内存中保存block的机器列表 */ public FileSplit(Path file, long start, long length, String[] hosts, String[] inMemoryHosts) { this(file, start, length, hosts); hostInfos = new SplitLocationInfo[hosts.length]; for (int i = 0; i < hosts.length; i++) { // because N will be tiny, scanning is probably faster than a HashSet boolean inMemory = false; for (String inMemoryHost : inMemoryHosts) { if (inMemoryHost.equals(hosts[i])) { inMemory = true; break; } } hostInfos[i] = new SplitLocationInfo(hosts[i], inMemory); } } /** The file containing this split's data. 
*/ public Path getPath() { return file; } /** The position of the first byte in the file to process. */ public long getStart() { return start; } /** The number of bytes in the file to process. */ @Override public long getLength() { return length; } @Override public String toString() { return file + ":" + start + "+" + length; } //////////////////////////////////////////// // Writable methods //////////////////////////////////////////// @Override public void write(DataOutput out) throws IOException { Text.writeString(out, file.toString()); out.writeLong(start); out.writeLong(length); } @Override public void readFields(DataInput in) throws IOException { file = new Path(Text.readString(in)); start = in.readLong(); length = in.readLong(); hosts = null; } @Override public String[] getLocations() throws IOException { if (this.hosts == null) { return new String[]{}; } else { return this.hosts; } } @Override @Evolving public SplitLocationInfo[] getLocationInfo() throws IOException { return hostInfos; } }