

private final MapOutputCollector<K,V> collector;// responsible for the actual output operations
private final org.apache.hadoop.mapreduce.Partitioner<K,V> partitioner;// partitions the key space
private final int partitions;// number of partitions; same as the number of Reducers

// Create the collector and take the partition count from the job's number of reduce tasks.
collector = createSortingCollector(job, reporter);
partitions = jobContext.getNumReduceTasks();
if (partitions > 1) {
// More than one partition: instantiate the partitioner class configured on the job context.
partitioner = (org.apache.hadoop.mapreduce.Partitioner<K,V>)
ReflectionUtils.newInstance(jobContext.getPartitionerClass(), job);
} else {
// Otherwise install a trivial partitioner that ignores key, value and
// numPartitions and always returns partitions - 1.
partitioner = new org.apache.hadoop.mapreduce.Partitioner<K,V>() {
public int getPartition(K key, V value, int numPartitions) {
return partitions - 1;
// NOTE(review): the closing braces of getPartition, the anonymous class and the
// else-branch are not present in this excerpt.


// NOTE(review): the right-hand side of this assignment looks truncated in this
// excerpt — the call that the arguments on the next line belong to is missing.
// Confirm against the upstream source before relying on it.
MapOutputCollector<KEY, VALUE> collector= (MapOutputCollector<KEY, VALUE>)
MapOutputBuffer.class, MapOutputCollector.class), job);
// Log the concrete collector implementation that was instantiated.
LOG.info("Map output collector class = " + collector.getClass().getName());
// NOTE(review): context is constructed here but not used in the visible lines;
// presumably it is handed to the collector for initialization — confirm.
MapOutputCollector.Context context =new MapOutputCollector.Context(this, job, reporter);
return collector;



//sanity checks
// Read the spill percentage (default 0.8) and the sort buffer size in MB
// (default 100) from the job configuration.
final float spillper =job.getFloat(JobContext.MAP_SORT_SPILL_PERCENT, (float)0.8);
final int sortmb = job.getInt(JobContext.IO_SORT_MB, 100);
// Original note: the key is mapreduce.task.index.cache.limit.bytes, default 1024 * 1024 (1 MB).
indexCacheMemoryLimit = job.getInt(JobContext.INDEX_CACHE_MEMORY_LIMIT,                                     INDEX_CACHE_MEMORY_LIMIT_DEFAULT);
// spillper must lie in the range (0.0, 1.0].
if (spillper > (float)1.0 || spillper <= (float)0.0) {
    throw new IOException("Invalid \"" + JobContext.MAP_SORT_SPILL_PERCENT +
     "\": " + spillper);
// NOTE(review): the closing brace of the if above is not present in this excerpt.
// sortmb must fit in its lowest 11 bits (0x7FF = 111 1111 1111 in binary),
// i.e. be between 0 and 2047 MB; the mask rejects anything else.
if ((sortmb & 0x7FF) != sortmb) {
    throw new IOException("Invalid \"" + JobContext.IO_SORT_MB + "\": " + sortmb);
// NOTE(review): the closing brace of the if above is not present in this excerpt.
// Instantiate the sorter; the class is read from "map.sort.class" and defaults to QuickSort.
sorter = ReflectionUtils.newInstance(job.getClass("map.sort.class",
QuickSort.class, IndexedSorter.class), job);
// buffers and accounting
// Convert megabytes to bytes (<< 20 multiplies by 2^20).
int maxMemUsage = sortmb << 20;
// Round down to a multiple of METASIZE (original note: METASIZE = 16).
maxMemUsage -= maxMemUsage % METASIZE;
kvbuffer = new byte[maxMemUsage];
bufvoid = kvbuffer.length;
// kvmeta is an int view, in native byte order, over the same bytes as kvbuffer.
kvmeta = ByteBuffer.wrap(kvbuffer).order(ByteOrder.nativeOrder()).asIntBuffer();

所有初始化为0 bufstart = bufend = bufindex = equator; //kvstart:标记溢出元数据的起源,kvend:标记溢出元数据的结束位置 //kvindex:标记全然序列化的记录的结束位置 kvstart = kvend = kvindex; maxRec = kvmeta.capacity() / NMETA; softLimit = (int)(kvbuffer.length * spillper); bufferRemaining = softLimit;



// Index of the first metadata record to sort: kvend expressed in units of NMETA ints.
final int mstart = kvend / NMETA;
// kvend is a valid record
// Upper bound of the range: when kvstart is below kvend, kvmeta.capacity() is
// added before dividing (presumably because the metadata region wraps around
// the end of the buffer — confirm).
final int mend = 1 + (kvstart >= kvend? kvstart
          : kvmeta.capacity() + kvstart) / NMETA;
// Sort the metadata records between mstart and mend with the configured sorter.
sorter.sort(MapOutputBuffer.this, mstart, mend, reporter);


// Walk the sorted records once, writing one output segment per partition.
// spindex is the cursor into the sorted metadata; rec and value are reused
// across partitions.
int spindex = mstart;
final IndexRecord rec = new IndexRecord();
final InMemValBytes value = new InMemValBytes();
for (int i = 0; i < partitions; ++i) {
   IFile.Writer<K, V> writer = null;
   try {
       // Remember where this partition's segment starts in the output stream.
       long segmentStart = out.getPos();
       writer = new Writer<K, V>(job, out, keyClass, valClass, codec,spilledRecordsCounter);
       if (combinerRunner == null) {
          // spill directly
          DataInputBuffer key = new DataInputBuffer();
          // Consume records while they belong to partition i.
          while (spindex < mend &&
kvmeta.get(offsetFor(spindex % maxRec) + PARTITION) == i) {
                final int kvoff = offsetFor(spindex % maxRec);
                // The key occupies [keystart, valstart) in kvbuffer.
                int keystart = kvmeta.get(kvoff + KEYSTART);
                int valstart = kvmeta.get(kvoff + VALSTART);
                key.reset(kvbuffer, keystart, valstart - keystart);
                getVBytesForOffset(kvoff, value);
                writer.append(key, value);
                // NOTE(review): nothing visible in this loop advances spindex, and the
                // loop's closing brace is absent — the excerpt appears to have dropped
                // lines here; confirm against the upstream source.
        } else {
           // Combiner path: first find the run of records for partition i.
           int spstart = spindex;
           while (spindex < mend
&&kvmeta.get(offsetFor(spindex % maxRec)+ PARTITION) == i) {
           // NOTE(review): the body of the loop above (presumably the spindex
           // increment) and its closing brace are not present in this excerpt.
           // Note: we would like to avoid the combiner if we've fewer
           // than some threshold of records for a partition
           if (spstart != spindex) {
             // Run the combiner over the records in [spstart, spindex).
             RawKeyValueIterator kvIter =new MRResultIterator(spstart, spindex);
             combinerRunner.combine(kvIter, combineCollector);
       // NOTE(review): the closing braces of the if/else above are not present in
       // this excerpt, and no writer.close() call follows the comment below even
       // though the lengths are read right after it — confirm against upstream.
       // close the writer
       // record offsets
       rec.startOffset = segmentStart;
       rec.rawLength = writer.getRawLength();
       rec.partLength = writer.getCompressedLength();
       spillRec.putIndex(rec, i);
       // Clear the reference so the finally block does not close the writer again.
       writer = null;
   } finally {
       // Only reached with a non-null writer if the try block did not run to completion.
       if (null != writer) writer.close();
       // NOTE(review): the closing braces of the finally block and the for loop
       // are not present in this excerpt.


// Once the accumulated in-memory index size has reached the configured limit,
// write this spill's index to its own file; otherwise count its size towards
// the running in-memory total.
if (totalIndexCacheMemory >= indexCacheMemoryLimit) {
 // create spill index file
 Path indexFilename =mapOutputFile.getSpillIndexFileForWrite(numSpills, partitions
                  * MAP_OUTPUT_INDEX_RECORD_LENGTH);
 spillRec.writeToFile(indexFilename, job);
} else {
 totalIndexCacheMemory +=spillRec.size() * MAP_OUTPUT_INDEX_RECORD_LENGTH;
 // NOTE(review): the closing brace of the else-branch is not present in this excerpt.


posted @ 2016-02-28 17:11  blfshiye  阅读(314)  评论(0编辑  收藏  举报