03_MapReduce Framework Principles_3.2 Job Submission Flow (Source Code)

Hadoop
2. Job submission flow (source code)

1. The client executes the Driver class's main method (a minimal Driver sketch follows this list)
2. Configuration configuration = new Configuration() reads the configuration files: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml
3. boolean bool = job.waitForCompletion(true) submits the job to the cluster and waits for it to complete
4. submit()
5. connect() establishes a connection and obtains the cluster proxy object
   return new Cluster(getConfiguration()) reads the configuration and builds the cluster proxy
   initialize(jobTrackAddr, conf) decides whether this is a local runtime or a YARN cluster runtime
6. return submitter.submitJobInternal(Job.this, cluster) submits the job to the target cluster
   1. Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf) creates the staging directory on the target cluster and returns its path
      Example: file:/tmp/hadoop/mapred/staging/dxm1446706250/.staging
   2. JobID jobId = submitClient.getNewJobID() obtains a JobID
   3. Path submitJobDir = new Path(jobStagingArea, jobId.toString()) builds the job submit path from the JobID
      Example: file:/tmp/hadoop/mapred/staging/dxm870750042/.staging/job_local870750042_0001
   4. copyAndConfigureFiles(job, submitJobDir) uploads configuration files, libjars, job jars, and related archives to that path
      rUploader.uploadResources(job, jobSubmitDir)
   5. int maps = writeSplits(job, submitJobDir) computes splits from the input files, writes the split plan files (job.split, job.splitmetainfo), and uploads them to the staging path
   6. writeConf(Configuration conf, Path jobFile) uploads job.xml to the staging path
      conf.writeXml(out)
   7. status = submitClient.submitJob(jobId, submitJobDir.toString(), job.getCredentials()) submits the job and returns its submission status
   8. return isSuccessful() returns true when the job completes successfully
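The flow above starts from a user-written Driver. Below is a minimal sketch of such a Driver; WordCountDriver, WordCountMapper, WordCountReducer, and the argument-supplied paths are hypothetical placeholders, not taken from the walkthrough itself.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class WordCountDriver {
  public static void main(String[] args) throws Exception {
    // 1. Read the configuration (loads the *-default.xml and *-site.xml files listed above)
    Configuration configuration = new Configuration();
    Job job = Job.getInstance(configuration);
    job.setJarByClass(WordCountDriver.class);
    job.setMapperClass(WordCountMapper.class);   // hypothetical Mapper
    job.setReducerClass(WordCountReducer.class); // hypothetical Reducer
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // Entry point of the submission flow traced below
    boolean bool = job.waitForCompletion(true);
    System.exit(bool ? 0 : 1);
  }
}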

/*
Client job submission flow
  1. The client executes the Driver class's main method
     waitForCompletion() submits the job, monitors its progress, and returns true when it succeeds
  2. Submit the job
     submit()
  3. Establish a connection and create the cluster proxy object
     connect()
        // Create the cluster proxy object used to submit the job
        // Based on the configuration, decide whether to create a local runtime or a YARN cluster runtime
        return new Cluster(getConfiguration())
  4. Submit the job to the target cluster
      1. Create the staging directory on the target cluster and return its path
            //  Example: file:/tmp/hadoop/mapred/staging/dxm1446706250/.staging
            Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
      2. Create the JobID
            JobID jobId = submitClient.getNewJobID();
      3. Build the job submit path from the JobID
            // Example: file:/tmp/hadoop/mapred/staging/dxm870750042/.staging/job_local870750042_0001
            Path submitJobDir = new Path(jobStagingArea, jobId.toString());
      4. Upload configuration files, libjars, job jars, and related archives to that path
           // rUploader.uploadResources(job, jobSubmitDir)
           // uploads -libjars, -files, -archives
           copyAndConfigureFiles(job, submitJobDir);
      5. Compute splits from the input files, write the split plan files, and upload them to the staging path
          // Example: job.split, job.splitmetainfo
          // returns the number of splits
          int maps = writeSplits(job, submitJobDir);
      6. Upload job.xml to the staging path
          writeConf(conf, submitJobFile);
      7. Submit the job and return its submission status
          status = submitClient.submitJob
*/


//1. Submit the job
// What it does: submits the job, monitors its progress, and returns true once the job succeeds
boolean bool = job.waitForCompletion(true);
public boolean waitForCompletion(boolean verbose)
    throws IOException, InterruptedException, ClassNotFoundException {
  if (state == JobState.DEFINE) {
    //2. Submit the job
    submit();
  }
  // Monitor the job's progress
  if (verbose) {
    monitorAndPrintJob();
  } else {
    // get the completion poll interval from the client.
    int completionPollIntervalMillis =
      Job.getCompletionPollInterval(cluster.getConf());
    while (!isComplete()) {
      try {
        Thread.sleep(completionPollIntervalMillis);
      } catch (InterruptedException ie) {
      }
    }
  }
  // The job has finished; return true if it succeeded
  return isSuccessful();
}
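When verbose is false, the client simply polls isComplete() at the interval returned by Job.getCompletionPollInterval(). A minimal sketch of tuning that interval from the driver, assuming the mapreduce.client.completion.pollinterval property (defined in mapred-default.xml, default 5000 ms):

// Sketch: poll job completion every second instead of every five seconds.
Configuration conf = new Configuration();
conf.setInt("mapreduce.client.completion.pollinterval", 1000);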

//2. submit()
// What it does: creates the cluster proxy object, submits the job to the target cluster, and changes the job state to RUNNING
public void submit() 
       throws IOException, InterruptedException, ClassNotFoundException {
  ensureState(JobState.DEFINE);
  setUseNewAPI();
  // 3. Establish a connection and create the cluster proxy object
  connect();
  final JobSubmitter submitter = 
      getJobSubmitter(cluster.getFileSystem(), cluster.getClient());
  status = ugi.doAs(new PrivilegedExceptionAction<JobStatus>() {
    public JobStatus run() throws IOException, InterruptedException, 
    ClassNotFoundException {
      // 4. Submit the job to the target cluster
      return submitter.submitJobInternal(Job.this, cluster);
    }
  });
  state = JobState.RUNNING;
  LOG.info("The url to track the job: " + getTrackingURL());
 }

// 3. connect();
// What it does: creates the cluster proxy object from the configuration
private synchronized void connect()
        throws IOException, InterruptedException, ClassNotFoundException {
  if (cluster == null) {
    cluster = 
      ugi.doAs(new PrivilegedExceptionAction<Cluster>() {
                 public Cluster run()
                        throws IOException, InterruptedException, 
                               ClassNotFoundException {
                   // Create the cluster proxy object used to submit the job
                   // Based on the configuration, decide whether to create a local runtime or a YARN cluster runtime
                   return new Cluster(getConfiguration());
                 }
               });
  }
}
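Which runtime Cluster resolves to is driven by mapreduce.framework.name: the LocalClientProtocolProvider answers for "local" and the YarnClientProtocolProvider for "yarn". A hedged sketch of selecting the YARN mode from the driver (the host name is a placeholder):

// Sketch: make connect() resolve to a YARN cluster rather than the local runner.
Configuration conf = new Configuration();
conf.set("mapreduce.framework.name", "yarn");          // default is "local"
conf.set("yarn.resourcemanager.hostname", "rm-host");  // hypothetical RM host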

// 4. Submit the job to the target cluster
// What it does: internal method that submits the job to the target cluster
JobStatus submitJobInternal(Job job, Cluster cluster) 
throws ClassNotFoundException, InterruptedException, IOException {

  //validate the jobs output specs 
  checkSpecs(job);

  Configuration conf = job.getConfiguration();
  addMRFrameworkToDistributedCache(conf);
  
  //1. Create the staging directory on the target cluster and return its path
  //  Example: file:/tmp/hadoop/mapred/staging/dxm1446706250/.staging
  Path jobStagingArea = JobSubmissionFiles.getStagingDir(cluster, conf);
  //configure the command line options correctly on the submitting dfs
  InetAddress ip = InetAddress.getLocalHost();
  if (ip != null) {
    submitHostAddress = ip.getHostAddress();
    submitHostName = ip.getHostName();
    conf.set(MRJobConfig.JOB_SUBMITHOST,submitHostName);
    conf.set(MRJobConfig.JOB_SUBMITHOSTADDR,submitHostAddress);
  }
  //2. Create the JobID
  JobID jobId = submitClient.getNewJobID();
  job.setJobID(jobId);
  //3. Build the job submit path from the JobID
  // Example: file:/tmp/hadoop/mapred/staging/dxm870750042/.staging/job_local870750042_0001
  Path submitJobDir = new Path(jobStagingArea, jobId.toString());
  JobStatus status = null;
  try {
    conf.set(MRJobConfig.USER_NAME,
        UserGroupInformation.getCurrentUser().getShortUserName());
    conf.set("hadoop.http.filter.initializers", 
        "org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer");
    conf.set(MRJobConfig.MAPREDUCE_JOB_DIR, submitJobDir.toString());
    LOG.debug("Configuring job " + jobId + " with " + submitJobDir 
        + " as the submit dir");
    // get delegation token for the dir
    TokenCache.obtainTokensForNamenodes(job.getCredentials(),
        new Path[] { submitJobDir }, conf);
    
    populateTokenCache(conf, job.getCredentials());

    // generate a secret to authenticate shuffle transfers
    if (TokenCache.getShuffleSecretKey(job.getCredentials()) == null) {
      KeyGenerator keyGen;
      try {
        keyGen = KeyGenerator.getInstance(SHUFFLE_KEYGEN_ALGORITHM);
        keyGen.init(SHUFFLE_KEY_LENGTH);
      } catch (NoSuchAlgorithmException e) {
        throw new IOException("Error generating shuffle secret key", e);
      }
      SecretKey shuffleKey = keyGen.generateKey();
      TokenCache.setShuffleSecretKey(shuffleKey.getEncoded(),
          job.getCredentials());
    }
    if (CryptoUtils.isEncryptedSpillEnabled(conf)) {
      conf.setInt(MRJobConfig.MR_AM_MAX_ATTEMPTS, 1);
      LOG.warn("Max job attempts set to 1 since encrypted intermediate" +
              "data spill is enabled");
    }
    //4. Upload configuration files, libjars, job jars, and related archives to that path
    // rUploader.uploadResources(job, jobSubmitDir)
    // uploads -libjars, -files, -archives
    copyAndConfigureFiles(job, submitJobDir);

    Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
    
    // Create the splits for the job
    LOG.debug("Creating splits at " + jtFs.makeQualified(submitJobDir));
    //5. Compute splits from the input files, write the split plan files, and upload them to the staging path
    // Example: job.split, job.splitmetainfo
    // returns the number of splits
    int maps = writeSplits(job, submitJobDir);

    conf.setInt(MRJobConfig.NUM_MAPS, maps);
    LOG.info("number of splits:" + maps);

    int maxMaps = conf.getInt(MRJobConfig.JOB_MAX_MAP,
        MRJobConfig.DEFAULT_JOB_MAX_MAP);
    if (maxMaps >= 0 && maxMaps < maps) {
      throw new IllegalArgumentException("The number of map tasks " + maps +
          " exceeded limit " + maxMaps);
    }

    // write "queue admins of the queue to which job is being submitted"
    // to job file.
    String queue = conf.get(MRJobConfig.QUEUE_NAME,
        JobConf.DEFAULT_QUEUE_NAME);
    AccessControlList acl = submitClient.getQueueAdmins(queue);
    conf.set(toFullPropertyName(queue,
        QueueACL.ADMINISTER_JOBS.getAclName()), acl.getAclString());

    // removing jobtoken referrals before copying the jobconf to HDFS
    // as the tasks don't need this setting, actually they may break
    // because of it if present as the referral will point to a
    // different job.
    TokenCache.cleanUpTokenReferral(conf);

    if (conf.getBoolean(
        MRJobConfig.JOB_TOKEN_TRACKING_IDS_ENABLED,
        MRJobConfig.DEFAULT_JOB_TOKEN_TRACKING_IDS_ENABLED)) {
      // Add HDFS tracking ids
      ArrayList<String> trackingIds = new ArrayList<String>();
      for (Token<? extends TokenIdentifier> t :
          job.getCredentials().getAllTokens()) {
        trackingIds.add(t.decodeIdentifier().getTrackingId());
      }
      conf.setStrings(MRJobConfig.JOB_TOKEN_TRACKING_IDS,
          trackingIds.toArray(new String[trackingIds.size()]));
    }

    // Set reservation info if it exists
    ReservationId reservationId = job.getReservationId();
    if (reservationId != null) {
      conf.set(MRJobConfig.RESERVATION_ID, reservationId.toString());
    }

    // Write job file to submit dir
    //6. Upload job.xml to the staging path
    writeConf(conf, submitJobFile);
    
    //
    // Now, actually submit the job (using the submit name)
    //
    printTokens(jobId, job.getCredentials());
    //7. Submit the job and return its submission status
    status = submitClient.submitJob(
        jobId, submitJobDir.toString(), job.getCredentials());
    if (status != null) {
      return status;
    } else {
      throw new IOException("Could not launch job");
    }
  } finally {
    if (status == null) {
      LOG.info("Cleaning up the staging area " + submitJobDir);
      if (jtFs != null && submitJobDir != null)
        jtFs.delete(submitJobDir, true);

    }
  }
}
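In step 5, writeSplits() delegates to the job's InputFormat. For FileInputFormat-based jobs the per-split size follows a well-known rule; the sketch below paraphrases FileInputFormat.computeSplitSize() rather than quoting the exact Hadoop source:

// The split size is the HDFS block size, clamped between the configured
// minimum (mapreduce.input.fileinputformat.split.minsize) and
// maximum (mapreduce.input.fileinputformat.split.maxsize).
static long computeSplitSize(long blockSize, long minSize, long maxSize) {
  return Math.max(minSize, Math.min(maxSize, blockSize));
}
// Example: with a 128 MB block, minSize = 1, and maxSize = Long.MAX_VALUE,
// a 300 MB input file yields 3 splits (128 MB + 128 MB + 44 MB).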
