The Hadoop MapReduce Execution Process (Part 1)

JobClient

  JobClient is the client through which jobs are submitted. When an instance is created, the constructor does the following:

  public JobClient(JobConf conf) throws IOException {
    setConf(conf);
    init(conf);
  }

  public void init(JobConf conf) throws IOException {
    String tracker = conf.get("mapred.job.tracker", "local");
    tasklogtimeout = conf.getInt(
      TASKLOG_PULL_TIMEOUT_KEY, DEFAULT_TASKLOG_TIMEOUT);
    this.ugi = UserGroupInformation.getCurrentUser();
    // Choose which kind of job-submission client to create, based on the
    // "mapred.job.tracker" setting: local mode runs jobs in-process via
    // LocalJobRunner, otherwise an RPC proxy to the real JobTracker is created
    if ("local".equals(tracker)) {
      conf.setNumMapTasks(1);
      this.jobSubmitClient = new LocalJobRunner(conf);
    } else {
      this.jobSubmitClient = createRPCProxy(JobTracker.getAddress(conf), conf);
    }
  }

    Once the instance has been created, the method used to submit a job to the JobTracker is:

  public RunningJob submitJob(JobConf job) throws FileNotFoundException,
                                                  IOException {
    try {
      return submitJobInternal(job);
    } catch (InterruptedException ie) {
      throw new IOException("interrupted", ie);
    } catch (ClassNotFoundException cnfe) {
      throw new IOException("class not found", cnfe);
    }
  }
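
For context, user code normally does not call submitJob() directly; the classic old-API driver goes through the static helper JobClient.runJob(), which submits the job and then blocks, polling its progress. A minimal sketch using the stock IdentityMapper/IdentityReducer (the input and output paths are placeholders):

  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapred.FileInputFormat;
  import org.apache.hadoop.mapred.FileOutputFormat;
  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.RunningJob;
  import org.apache.hadoop.mapred.lib.IdentityMapper;
  import org.apache.hadoop.mapred.lib.IdentityReducer;

  public class PassThroughDriver {
    public static void main(String[] args) throws Exception {
      JobConf conf = new JobConf(PassThroughDriver.class);
      conf.setJobName("pass-through");
      // With the default TextInputFormat, the identity mapper emits
      // LongWritable offsets as keys and Text lines as values
      conf.setOutputKeyClass(LongWritable.class);
      conf.setOutputValueClass(Text.class);
      conf.setMapperClass(IdentityMapper.class);
      conf.setReducerClass(IdentityReducer.class);
      FileInputFormat.setInputPaths(conf, new Path("/user/demo/in"));   // placeholder
      FileOutputFormat.setOutputPath(conf, new Path("/user/demo/out")); // placeholder
      // runJob() goes through submitJob() and then polls until completion
      RunningJob job = JobClient.runJob(conf);
      System.out.println("Job " + job.getID() + " succeeded: " + job.isSuccessful());
    }
  }

submitJob() itself delegates to submitJobInternal(), which does the real work: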


 public RunningJob submitJobInternal(final JobConf job
                               ) throws FileNotFoundException, 
                                        ClassNotFoundException,
                                        InterruptedException,
                                        IOException {
    /*
     * configure the command line options correctly on the submitting dfs
     */
    return ugi.doAs(new PrivilegedExceptionAction<RunningJob>() {
      public RunningJob run() throws FileNotFoundException, 
      ClassNotFoundException,
      InterruptedException,
      IOException{
        JobConf jobCopy = job;
        // Get the staging directory on the JobTracker's filesystem; this is the
        // parent directory for the job's submitted resources (job.xml, job.jar, etc.)
        Path jobStagingArea = JobSubmissionFiles.getStagingDir(JobClient.this,
            jobCopy);
        // Obtain a new JobID from the JobTracker
        JobID jobId = jobSubmitClient.getNewJobId();
        Path submitJobDir = new Path(jobStagingArea, jobId.toString());
        jobCopy.set("mapreduce.job.dir", submitJobDir.toString());
        JobStatus status = null;
        try {
          populateTokenCache(jobCopy, jobCopy.getCredentials());

          // Copy all the job's resource files into the staging directory
          copyAndConfigureFiles(jobCopy, submitJobDir);

          // get delegation token for the dir
          TokenCache.obtainTokensForNamenodes(jobCopy.getCredentials(),
                                              new Path [] {submitJobDir},
                                              jobCopy);

          Path submitJobFile = JobSubmissionFiles.getJobConfPath(submitJobDir);
          int reduces = jobCopy.getNumReduceTasks();
          InetAddress ip = InetAddress.getLocalHost();
          if (ip != null) {
            job.setJobSubmitHostAddress(ip.getHostAddress());
            job.setJobSubmitHostName(ip.getHostName());
          }
          JobContext context = new JobContext(jobCopy, jobId);

          jobCopy = (JobConf)context.getConfiguration();

          // Check the output specification (validate the job's output directory)
          if (reduces == 0 ? jobCopy.getUseNewMapper() :
              jobCopy.getUseNewReducer()) {
            org.apache.hadoop.mapreduce.OutputFormat<?,?> output =
              ReflectionUtils.newInstance(context.getOutputFormatClass(),
                  jobCopy);
            output.checkOutputSpecs(context);
          } else {
            // "fs" here resolves to JobClient's FileSystem field; the local
            // variable of the same name below is not yet in scope
            jobCopy.getOutputFormat().checkOutputSpecs(fs, jobCopy);
          }

          // Create the splits for the job
          FileSystem fs = submitJobDir.getFileSystem(jobCopy);
          LOG.debug("Creating splits at " + fs.makeQualified(submitJobDir));
          // Split the input data and write the splits into the staging directory
          int maps = writeSplits(context, submitJobDir);
          jobCopy.setNumMapTasks(maps);

          // write "queue admins of the queue to which job is being submitted"
          // to job file.
          String queue = jobCopy.getQueueName();
          AccessControlList acl = jobSubmitClient.getQueueAdmins(queue);
          jobCopy.set(QueueManager.toFullPropertyName(queue,
              QueueACL.ADMINISTER_JOBS.getAclName()), acl.getACLString());

          // Write job file to JobTracker's fs
          FSDataOutputStream out =
            FileSystem.create(fs, submitJobFile,
                new FsPermission(JobSubmissionFiles.JOB_FILE_PERMISSION));
          try {
            jobCopy.writeXml(out);
          } finally {
            out.close();
          }

          //
          // Now, actually submit the job (using the submit name)
          //
          printTokens(jobId, jobCopy.getCredentials());
          status = jobSubmitClient.submitJob(
              jobId, submitJobDir.toString(), jobCopy.getCredentials());
          JobProfile prof = jobSubmitClient.getJobProfile(jobId);
          if (status != null && prof != null) {
            return new NetworkedJob(status, prof, jobSubmitClient);
          } else {
            throw new IOException("Could not launch job");
          }
        } finally {
          if (status == null) {
            LOG.info("Cleaning up the staging area " + submitJobDir);
            if (fs != null && submitJobDir != null)
              fs.delete(submitJobDir, true);
          }
        }
      }
    });
  }

    Inside this method, JobTracker.submitJob() is invoked (through the jobSubmitClient RPC proxy), and its return value is a JobStatus.
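
The NetworkedJob handed back to the caller wraps this JobStatus and refreshes it over RPC, so the submitting client can poll the job. A sketch of what that looks like from the caller's side (conf is assumed to be a fully configured JobConf, as in the driver above):

  import org.apache.hadoop.mapred.JobClient;
  import org.apache.hadoop.mapred.JobConf;
  import org.apache.hadoop.mapred.RunningJob;

  public class SubmitAndPoll {
    public static void main(String[] args) throws Exception {
      JobConf conf = new JobConf(); // assumed to be fully configured
      JobClient jc = new JobClient(conf);
      RunningJob running = jc.submitJob(conf); // the NetworkedJob built above

      // Poll the JobTracker until the job finishes
      while (!running.isComplete()) {
        System.out.printf("map %.0f%% reduce %.0f%%%n",
            running.mapProgress() * 100, running.reduceProgress() * 100);
        Thread.sleep(5000);
      }
      System.out.println(running.isSuccessful() ? "done" : "failed");
    }
  }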

  

JobTracker

    The JobTracker runs in its own JVM; before it can accept job submissions, it must already have been started:

    

 public static void main(String argv[]
                          ) throws IOException, InterruptedException {
    StringUtils.startupShutdownMessage(JobTracker.class, argv, LOG);
    
    try {
      if(argv.length == 0) {
        JobTracker tracker = startTracker(new JobConf());
        tracker.offerService();
      }
      else {
        if ("-dumpConfiguration".equals(argv[0]) && argv.length == 1) {
          dumpConfiguration(new PrintWriter(System.out));
        }
        else {
          System.out.println("usage: JobTracker [-dumpConfiguration]");
          System.exit(-1);
        }
      }
    } catch (Throwable e) {
      LOG.fatal(StringUtils.stringifyException(e));
      System.exit(-1);
    }
  }

 The main method does two things:

 Instantiates a JobTracker and starts it.

 Begins offering service to the outside world.

 

 First, the JobTracker instantiation. main() calls the single-argument startTracker(JobConf), which generates an identifier and delegates to this overload:

 public static JobTracker startTracker(JobConf conf, String identifier) 
  throws IOException, InterruptedException {
    DefaultMetricsSystem.initialize("JobTracker");
    JobTracker result = null;
    while (true) {
      try {
        result = new JobTracker(conf, identifier);
        result.taskScheduler.setTaskTrackerManager(result);
        break;
      } catch (VersionMismatch e) {
        throw e;
      } catch (BindException e) {
        throw e;
      } catch (UnknownHostException e) {
        throw e;
      } catch (AccessControlException ace) {
        // in case of jobtracker not having right access
        // bail out
        throw ace;
      } catch (IOException e) {
        LOG.warn("Error starting tracker: " + 
                 StringUtils.stringifyException(e));
      }
      Thread.sleep(1000);
    }
    if (result != null) {
      JobEndNotifier.startNotifier();
      MBeans.register("JobTracker", "JobTrackerInfo", result);
    }
    return result;
  }

Instantiation does the following:

 Reads the various configuration values from the configuration files.

 Instantiates the TaskScheduler; the default is JobQueueTaskScheduler.

 Starts interTrackerServer, the internal RPC server that handles communication with the TaskTrackers.

 Starts the HTTP server, infoServer.

  JobQueueTaskScheduler is the default scheduler for jobs and tasks; it does FIFO queue scheduling. The scheduler is started inside the offerService() method, and it can be replaced through configuration, as the sketch below shows.
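
The JobTracker constructor looks the scheduler class up from the mapred.jobtracker.taskScheduler property and instantiates it reflectively, falling back to JobQueueTaskScheduler when the property is unset. A small sketch mirroring that lookup (paraphrased from memory of the branch-1 source, so worth verifying against your version; setting the property in mapred-site.xml, e.g. to the contrib FairScheduler, swaps the scheduler without code changes):

  import org.apache.hadoop.conf.Configuration;

  public class SchedulerLookupDemo {
    public static void main(String[] args) {
      // Mirrors the lookup in the JobTracker constructor: the scheduler
      // class name comes from configuration, FIFO being the default
      Configuration conf = new Configuration();
      String schedulerClass = conf.get("mapred.jobtracker.taskScheduler",
          "org.apache.hadoop.mapred.JobQueueTaskScheduler");
      System.out.println("JobTracker would instantiate: " + schedulerClass);
    }
  }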

 

TaskTracker

  The TaskTracker also runs in its own JVM. At startup it calls its run() method; the call chain is run() ==> offerService() ==> transmitHeartBeat() ==> JobTracker.heartbeat(). transmitHeartBeat() returns a HeartbeatResponse, and the TaskTracker acts on it accordingly:

      TaskTrackerAction[] actions = heartbeatResponse.getActions();
      if (actions != null) {
        for (TaskTrackerAction action : actions) {
          if (action instanceof LaunchTaskAction) {
            addToTaskQueue((LaunchTaskAction)action);
          } else if (action instanceof CommitTaskAction) {
            CommitTaskAction commitAction = (CommitTaskAction)action;
            if (!commitResponses.contains(commitAction.getTaskID())) {
              LOG.info("Received commit task action for " +
                       commitAction.getTaskID());
              commitResponses.add(commitAction.getTaskID());
            }
          } else {
            tasksToCleanup.put(action);
          }
        }
      }

Depending on the TaskTrackerAction returned, a different operation is performed: launch a task, commit a task, or clean one up. Once a task is launched, the TaskLauncher takes over the remaining work, which will be covered in the next part.

A rough sketch:

[sketch image]

posted @ 2012-05-16 13:36 nod0620