Spark source code (13): spark-submit action explained

   org.apache.spark.examples.SparkPi, line 27
    def main(args: Array[String]): Unit = {
      val spark = SparkSession
        .builder
        .appName("Spark Pi")
        .getOrCreate()
      val slices = if (args.length > 0) args(0).toInt else 2
      val n = math.min(100000L * slices, Int.MaxValue).toInt 
      val count = spark.sparkContext.parallelize(1 until n, slices).map { i =>
        val x = random * 2 - 1
        val y = random * 2 - 1
        if (x*x + y*y <= 1) 1 else 0
      }.reduce(_ + _) // reduce is an action operator, so it triggers job execution -- let's step into it
      println(s"Pi is roughly ${4.0 * count / (n - 1)}")
      spark.stop()
    }
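    Before stepping into reduce, here is a minimal runnable sketch (my own example, local mode assumed, not part of the original SparkPi code) showing why the walkthrough starts at an action: map only records lineage, and nothing executes until an action such as reduce is called.
    import org.apache.spark.sql.SparkSession

    object LazyVsAction {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder.appName("lazy-vs-action").master("local[*]").getOrCreate()
        val sc = spark.sparkContext

        val mapped = sc.parallelize(1 to 10, 2).map(_ * 2) // transformation: only builds lineage, no job yet
        val sum = mapped.reduce(_ + _)                     // action: triggers sc.runJob -> DAGScheduler
        println(sum)                                       // 110
        spark.stop()
      }
    }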
   org.apache.spark.rdd.RDD, line 1102
    def reduce(f: (T, T) => T): T = withScope {
      val cleanF = sc.clean(f)
      val reducePartition: Iterator[T] => Option[T] = iter => {
        if (iter.hasNext) {
          Some(iter.reduceLeft(cleanF))
        } else {
          None
        }
      }
      var jobResult: Option[T] = None // this block is a bit hard to follow at first
      val mergeResult = (_: Int, taskResult: Option[T]) => {
        if (taskResult.isDefined) {
          jobResult = jobResult match {
            case Some(value) => Some(f(value, taskResult.get))
            case None => taskResult
          }
        }
      }
      sc.runJob(this, reducePartition, mergeResult) /* entry point into runJob */
      jobResult.getOrElse(throw new UnsupportedOperationException("empty collection"))
    }
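    If the reducePartition/mergeResult pair looks opaque, the following plain-Scala sketch (no Spark needed, names are mine) plays both roles by hand: each partition is reduced to an Option, and the driver-side mergeResult folds those partial results into jobResult, exactly as above.
    object ReduceMergeSketch {
      def main(args: Array[String]): Unit = {
        val f: (Int, Int) => Int = _ + _
        // stand-ins for the iterators of three partitions, one of them empty
        val partitions: Seq[Iterator[Int]] = Seq(Iterator(1, 2, 3), Iterator.empty, Iterator(4, 5))

        // per-partition reduce, as run inside each task
        val reducePartition: Iterator[Int] => Option[Int] =
          iter => if (iter.hasNext) Some(iter.reduceLeft(f)) else None

        // driver-side merge of the partial results
        var jobResult: Option[Int] = None
        val mergeResult = (_: Int, taskResult: Option[Int]) => {
          if (taskResult.isDefined) {
            jobResult = jobResult match {
              case Some(value) => Some(f(value, taskResult.get))
              case None => taskResult
            }
          }
        }

        partitions.zipWithIndex.foreach { case (iter, idx) => mergeResult(idx, reducePartition(iter)) }
        println(jobResult.getOrElse(throw new UnsupportedOperationException("empty collection"))) // 15
      }
    }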
   org.apache.spark.SparkContext, line 2286
    def runJob[T, U: ClassTag](
        rdd: RDD[T],
        processPartition: Iterator[T] => U,
        resultHandler: (Int, U) => Unit): Unit = {
      val processFunc = (context: TaskContext, iter: Iterator[T]) => processPartition(iter)
      runJob[T, U](rdd, processFunc, 0 until rdd.partitions.length, resultHandler)
    }
   org.apache.spark.SparkContext, line 2182
    def runJob[T, U: ClassTag](
        rdd: RDD[T],
        func: (TaskContext, Iterator[T]) => U,
        partitions: Seq[Int],
        resultHandler: (Int, U) => Unit): Unit = {
      if (stopped.get()) {
        throw new IllegalStateException("SparkContext has been shutdown")
      }
      val callSite = getCallSite
      val cleanedFunc = clean(func)
      logInfo("Starting job: " + callSite.shortForm)
      if (conf.getBoolean("spark.logLineage", false)) {
        logInfo("RDD's recursive dependencies:\n" + rdd.toDebugString)
      }
      /* the call ultimately lands in the DAGScheduler */
      dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, resultHandler,
        localProperties.get)
      progressBar.foreach(_.finishAll())
      rdd.doCheckpoint() // why checkpoint here? the job has just computed this RDD, so any RDD marked for checkpointing can now be materialized
    }
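    To make the division of labour concrete, here is a small spark-shell sketch (my own, assuming a SparkContext named sc) that calls this three-argument runJob overload directly: processPartition runs inside each task, resultHandler runs on the driver once per finished partition, mirroring reduce's reducePartition/mergeResult pair.
    val rdd = sc.parallelize(1 to 100, 4)
    val partialSums = new Array[Int](rdd.partitions.length)
    sc.runJob(
      rdd,
      (iter: Iterator[Int]) => iter.sum,                  // processPartition, executed in the tasks
      (index: Int, res: Int) => partialSums(index) = res  // resultHandler, executed on the driver
    )
    println(partialSums.mkString(", "))   // four partial sums adding up to 5050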
   org.apache.spark.scheduler.DAGScheduler, line 884
    def runJob[T, U](
        rdd: RDD[T],
        func: (TaskContext, Iterator[T]) => U,
        partitions: Seq[Int],
        callSite: CallSite,
        resultHandler: (Int, U) => Unit,
        properties: Properties): Unit = {
      /* rdd: the RDD the action was called on; func: the function run on each partition */
      /* partitions: the partitions of the final RDD; resultHandler: consumes each partition's result; properties: job configuration */
      val start = System.nanoTime
      /* entry point for job submission and stage splitting */
      val waiter = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
      ThreadUtils.awaitReady(waiter.completionFuture, Duration.Inf)
      waiter.completionFuture.value.get match {
        ...... // just success/failure logging, nothing interesting
      }
    }
   org.apache.spark.scheduler.DAGScheduler, line 826
    def submitJob[T, U](
        rdd: RDD[T],
        func: (TaskContext, Iterator[T]) => U,
        partitions: Seq[Int],
        callSite: CallSite,
        resultHandler: (Int, U) => Unit,
        properties: Properties): JobWaiter[U] = {
      val maxPartitions = rdd.partitions.length
      partitions.find(p => p >= maxPartitions || p < 0).foreach { p =>
        throw new IllegalArgumentException(
          "Attempting to access a non-existent partition: " + p + ". " +
            "Total number of partitions: " + maxPartitions)
      }
      eagerlyComputePartitionsForRddAndAncestors(rdd)

      val jobId = nextJobId.getAndIncrement()
      if (partitions.isEmpty) {
        val clonedProperties = Utils.cloneProperties(properties)
        if (sc.getLocalProperty(SparkContext.SPARK_JOB_DESCRIPTION) == null) {
          clonedProperties.setProperty(SparkContext.SPARK_JOB_DESCRIPTION, callSite.shortForm)
        }
        val time = clock.getTimeMillis()
        listenerBus.post(
          SparkListenerJobStart(jobId, time, Seq.empty, clonedProperties))
        listenerBus.post(
          SparkListenerJobEnd(jobId, time, JobSucceeded))
        return new JobWaiter[U](this, jobId, 0, resultHandler) /* holds the information needed to track the running job */
      }

      assert(partitions.nonEmpty)
      val func2 = func.asInstanceOf[(TaskContext, Iterator[_]) => _]
      val waiter = new JobWaiter[U](this, jobId, partitions.size, resultHandler)
      /* this just puts a JobSubmitted event on a queue; the real handling happens inside eventProcessLoop */
      /* let's follow DAGSchedulerEventProcessLoop */
      eventProcessLoop.post(JobSubmitted(
        jobId, rdd, func2, partitions.toArray, callSite, waiter,
        Utils.cloneProperties(properties)))
      waiter
    }
   org.apache.spark.scheduler.DAGScheduler, line 2421
    private[scheduler] class DAGSchedulerEventProcessLoop(dagScheduler: DAGScheduler)
      extends EventLoop[DAGSchedulerEvent]("dag-scheduler-event-loop") with Logging {
      // note: this is not an RPC call
      override def onReceive(event: DAGSchedulerEvent): Unit = {
        val timerContext = timer.time()
        try {
          // OK -- onReceive simply delegates to doOnReceive
          // step into the parent class EventLoop to see who calls onReceive
          doOnReceive(event)
        } finally {
          timerContext.stop()
        }
      }
      private def doOnReceive(event: DAGSchedulerEvent): Unit = event match {
        ......
      }
    }
   org.apache.spark.util.EventLoop, line 34
    private[spark] abstract class EventLoop[E](name: String) extends Logging {
      private[spark] val eventThread = new Thread(name) {
        setDaemon(true)

        override def run(): Unit = {
          try {
            while (!stopped.get) {
              val event = eventQueue.take()
              try {
                // OK -- the parent class runs a daemon thread that keeps taking events from eventQueue
                onReceive(event) // and dispatches each one to onReceive
              } catch {
                case NonFatal(e) =>
                  try {
                    onError(e)
                  } catch {
                    case NonFatal(e) => logError("Unexpected error in " + name, e)
                  }
              }
            }
          } catch {
            ......
          }
        }

      }
    }
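    The pattern is easy to reproduce. Below is a stripped-down sketch of the same idea (my own class names, not Spark's): post() puts an event on a blocking queue and returns immediately, while a single daemon thread drains the queue and calls onReceive.
    import java.util.concurrent.LinkedBlockingDeque
    import java.util.concurrent.atomic.AtomicBoolean

    class TinyEventLoop(name: String) {
      private val queue = new LinkedBlockingDeque[String]()
      private val stopped = new AtomicBoolean(false)

      // same shape as EventLoop.eventThread: a daemon thread draining a blocking queue
      private val eventThread = new Thread(name) {
        setDaemon(true)
        override def run(): Unit = {
          while (!stopped.get) {
            onReceive(queue.take())   // blocks until someone posts an event
          }
        }
      }

      def onReceive(event: String): Unit = println(s"[$name] handling $event")
      def post(event: String): Unit = queue.put(event)   // what eventProcessLoop.post amounts to
      def start(): Unit = eventThread.start()
      def stop(): Unit = stopped.set(true)
    }

    object TinyEventLoopDemo {
      def main(args: Array[String]): Unit = {
        val loop = new TinyEventLoop("tiny-dag-loop")
        loop.start()
        loop.post("JobSubmitted")   // returns immediately, handled on the event thread
        Thread.sleep(100)           // give the daemon thread a moment before the JVM exits
        loop.stop()
      }
    }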
   org.apache.spark.scheduler.DAGScheduler, line 2438
    // now that the event loop is clear, let's step into doOnReceive
    private def doOnReceive(event: DAGSchedulerEvent): Unit = event match {
      case JobSubmitted(jobId, rdd, func, partitions, callSite, listener, properties) =>
        dagScheduler.handleJobSubmitted(jobId, rdd, func,
          partitions, callSite, listener, properties)

      ...... // the other cases are not relevant for now
    }
   org.apache.spark.scheduler.DAGScheduler, line 1149
    Following the JobSubmitted case into dagScheduler.handleJobSubmitted:
    private[scheduler] def handleJobSubmitted(
        jobId: Int,
        finalRDD: RDD[_],
        func: (TaskContext, Iterator[_]) => _,
        partitions: Array[Int],
        callSite: CallSite,
        listener: JobListener,
        properties: Properties): Unit = { /* this is the key method */
      var finalStage: ResultStage = null
      try {
        /* entry point for stage splitting; the returned ResultStage links back to its parent stages */
        finalStage = createResultStage(finalRDD, func, partitions, jobId, callSite)
      } catch {
        /* the concepts in play: ShuffleMapStage vs ResultStage | ShuffleMapTask vs ResultTask | ShuffleDependency vs NarrowDependency */
        case e: BarrierJobSlotsNumberCheckFailed =>
          val numCheckFailures = barrierJobIdToNumTasksCheckFailures.compute(jobId,
            (_: Int, value: Int) => value + 1)

          if (numCheckFailures <= maxFailureNumTasksCheck) {
            messageScheduler.schedule(
              new Runnable {
                override def run(): Unit = eventProcessLoop.post(JobSubmitted(jobId, finalRDD, func,
                  partitions, callSite, listener, properties))
              },
              timeIntervalNumTasksCheck,
              TimeUnit.SECONDS
            )
            return
          } else {
            barrierJobIdToNumTasksCheckFailures.remove(jobId)
            listener.jobFailed(e)
            return
          }

        case e: Exception =>
          logWarning("Creating new stage failed due to exception - job: " + jobId, e)
          listener.jobFailed(e)
          return
      }
      barrierJobIdToNumTasksCheckFailures.remove(jobId)

      val job = new ActiveJob(jobId, finalStage, callSite, listener, properties)
      clearCacheLocs()

      val jobSubmissionTime = clock.getTimeMillis()
      jobIdToActiveJob(jobId) = job
      activeJobs += job
      finalStage.setActiveJob(job)
      val stageIds = jobIdToStageIds(jobId).toArray
      val stageInfos = stageIds.flatMap(id => stageIdToStage.get(id).map(_.latestInfo))
      listenerBus.post(
        SparkListenerJobStart(job.jobId, jobSubmissionTime, stageInfos,
          Utils.cloneProperties(properties)))
      submitStage(finalStage) /* submit the finalStage; submitStage recursively walks backwards to find and submit parent stages first */
    }
   1.1 createResultStage explained
    private def createResultStage(
        rdd: RDD[_],
        func: (TaskContext, Iterator[_]) => _,
        partitions: Array[Int],
        jobId: Int,
        callSite: CallSite): ResultStage = {
      val (shuffleDeps, resourceProfiles) = getShuffleDependenciesAndResourceProfiles(rdd)
      val resourceProfile = mergeResourceProfilesForStage(resourceProfiles)
      checkBarrierStageWithDynamicAllocation(rdd)
      checkBarrierStageWithNumSlots(rdd, resourceProfile)
      checkBarrierStageWithRDDChainPattern(rdd, partitions.toSet.size)
      val parents = getOrCreateParentStages(shuffleDeps, jobId) /* get the parent stages of the final stage */
      val id = nextStageId.getAndIncrement()
      val stage = new ResultStage(id, rdd, func, partitions, parents, jobId,
        callSite, resourceProfile.id) /* the final (result) stage */
      stageIdToStage(id) = stage
      updateJobIdStageIdMaps(jobId, stage)
      stage
    }
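    A quick way to see the shuffle boundaries that stage creation splits on (a spark-shell example of mine, assuming a SparkContext named sc): toDebugString marks each ShuffleDependency in the lineage, and every such boundary becomes a parent ShuffleMapStage of the ResultStage.
    val words = sc.parallelize(Seq("a b", "b c"), 2).flatMap(_.split(" "))
    val counts = words.map((_, 1)).reduceByKey(_ + _)   // reduceByKey introduces a ShuffleDependency
    println(counts.toDebugString)                       // the indentation change marks the stage boundary
    // counts.collect() therefore runs as two stages: one ShuffleMapStage plus the final ResultStage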
   1.1.1 getOrCreateParentStages explained
    private def getOrCreateParentStages(
        shuffleDeps: HashSet[ShuffleDependency[_, _, _]],
        firstJobId: Int): List[Stage] = {
      shuffleDeps.map { shuffleDep =>
        getOrCreateShuffleMapStage(shuffleDep, firstJobId) /* entry point */
      }.toList
    }
   1.1.1.1 getOrCreateShuffleMapStage explained
    private def getOrCreateShuffleMapStage(
        shuffleDep: ShuffleDependency[_, _, _],
        firstJobId: Int): ShuffleMapStage = {
      shuffleIdToMapStage.get(shuffleDep.shuffleId) match {
        case Some(stage) =>
          stage

        case None =>
          /* given an RDD, find all ancestor shuffle (wide) dependencies that do not yet have a registered stage */
          getMissingAncestorShuffleDependencies(shuffleDep.rdd).foreach { dep =>
            if (!shuffleIdToMapStage.contains(dep.shuffleId)) {
              /* given a shuffle dependency, create the corresponding ShuffleMapStage */
              createShuffleMapStage(dep, firstJobId)
            }
          }
          createShuffleMapStage(shuffleDep, firstJobId)
      }
    }
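    The traversal behind these calls is a breadth-first walk of the dependency graph that stops at shuffle boundaries. The sketch below is modeled on DAGScheduler.getShuffleDependencies (which the ancestor search builds on); directShuffleDeps is my own name, not a Spark API.
    import scala.collection.mutable.{HashSet, ListBuffer}
    import org.apache.spark.{Dependency, ShuffleDependency}
    import org.apache.spark.rdd.RDD

    def directShuffleDeps(rdd: RDD[_]): Set[ShuffleDependency[_, _, _]] = {
      val parents = new HashSet[ShuffleDependency[_, _, _]]
      val visited = new HashSet[RDD[_]]
      val waitingForVisit = new ListBuffer[RDD[_]]
      waitingForVisit += rdd
      while (waitingForVisit.nonEmpty) {
        val toVisit = waitingForVisit.remove(0)
        if (!visited(toVisit)) {
          visited += toVisit
          toVisit.dependencies.foreach {
            case shuffleDep: ShuffleDependency[_, _, _] =>
              parents += shuffleDep                     // stop at the shuffle boundary
            case narrowDep: Dependency[_] =>
              waitingForVisit.prepend(narrowDep.rdd)    // keep walking through narrow dependencies
          }
        }
      }
      parents.toSet
    }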
   1.2 submitStage explained
    /* the argument is the ResultStage, but it can only run after all of its parent stages have finished, so it is submitted recursively */
    private def submitStage(stage: Stage): Unit = {
      val jobId = activeJobForStage(stage)
      if (jobId.isDefined) {
        logDebug(s"submitStage($stage (name=${stage.name};" +
          s"jobs=${stage.jobIds.toSeq.sorted.mkString(",")}))")
        if (!waitingStages(stage) && !runningStages(stage) && !failedStages(stage)) {
          val missing = getMissingParentStages(stage).sortBy(_.id) /* parent stages that still have to run */
          logDebug("missing: " + missing)
          if (missing.isEmpty) {
            logInfo("Submitting " + stage + " (" + stage.rdd + "), which has no missing parents")
            submitMissingTasks(stage, jobId.get) /* no missing parents: submit this stage's tasks */
          } else {
            for (parent <- missing) {
              submitStage(parent) /* submit the parent stage first (recursive call) */
            }
            waitingStages += stage
          }
        }
      } else {
        abortStage(stage, "No active job for stage " + stage.id, None)
      }
    }
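    The recursion is easier to see on a toy model (plain Scala, no Spark, my own names): a stage is only "submitted" once all of its parents are done, so the submission order is always parents-first.
    case class ToyStage(id: Int, parents: List[ToyStage] = Nil)
    val order = scala.collection.mutable.ArrayBuffer[Int]()

    def submit(stage: ToyStage): Unit = {
      val missing = stage.parents.filterNot(p => order.contains(p.id)).sortBy(_.id)
      if (missing.isEmpty) {
        order += stage.id               // corresponds to submitMissingTasks
      } else {
        missing.foreach(submit)         // submit parent stages first (recursive call)
        order += stage.id               // Spark instead parks the stage in waitingStages until parents finish
      }
    }

    val s0 = ToyStage(0)
    val s1 = ToyStage(1)
    submit(ToyStage(2, List(s0, s1)))
    println(order)                      // ArrayBuffer(0, 1, 2)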
   1.2.1 submitMissingTasks explained
    // this method is a bit harder to follow
    private def submitMissingTasks(stage: Stage, jobId: Int): Unit = {
    /* 1. find the stage's missing partitions | serialize the task binary | create one task per partition | submit the TaskSet */

      stage match {
        case sms: ShuffleMapStage if stage.isIndeterminate && !sms.isAvailable =>
          mapOutputTracker.unregisterAllMapOutput(sms.shuffleDep.shuffleId)
        case _ =>
      }

      val partitionsToCompute: Seq[Int] = stage.findMissingPartitions()

      val properties = jobIdToActiveJob(jobId).properties
      addPySparkConfigsToProperties(stage, properties)

      runningStages += stage
      stage match { /* shuffle map stage or final (result) stage? */
        case s: ShuffleMapStage =>
          outputCommitCoordinator.stageStart(stage = s.id, maxPartitionId = s.numPartitions - 1)
          if (pushBasedShuffleEnabled) {
            prepareShuffleServicesForShuffleMapStage(s)
          }
        case s: ResultStage =>
          outputCommitCoordinator.stageStart(
            stage = s.id, maxPartitionId = s.rdd.partitions.length - 1)
      }
      val taskIdToLocations: Map[Int, Seq[TaskLocation]] = try {
        stage match {
          case s: ShuffleMapStage =>
            /* compute the preferred locations for each partition */
            partitionsToCompute.map { id => (id, getPreferredLocs(stage.rdd, id))}.toMap
          case s: ResultStage =>
            partitionsToCompute.map { id =>
              val p = s.partitions(id)
              (id, getPreferredLocs(stage.rdd, p))
            }.toMap
        }
      } catch {
        case NonFatal(e) =>
          stage.makeNewStageAttempt(partitionsToCompute.size)
          listenerBus.post(SparkListenerStageSubmitted(stage.latestInfo,
            Utils.cloneProperties(properties)))
          abortStage(stage, s"Task creation failed: $e\n${Utils.exceptionString(e)}", Some(e))
          runningStages -= stage
          return
      }

      stage.makeNewStageAttempt(partitionsToCompute.size, taskIdToLocations.values.toSeq)

      if (partitionsToCompute.nonEmpty) {
        stage.latestInfo.submissionTime = Some(clock.getTimeMillis())
      }
      listenerBus.post(SparkListenerStageSubmitted(stage.latestInfo,
        Utils.cloneProperties(properties)))

      var taskBinary: Broadcast[Array[Byte]] = null
      var partitions: Array[Partition] = null
      try {
        var taskBinaryBytes: Array[Byte] = null
        /* a ResultTask returns its result to the driver; a ShuffleMapTask writes shuffle output for the next stage */
        RDDCheckpointData.synchronized {
          taskBinaryBytes = stage match {
            case stage: ShuffleMapStage =>
              JavaUtils.bufferToArray(
                closureSerializer.serialize((stage.rdd, stage.shuffleDep): AnyRef))
            case stage: ResultStage =>
              JavaUtils.bufferToArray(closureSerializer.serialize((stage.rdd, stage.func): AnyRef))
          }

          partitions = stage.rdd.partitions
        }

        if (taskBinaryBytes.length > TaskSetManager.TASK_SIZE_TO_WARN_KIB * 1024) {
          logWarning(s"Broadcasting large task binary with size " +
            s"${Utils.bytesToString(taskBinaryBytes.length)}")
        }
        taskBinary = sc.broadcast(taskBinaryBytes)
      } catch {
        case e: NotSerializableException =>
          abortStage(stage, "Task not serializable: " + e.toString, Some(e))
          runningStages -= stage

          return
        case e: Throwable =>
          abortStage(stage, s"Task serialization failed: $e\n${Utils.exceptionString(e)}", Some(e))
          runningStages -= stage

          return
      }

      val tasks: Seq[Task[_]] = try {
        val serializedTaskMetrics = closureSerializer.serialize(stage.latestInfo.taskMetrics).array()
        stage match {
          case stage: ShuffleMapStage =>
            stage.pendingPartitions.clear()
            /* partition info of the stage's last RDD */
            partitionsToCompute.map { id =>
              val locs = taskIdToLocations(id)
              val part = partitions(id)
              stage.pendingPartitions += id
              new ShuffleMapTask(stage.id, stage.latestInfo.attemptNumber,
                taskBinary, part, locs, properties, serializedTaskMetrics, Option(jobId),
                Option(sc.applicationId), sc.applicationAttemptId, stage.rdd.isBarrier())
            }

          case stage: ResultStage =>
            partitionsToCompute.map { id =>
              val p: Int = stage.partitions(id)
              val part = partitions(p)
              val locs = taskIdToLocations(id)
              new ResultTask(stage.id, stage.latestInfo.attemptNumber,
                taskBinary, part, locs, id, properties, serializedTaskMetrics,
                Option(jobId), Option(sc.applicationId), sc.applicationAttemptId,
                stage.rdd.isBarrier())
            }
        }
      } catch {
        case NonFatal(e) =>
          abortStage(stage, s"Task creation failed: $e\n${Utils.exceptionString(e)}", Some(e))
          runningStages -= stage
          return
      }

      if (tasks.nonEmpty) {
        logInfo(s"Submitting ${tasks.size} missing tasks from $stage (${stage.rdd}) (first 15 " +
          s"tasks are for partitions ${tasks.take(15).map(_.partitionId)})")
        /* submit the tasks to TaskSchedulerImpl */
        taskScheduler.submitTasks(new TaskSet(
          tasks.toArray, stage.id, stage.latestInfo.attemptNumber, jobId, properties,
          stage.resourceProfileId))
      } else {
        markStageAsFinished(stage, None)

        stage match {
          case stage: ShuffleMapStage =>
            logDebug(s"Stage ${stage} is actually done; " +
                s"(available: ${stage.isAvailable}," +
                s"available outputs: ${stage.numAvailableOutputs}," +
                s"partitions: ${stage.numPartitions})")
            markMapStageJobsAsFinished(stage)
          case stage : ResultStage =>
            logDebug(s"Stage ${stage} is actually done; (partitions: ${stage.numPartitions})")
        }
        submitWaitingChildStages(stage)
      }
    }
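    The net effect of this method is the "one task per missing partition" rule, which is easy to confirm from a spark-shell (my own example, assuming a SparkContext named sc):
    val rdd = sc.parallelize(1 to 100, 4)
    println(rdd.getNumPartitions)   // 4
    rdd.count()                     // submitMissingTasks builds one ResultTask per partition,
                                    // so the resulting stage runs 4 tasks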
   1.2.1.1 taskScheduler.submitTasks explained
    org.apache.spark.scheduler.TaskSchedulerImpl, line 234
    override def submitTasks(taskSet: TaskSet): Unit = {
      val tasks = taskSet.tasks
      logInfo("Adding task set " + taskSet.id + " with " + tasks.length + " tasks "
        + "resource profile " + taskSet.resourceProfileId)
      this.synchronized {
        val manager = createTaskSetManager(taskSet, maxTaskFailures)
        val stage = taskSet.stageId
        val stageTaskSets =
          taskSetsByStageIdAndAttempt.getOrElseUpdate(stage, new HashMap[Int, TaskSetManager])
        stageTaskSets.foreach { case (_, ts) =>
          ts.isZombie = true
        }
        stageTaskSets(taskSet.stageAttemptId) = manager
        /* FIFO or FAIR scheduling policy */
        schedulableBuilder.addTaskSetManager(manager, manager.taskSet.properties)

        if (!isLocal && !hasReceivedTask) {
          starvationTimer.scheduleAtFixedRate(new TimerTask() {
            override def run(): Unit = {
              if (!hasLaunchedTask) {
                logWarning("Initial job has not accepted any resources; " +
                  "check your cluster UI to ensure that workers are registered " +
                  "and have sufficient resources")
              } else {
                this.cancel()
              }
            }
          }, STARVATION_TIMEOUT_MS, STARVATION_TIMEOUT_MS)
        }
        hasReceivedTask = true
      }
      backend.reviveOffers() /* a CoarseGrainedSchedulerBackend method */
    }
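    The scheduling policy mentioned above is chosen by spark.scheduler.mode (FIFO by default, or FAIR). A minimal sketch of switching to fair scheduling, with my own application name; FAIR pools are defined in the file referenced by spark.scheduler.allocation.file, and jobs can be routed to a pool via a thread-local property:
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .appName("fair-scheduling-sketch")
      .master("local[*]")                       // local run, for illustration only
      .config("spark.scheduler.mode", "FAIR")   // schedulableBuilder becomes a FairSchedulableBuilder
      .getOrCreate()

    // jobs submitted from this thread go to the named pool; unknown pools fall back to the default pool
    spark.sparkContext.setLocalProperty("spark.scheduler.pool", "pool1")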
   1.2.1.1.1 backend.reviveOffers explained
    org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend, line 582
    override def reviveOffers(): Unit = Utils.tryLogNonFatalError {
      driverEndpoint.send(ReviveOffers) // in the end this just sends a message to the driver endpoint
    }

    org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend, line 165
    case ReviveOffers =>
      makeOffers()
   1.2.1.1.1.1 makeOffers explained
    private def makeOffers(): Unit = {
      // Make sure no executor is killed while some task is launching on it
      val taskDescs = withLock {
        // Filter out executors under killing
        val activeExecutors = executorDataMap.filterKeys(isExecutorActive)
        val workOffers = activeExecutors.map {
          case (id, executorData) =>
            new WorkerOffer(id, executorData.executorHost, executorData.freeCores,
              Some(executorData.executorAddress.hostPort),
              executorData.resourcesInfo.map { case (rName, rInfo) =>
                (rName, rInfo.availableAddrs.toBuffer)
              }, executorData.resourceProfileId)
        }.toIndexedSeq
        scheduler.resourceOffers(workOffers, true)
      }
      if (taskDescs.nonEmpty) {
        launchTasks(taskDescs) /* launch the tasks */
      }
    }
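    The offer model is simple core arithmetic: each active executor offers its free cores, and scheduler.resourceOffers can place at most freeCores / taskCpus tasks on it. A toy calculation (plain Scala, made-up executor ids):
    case class Offer(executorId: String, freeCores: Int)
    val taskCpus = 1                                        // spark.task.cpus, default 1
    val offers = Seq(Offer("exec-1", 4), Offer("exec-2", 2))
    val slots = offers.map(o => o.executorId -> o.freeCores / taskCpus).toMap
    println(slots)                                          // Map(exec-1 -> 4, exec-2 -> 2)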
   1.2.1.1.1.1.1 launchTasks explained
    org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend, line 351
    private def launchTasks(tasks: Seq[Seq[TaskDescription]]): Unit = {
      for (task <- tasks.flatten) {
        val serializedTask = TaskDescription.encode(task)
        /* the serialized task exceeds the maximum RPC message size */
        if (serializedTask.limit() >= maxRpcMessageSize) {
          Option(scheduler.taskIdToTaskSetManager.get(task.taskId)).foreach { taskSetMgr =>
            try {
              var msg = "Serialized task %s:%d was %d bytes, which exceeds max allowed: " +
                s"${RPC_MESSAGE_MAX_SIZE.key} (%d bytes). Consider increasing " +
                s"${RPC_MESSAGE_MAX_SIZE.key} or using broadcast variables for large values."
              msg = msg.format(task.taskId, task.index, serializedTask.limit(), maxRpcMessageSize)
              taskSetMgr.abort(msg)
            } catch {
              case e: Exception => logError("Exception in error callback", e)
            }
          }
        }
        else {
          val executorData = executorDataMap(task.executorId)
          val rpId = executorData.resourceProfileId
          val prof = scheduler.sc.resourceProfileManager.resourceProfileFromId(rpId)
          val taskCpus = ResourceProfile.getTaskCpusOrDefaultForProfile(prof, conf)
          executorData.freeCores -= taskCpus
          task.resources.foreach { case (rName, rInfo) =>
            assert(executorData.resourcesInfo.contains(rName))
            executorData.resourcesInfo(rName).acquire(rInfo.addresses)
          }

          logDebug(s"Launching task ${task.taskId} on executor id: ${task.executorId} hostname: " +
            s"${executorData.executorHost}.")
          /* send a LaunchTask message to the executor endpoint */
          executorData.executorEndpoint.send(LaunchTask(new SerializableBuffer(serializedTask)))
        }
      }
    }
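    The size guard above compares the serialized TaskDescription against spark.rpc.message.maxSize (in MiB, default 128). If large task payloads trip it, either raise the limit or, as the error message suggests, ship large read-only data as a broadcast variable instead of capturing it in the closure. A sketch of both options (my own example values):
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder()
      .appName("rpc-size-sketch")
      .master("local[*]")
      .config("spark.rpc.message.maxSize", "256")   // in MiB
      .getOrCreate()

    // preferred fix: broadcast large read-only data so the task closure stays small
    val bigLookup = (1 to 100000).map(i => i -> i.toString).toMap
    val bcast = spark.sparkContext.broadcast(bigLookup)
    val hits = spark.sparkContext.parallelize(1 to 10).map(i => bcast.value(i)).collect()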
   LaunchTask explained
    org.apache.spark.executor.CoarseGrainedExecutorBackend, line 166
    case LaunchTask(data) =>
      if (executor == null) {
        exitExecutor(1, "Received LaunchTask command but executor was null")
      } else {
        val taskDesc = TaskDescription.decode(data.value)
        logInfo("Got assigned task " + taskDesc.taskId)
        taskResources(taskDesc.taskId) = taskDesc.resources
        executor.launchTask(this, taskDesc)
      }
   launchTask explained
    org.apache.spark.executor.Executor, line 269
    def launchTask(context: ExecutorBackend, taskDescription: TaskDescription): Unit = {
      // TaskRunner implements Runnable, so jump straight to its run method
      val tr = new TaskRunner(context, taskDescription, plugins)
      runningTasks.put(taskDescription.taskId, tr)
      threadPool.execute(tr)
      if (decommissioned) {
        log.error(s"Launching a task while in decommissioned state.")
      }
    }
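    The launch itself is just the standard "hand a Runnable to a thread pool" pattern, which is why launchTask returns immediately while the task runs on a worker thread. A minimal stand-alone sketch of that pattern (Spark's actual pool is a daemon cached thread pool):
    import java.util.concurrent.Executors

    val pool = Executors.newCachedThreadPool()    // stand-in for Executor.threadPool
    val tr = new Runnable {
      override def run(): Unit = println(s"task body running on ${Thread.currentThread().getName}")
    }
    pool.execute(tr)   // asynchronous, like threadPool.execute(tr) above
    pool.shutdown()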
   TaskRunner.run explained
    org.apache.spark.executor.Executor, line 432
    override def run(): Unit = {
      setMDCForTask(taskName, mdcProperties)
      threadId = Thread.currentThread.getId
      Thread.currentThread.setName(threadName)
      val threadMXBean = ManagementFactory.getThreadMXBean
      val taskMemoryManager = new TaskMemoryManager(env.memoryManager, taskId) /* per-task memory management */
      val deserializeStartTimeNs = System.nanoTime() /* deserialization start time */
      val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
        threadMXBean.getCurrentThreadCpuTime
      } else 0L /* CPU time at deserialization start */
      Thread.currentThread.setContextClassLoader(replClassLoader)
      val ser = env.closureSerializer.newInstance()
      logInfo(s"Running $taskName")
      execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
      var taskStartTimeNs: Long = 0
      var taskStartCpu: Long = 0
      startGCTime = computeTotalGcTime() /* GC time so far */
      var taskStarted: Boolean = false

      try {
        Executor.taskDeserializationProps.set(taskDescription.properties)
        /* resolve the task's file dependencies: files, jars, and archives */
        updateDependencies(
          taskDescription.addedFiles, taskDescription.addedJars, taskDescription.addedArchives)
        task = ser.deserialize[Task[Any]](
          taskDescription.serializedTask, Thread.currentThread.getContextClassLoader)
        task.localProperties = taskDescription.properties
        task.setTaskMemoryManager(taskMemoryManager)

        val killReason = reasonIfKilled
        if (killReason.isDefined) {
          throw new TaskKilledException(killReason.get)
        }

        if (!isLocal) {
          logDebug(s"$taskName's epoch is ${task.epoch}")
          env.mapOutputTracker.asInstanceOf[MapOutputTrackerWorker].updateEpoch(task.epoch)
        }

        metricsPoller.onTaskStart(taskId, task.stageId, task.stageAttemptId)
        taskStarted = true

        taskStartTimeNs = System.nanoTime()
        taskStartCpu = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
          threadMXBean.getCurrentThreadCpuTime
        } else 0L
        var threwException = true
        val value = Utils.tryWithSafeFinally {
          val res = task.run( /* run the task */
            taskAttemptId = taskId,
            attemptNumber = taskDescription.attemptNumber,
            metricsSystem = env.metricsSystem,
            resources = taskDescription.resources,
            plugins = plugins)
          threwException = false
          res
        } ......
    }
   runTask explained
    // task.run internally calls runTask
    // which dispatches to the concrete implementation: ResultTask or ShuffleMapTask
    override def runTask(context: TaskContext): U = {
      // Deserialize the RDD and the func using the broadcast variables.
      val threadMXBean = ManagementFactory.getThreadMXBean
      val deserializeStartTimeNs = System.nanoTime()
      val deserializeStartCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
        threadMXBean.getCurrentThreadCpuTime
      } else 0L
      val ser = SparkEnv.get.closureSerializer.newInstance()
      val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
        ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)
      _executorDeserializeTimeNs = System.nanoTime() - deserializeStartTimeNs
      _executorDeserializeCpuTime = if (threadMXBean.isCurrentThreadCpuTimeSupported) {
        threadMXBean.getCurrentThreadCpuTime - deserializeStartCpuTime
      } else 0L
      /* this is the ResultTask path: for collect the results are gathered back to the driver,
         for foreach the task produces its output locally, for a save the task writes out to HDFS */

      func(context, rdd.iterator(partition, context))
    }