spark源码(三)Master receive方法

    Master receive全部方法
        一.case ElectedLeader => ......
        二.case CompleteRecovery => ......
        三.case RevokedLeadership => ......
        四.case WorkerDecommissioning => ......
        五.case DecommissionWorkers => ......
        六.case RegisterWorker => ......
        七.case RegisterApplication => ......
        八.case ExecutorStateChanged => ......
        九.case DriverStateChanged => ......
        十.case Heartbeat => ......
        十一.case MasterChangeAcknowledged => ......
        十二.case WorkerSchedulerStateResponse => ......
        十三.case WorkerLatestState => ......
        十四.case UnregisterApplication => ......
        十五.case CheckForWorkerTimeOut => ......

三.RevokedLeadership 详解

    // Handler body for RevokedLeadership: log that leadership was lost, then terminate the process.
    logError("Leadership has been revoked -- master shutting down.")
    System.exit(0)  // Simple handling: the current master process just exits with status 0.
                    // Author's note: despite what some online articles claim, a plain exit is considered graceful enough here.

四.WorkerDecommissioning 详解 单个worker节点下线(decommission)

    // Handler body for WorkerDecommissioning. `id` and `workerRef` are bound by the
    // message pattern, which is not shown in this excerpt.
    if (state == RecoveryState.STANDBY) {// STANDBY marks a standby (backup) master
        // A standby master does not act on the request; it only replies MasterInStandby.
        workerRef.send(MasterInStandby)
    } else {
        // Decommission the worker registered under `id`; nothing happens if the lookup yields no worker.
        idToWorker.get(id).foreach(decommissionWorker)
    }
4.1 decommissionWorker 详解
    private def decommissionWorker(worker: WorkerInfo): Unit = {
        if (worker.state != WorkerState.DECOMMISSIONED) {
          logInfo("Decommissioning worker %s on %s:%d".format(worker.id, worker.host, worker.port))
          worker.setState(WorkerState.DECOMMISSIONED)
          for (exec <- worker.executors.values) {
            logInfo("Telling app of decommission executors")
            exec.application.driver.send(ExecutorUpdated(//在别的节点上新起executor 
              exec.id, ExecutorState.DECOMMISSIONED,
              Some("worker decommissioned"), None,
              Some(worker.host)))
            exec.state = ExecutorState.DECOMMISSIONED
            exec.application.removeExecutor(exec)//worker上所有executor去除
          }
          persistenceEngine.removeWorker(worker)
        } else {
          logWarning("Skipping decommissioning worker %s on %s:%d as worker is already decommissioned".
            format(worker.id, worker.host, worker.port))
        }
    }

五.DecommissionWorkers 详解 批量下线指定 id 的 worker 节点

    // Handler body for DecommissionWorkers. `ids` is bound by the message pattern,
    // which is not shown in this excerpt.
    // This handler must never run on a standby master.
    assert(state != RecoveryState.STANDBY)
    ids.foreach ( id =>
        // Only ids that resolve to a registered worker are acted on; the rest are skipped.
        idToWorker.get(id).foreach { w =>
          // Update the master-side state first, then tell the worker itself to decommission.
          decommissionWorker(w)
          w.endpoint.send(DecommissionWorker)
        }
    )

六.registerWorker 详解 注册节点

    private def registerWorker(worker: WorkerInfo): Boolean = {
        workers.filter { w =>
          (w.host == worker.host && w.port == worker.port) && (w.state == WorkerState.DEAD)
        }.foreach { w =>
          workers -= w
        }//当前节点是死节点  就去掉当前节点
        val workerAddress = worker.endpoint.address
        if (addressToWorker.contains(workerAddress)) {//如果当前节点是已经注册过 
            val oldWorker = addressToWorker(workerAddress)
            if (oldWorker.state == WorkerState.UNKNOWN) {
                removeWorker(oldWorker, "Worker replaced by a new worker with same address")
            } else {
                logInfo("Attempted to re-register worker at same address: " + workerAddress)
                return false
            }
        }
        workers += worker//真正注册节点的代码
        idToWorker(worker.id) = worker//真正注册节点的代码
        addressToWorker(workerAddress) = worker//真正注册节点的代码
        true
    }

七.RegisterApplication 详解

    // Handler body for RegisterApplication. `description` and `driver` are bound by the
    // message pattern, which is not shown in this excerpt.
    if (state == RecoveryState.STANDBY) {
        // ignore, don't send response
    } else {
        logInfo("Registering app " + description.name)
        val app = createApplication(description, driver)// per the author's note this only constructs an ApplicationInfo -- helper not shown here
        registerApplication(app)/* records the app in the master's bookkeeping; the author notes it also handles resource-related state -- helper not shown here */
        logInfo("Registered app " + description.name + " with ID " + app.id)
        persistenceEngine.addApplication(app)
        driver.send(RegisteredApplication(app.id, self))/* reply to the driver that registration succeeded */
        // Re-run scheduling now that a new application is registered.
        schedule()
    }

八.ExecutorStateChanged 详解

    // Handler body for ExecutorStateChanged. The first line is the author's shorthand for the
    // fields bound by the message pattern; the real `case` header is not shown in this excerpt.
    (appId, execId, state, message, exitStatus) =>
    // Look up the executor; the result is empty if either the app or the executor is unknown.
    val execOption = idToApp.get(appId).flatMap(app => app.executors.get(execId))
    execOption match {
      case Some(exec) =>
        val appInfo = idToApp(appId)
        // Remember the previous state before overwriting it; it is checked further down.
        val oldState = exec.state
        exec.state = state
        if (state == ExecutorState.RUNNING) {// on a transition to RUNNING: check the previous state was LAUNCHING, then reset the app's retry counter
          assert(oldState == ExecutorState.LAUNCHING,
            s"executor $execId state transfer from $oldState to RUNNING is illegal")
          appInfo.resetRetryCount()
        }
        // Forward the state change to the application's driver.
        exec.application.driver.send(ExecutorUpdated(execId, state, message, exitStatus, None))
        if (ExecutorState.isFinished(state)) {// the executor has reached a finished state: remove it
          logInfo(s"Removing executor ${exec.fullId} because it is $state")
          if (!appInfo.isFinished) {
            appInfo.removeExecutor(exec)// detach the executor from the application's bookkeeping
          }
          exec.worker.removeExecutor(exec)
          // Only an exit status of exactly 0 counts as a normal exit.
          val normalExit = exitStatus == Some(0)
          // Note: incrementRetryCount() is only evaluated when the two preceding conditions hold,
          // because && short-circuits.
          if (!normalExit
              && oldState != ExecutorState.DECOMMISSIONED
              && appInfo.incrementRetryCount() >= maxExecutorRetries
              && maxExecutorRetries >= 0) { // abnormal exit and the retry limit has been reached
            val execs = appInfo.executors.values
            // Only give up on the application if none of its executors is still RUNNING.
            if (!execs.exists(_.state == ExecutorState.RUNNING)) {
              logError(s"Application ${appInfo.desc.name} with ID ${appInfo.id} failed " +
                s"${appInfo.retryCount} times; removing it")
              removeApplication(appInfo, ApplicationState.FAILED)
            }
          }
        }
        // Re-run scheduling after any state change of a known executor.
        schedule()
      case None =>
        logWarning(s"Got status update for unknown executor $appId/$execId")
    }
8.1 removeApplication 详解
    def removeApplication(app: ApplicationInfo, state: ApplicationState.Value): Unit = {
        if (apps.contains(app)) {
          logInfo("Removing app " + app.id)
          apps -= app
          idToApp -= app.id
          endpointToApp -= app.driver
          addressToApp -= app.driver.address
          if (completedApps.size >= retainedApplications) {
            val toRemove = math.max(retainedApplications / 101)
            completedApps.take(toRemove).foreach { a =>
              applicationMetricsSystem.removeSource(a.appSource)
            }
            completedApps.trimStart(toRemove)
          }
          completedApps += app 
          waitingApps -= app
          for (exec <- app.executors.values) {
            killExecutor(exec)
          }
          app.markFinished(state)
          if (state != ApplicationState.FINISHED) {
            app.driver.send(ApplicationRemoved(state.toString))//这其实已经把driver相关的移除了
          }
          persistenceEngine.removeApplication(app)
          schedule()
          workers.foreach { w =>
            w.endpoint.send(ApplicationFinished(app.id))//上面执行了一遍KillExecutor 这又执行一遍 ApplicationFinished 
          }
        }
    }
8.1.1 killExecutor 详解
    private def killExecutor(exec: ExecutorDesc): Unit = {
        exec.worker.removeExecutor(exec)
        exec.worker.endpoint.send(KillExecutor(masterUrl, exec.application.idexec.id))//这才是重点代码啊
        exec.state = ExecutorState.KILLED
    }
posted @   Kotlin  阅读(39)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?
Live2D
点击右上角即可分享
微信分享提示
西雅图
14:14发布
西雅图
14:14发布
4°
东南风
2级
空气质量
相对湿度
92%
今天
3°/12°
周四
4°/11°
周五
2°/10°