spark源码(十)Worker receive 方法
一.case msg: RegisterWorkerResponse
二.case SendHeartbeat
三.case WorkDirCleanup
四.case MasterChanged
五.case ReconnectWorker
六.case LaunchExecutor
七.case executorStateChanged: ExecutorStateChanged
八.case KillExecutor(masterUrl, appId, execId)
九.case LaunchDriver(driverId, driverDesc, resources_)
十.case KillDriver(driverId)
十一.case driverStateChanged @ DriverStateChanged(driverId, state, exception)
十二.case ReregisterWithMaster
十三.case ApplicationFinished(id)
十四.case DecommissionWorker
十五.case WorkerSigPWRReceived
十一. driverStateChanged @ DriverStateChanged(driverId, state, exception) 详解
/**
 * Handles a driver state-change notification on the worker.
 *
 * Logs the new state, forwards the notification to the master, then moves the driver
 * from `drivers` to `finishedDrivers` and gives back the memory, cores and resources
 * that were accounted for it.
 *
 * A notification for a driver id that is not tracked in `drivers` is logged and skipped
 * instead of failing with a NoSuchElementException.
 *
 * @param driverStateChanged notification carrying the driver id, its new state and an
 *                           optional exception
 */
private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = {
  val driverId = driverStateChanged.driverId
  val exception = driverStateChanged.exception
  val state = driverStateChanged.state
  state match {
    case DriverState.ERROR =>
      // Do not call Option.get here: an ERROR state without an attached exception
      // must not make the handler itself throw.
      val cause = exception.map(e => s"$e").getOrElse("<no exception reported>")
      logWarning(s"Driver $driverId failed with unrecoverable exception: $cause")
    case DriverState.FAILED =>
      logWarning(s"Driver $driverId exited with failure")
    case DriverState.FINISHED =>
      logInfo(s"Driver $driverId exited successfully")
    case DriverState.KILLED =>
      logInfo(s"Driver $driverId was killed by user")
    case _ =>
      logDebug(s"Driver $driverId changed state to $state")
  }
  // The master is always notified, even if the driver turns out to be untracked locally.
  sendToMaster(driverStateChanged)
  drivers.remove(driverId) match {
    case Some(driver) =>
      finishedDrivers(driverId) = driver
      // Presumably enforces the configured cap on retained finished drivers (per the original note).
      trimFinishedDriversIfNecessary()
      // Release the memory and CPU cores accounted to this driver.
      memoryUsed -= driver.driverDesc.mem
      coresUsed -= driver.driverDesc.cores
      // Release the remaining resources that were assigned to this driver.
      removeResourcesUsed(driver.resources)
    case None =>
      // Nothing was accounted for this id, so there is nothing to release.
      logWarning(s"Received state change for unknown driver $driverId, skipping resource release")
  }
}
十二. ReregisterWithMaster 详解
/**
 * Re-attempts registration of this worker with the master(s).
 *
 * Every call increments `connectionAttemptCount`. If the worker is already registered,
 * `cancelLastRegistrationRetry()` is called and nothing else happens. While the attempt
 * count is within TOTAL_REGISTRATION_RETRIES, any outstanding registration futures are
 * cancelled and a new attempt is submitted (to the known master, or to all masters when
 * none is known). Once the count exceeds the limit the process exits with status 1.
 */
private def reregisterWithMaster(): Unit = {
Utils.tryOrExit {
connectionAttemptCount += 1
if (registered) {
cancelLastRegistrationRetry()
// Original author's note: TOTAL_REGISTRATION_RETRIES = INITIAL_REGISTRATION_RETRIES + 10
} else if (connectionAttemptCount <= TOTAL_REGISTRATION_RETRIES) {
logInfo(s"Retrying connection to master (attempt # $connectionAttemptCount)")
master match {
case Some(masterRef) =>
if (registerMasterFutures != null) {
// Cancel the registration attempts that are still outstanding (they have not succeeded).
registerMasterFutures.foreach(_.cancel(true))
}
// Register again, against either the configured address or the known master's address.
val masterAddress =
if (preferConfiguredMasterAddress) masterAddressToConnect.get
else masterRef.address
registerMasterFutures = Array(registerMasterThreadPool.submit(
new Runnable {
override def run(): Unit = {
try {
logInfo("Connecting to master " + masterAddress + "...")
val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME)
/* Per the original note: this is handled by the RegisterWorker case on the Master side. */
sendRegisterMessageToMaster(masterEndpoint)
// Per the original note: a RegisterWorker message has now been sent to the master.
} catch {
case ie: InterruptedException => // Cancelled
case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
}
}
}))
case None =>
if (registerMasterFutures != null) {
registerMasterFutures.foreach(_.cancel(true))
}
// No known master: try registering with all masters (tryRegisterAllMasters is covered earlier in this series).
registerMasterFutures = tryRegisterAllMasters()
}
// Original author's note: TOTAL_REGISTRATION_RETRIES = INITIAL_REGISTRATION_RETRIES + 10
if (connectionAttemptCount == INITIAL_REGISTRATION_RETRIES) {
// Still not registered after the initial retries: replace the current retry timer
// with one that fires at the prolonged interval.
registrationRetryTimer.foreach(_.cancel(true))
registrationRetryTimer = Some(
forwardMessageScheduler.scheduleAtFixedRate(// re-send ReregisterWithMaster to self
() => Utils.tryLogNonFatalError { self.send(ReregisterWithMaster) },
PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS,
PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS,
TimeUnit.SECONDS))
}
} else {// Exceeded the maximum number of registration attempts: give up.
logError("All masters are unresponsive! Giving up.")
System.exit(1)
}
}
}
十三. ApplicationFinished(id) 详解
// Body of the `case ApplicationFinished(id)` handler.
// Original author's guess (unverified): CPU/memory cleanup has presumably already happened before this point.
// Record the application id as finished.
finishedApps += id
// Covered earlier in this series: per the original note, this cleans up the application's resource files.
maybeCleanupApplication(id)
十四. DecommissionWorker 详解:将 Worker 自身标记为下线(decommissioned)
/**
 * Marks this worker as decommissioned when the decommission feature is enabled.
 *
 * The first successful call sets the `decommissioned` flag and logs it. A call made
 * after the flag is already set, or while the feature is disabled, only logs a warning.
 */
private[deploy] def decommissionSelf(): Unit = {
  val featureEnabled = conf.get(config.DECOMMISSION_ENABLED)
  (featureEnabled, decommissioned) match {
    case (true, false) =>
      // Feature is on and this is the first request: flip the flag.
      decommissioned = true
      logInfo(s"Decommission worker $workerId.")
    case (_, true) =>
      // Flag already set, regardless of the feature switch.
      logWarning(s"Worker $workerId already started decommissioning.")
    case (false, false) =>
      logWarning(s"Receive decommission request, but decommission feature is disabled.")
  }
}
十五. WorkerSigPWRReceived 详解
// Body of the `case WorkerSigPWRReceived` handler.
// First mark this worker itself as decommissioned.
decommissionSelf()
// Then notify the master that this worker is decommissioning.
sendToMaster(WorkerDecommissioning(workerId, self))
搬砖多年终不得要领,遂下载源码读之,望得真经。