|NO.Z.00078|——————————|BigDataEnd|——|Hadoop&Spark.V04|——|Spark.v04|Spark 原理 源码|Master Worker解析&Worker启动流程|
一、Master Worker解析
### --- 源码提取说明:Worker启动流程
~~~ # 源码提取说明:Worker.scala
~~~ # 18行~90行
package org.apache.spark.deploy.worker
private[deploy] class Worker(
    override val rpcEnv: RpcEnv,
    webUiPort: Int,
    cores: Int,
    memory: Int,
    masterRpcAddresses: Array[RpcAddress],
    endpointName: String,
    workDirPath: String = null,
    val conf: SparkConf,
    val securityMgr: SecurityManager)
  extends ThreadSafeRpcEndpoint with Logging {

  private val host = rpcEnv.address.host
  private val port = rpcEnv.address.port

  Utils.checkHost(host, "Expected hostname")
  assert(port > 0)

  // A scheduled executor used to send messages (e.g. re-registration) at a later time.
  private val forwordMessageScheduler =
    ThreadUtils.newDaemonSingleThreadScheduledExecutor("worker-forward-message-scheduler")

  // A separated thread to clean up the workDir and the directories of finished applications.
  // Used to provide the implicit parameter of `Future` methods.
  private val cleanupThreadExecutor = ExecutionContext.fromExecutorService(
    ThreadUtils.newDaemonSingleThreadExecutor("worker-cleanup-thread"))

  // For worker and executor IDs
  private def createDateFormat = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US)

  // Send a heartbeat every (heartbeat timeout) / 4 milliseconds
  private val HEARTBEAT_MILLIS = conf.getLong("spark.worker.timeout", 60) * 1000 / 4

  // Registration retry policy: the first INITIAL_REGISTRATION_RETRIES attempts use a short
  // interval; the remaining attempts (up to TOTAL_REGISTRATION_RETRIES) use a prolonged one.
  private val INITIAL_REGISTRATION_RETRIES = 6
  private val TOTAL_REGISTRATION_RETRIES = INITIAL_REGISTRATION_RETRIES + 10
  private val FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND = 0.500
  // Random multiplier in [0.5, 1.5) that fuzzes the retry intervals so all workers do not
  // retry registration at exactly the same moment.
  private val REGISTRATION_RETRY_FUZZ_MULTIPLIER = {
    val randomNumberGenerator = new Random(UUID.randomUUID.getMostSignificantBits)
    randomNumberGenerator.nextDouble + FUZZ_MULTIPLIER_INTERVAL_LOWER_BOUND
  }
  private val INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS =
    math.round(10 * REGISTRATION_RETRY_FUZZ_MULTIPLIER)
  private val PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS =
    math.round(60 * REGISTRATION_RETRY_FUZZ_MULTIPLIER)

  // Whether to periodically clean up old application work directories.
  private val CLEANUP_ENABLED = conf.getBoolean("spark.worker.cleanup.enabled", false)
  // How often worker will clean up old app folders
  private val CLEANUP_INTERVAL_MILLIS = conf.getLong("spark.worker.cleanup.interval", 60 * 30) * 1000
  // TTL for app folders/data; after TTL expires it will be cleaned up
  private val APP_DATA_RETENTION_SECONDS = conf.getLong("spark.worker.cleanup.appDataTtl", 7 * 24 * 3600)

  private val testing: Boolean = sys.props.contains("spark.testing")
  // Reference to the currently active master endpoint, if any.
  private var master: Option[RpcEndpointRef] = None
  private val preferConfiguredMasterAddress =
    conf.getBoolean("spark.worker.preferConfiguredMasterAddress", false)
  private var masterAddressToConnect: Option[RpcAddress] = None
  private var activeMasterUrl: String = ""
  private[worker] var activeMasterWebUiUrl : String = ""
  private var workerWebUiUrl: String = ""
  private val workerUri = RpcEndpointAddress(rpcEnv.address, endpointName).toString
  private var registered = false
  private var connected = false
  private val workerId = generateWorkerId()

  private val sparkHome =
    if (testing) {
      assert(sys.props.contains("spark.test.home"), "spark.test.home is not set!")
      new File(sys.props("spark.test.home"))
    } else {
      new File(sys.env.get("SPARK_HOME").getOrElse("."))
    }

  var workDir: File = null
  val finishedExecutors = new LinkedHashMap[String, ExecutorRunner]
  val drivers = new HashMap[String, DriverRunner]
  val executors = new HashMap[String, ExecutorRunner]
  val finishedDrivers = new LinkedHashMap[String, DriverRunner]
  val appDirectories = new HashMap[String, Seq[String]]
  val finishedApps = new HashSet[String]

  // How many finished executors/drivers to retain for display in the web UI.
  val retainedExecutors = conf.getInt("spark.worker.ui.retainedExecutors",
    WorkerWebUI.DEFAULT_RETAINED_EXECUTORS)
  val retainedDrivers = conf.getInt("spark.worker.ui.retainedDrivers",
    WorkerWebUI.DEFAULT_RETAINED_DRIVERS)

  // The shuffle service is not actually started unless configured.
  private val shuffleService = new ExternalShuffleService(conf, securityMgr)

  private val publicAddress = {
    val envVar = conf.getenv("SPARK_PUBLIC_DNS")
    if (envVar != null) envVar else host
  }
  private var webUi: WorkerWebUI = null

  private var connectionAttemptCount = 0

  private val metricsSystem = MetricsSystem.createMetricsSystem("worker", conf, securityMgr)
  private val workerSource = new WorkerSource(this)

  private var registerMasterFutures: Array[JFuture[_]] = null
  private var registrationRetryTimer: Option[JScheduledFuture[_]] = None

  private val registerMasterThreadPool = ThreadUtils.newDaemonCachedThreadPool(
    "worker-register-master-threadpool",
    masterRpcAddresses.length // Make sure we can register with all masters at the same time
  )

  var coresUsed = 0
  var memoryUsed = 0

  def coresFree: Int = cores - coresUsed
  def memoryFree: Int = memory - memoryUsed
/**
 * Startup function of the Worker node.
 */
~~~ # 源码提取说明:Worker.scala
~~~ # 91行~195行
override def onStart() {
  assert(!registered)
  logInfo("Starting Spark worker %s:%d with %d cores, %s RAM".format(
    host, port, cores, Utils.megabytesToString(memory)))
  logInfo(s"Running Spark version ${org.apache.spark.SPARK_VERSION}")
  logInfo("Spark home: " + sparkHome)
  createWorkDir()
  // Start the external shuffle service; it is a no-op unless the user configured it.
  shuffleService.startIfEnabled()
  webUi = new WorkerWebUI(this, workDir, webUiPort)
  webUi.bind()
  workerWebUiUrl = s"http://$publicAddress:${webUi.boundPort}"
  // Begin registering this worker with the master(s).
  registerWithMaster()
  // Export worker metrics and start the metrics/monitoring system for this node.
  metricsSystem.registerSource(workerSource)
  metricsSystem.start()
  metricsSystem.getServletHandlers.foreach(webUi.attachHandler)
}
/**
 * Submit one registration task per configured master address; each task connects to its
 * master and sends a RegisterWorker message, which is handled by
 * Master.receive: PartialFunction[Any, Unit].
 */
private def tryRegisterAllMasters(): Array[JFuture[_]] = {
  masterRpcAddresses.map { masterAddress =>
    registerMasterThreadPool.submit(new Runnable {
      override def run(): Unit = {
        try {
          // e.g. INFO Worker: Connecting to master 192.168.2.89:60115...
          logInfo("Connecting to master " + masterAddress + "...")
          val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME)
          // Send the RegisterWorker message to this master.
          sendRegisterMessageToMaster(masterEndpoint)
        } catch {
          case ie: InterruptedException => // Cancelled
          case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
        }
      }
    })
  }
}
/**
 * Re-register with the master because a network failure or a master failure has occurred.
 * If the re-registration attempt threshold is exceeded, the worker exits with error.
 * Note that for thread-safety this should only be called from the rpcEndpoint.
 */
private def reregisterWithMaster(): Unit = {
  Utils.tryOrExit {
    connectionAttemptCount += 1
    if (registered) {
      // Registration succeeded in the meantime; stop retrying.
      cancelLastRegistrationRetry()
    } else if (connectionAttemptCount <= TOTAL_REGISTRATION_RETRIES) {
      logInfo(s"Retrying connection to master (attempt # $connectionAttemptCount)")
      master match {
        case Some(masterRef) =>
          // registered == false && master != None means we lost the connection to master, so
          // masterRef cannot be used and we need to recreate it again. Note: we must not set
          // master to None due to the above comments.
          if (registerMasterFutures != null) {
            registerMasterFutures.foreach(_.cancel(true))
          }
          val masterAddress =
            if (preferConfiguredMasterAddress) masterAddressToConnect.get else masterRef.address
          registerMasterFutures = Array(registerMasterThreadPool.submit(new Runnable {
            override def run(): Unit = {
              try {
                logInfo("Connecting to master " + masterAddress + "...")
                val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME)
                sendRegisterMessageToMaster(masterEndpoint)
              } catch {
                case ie: InterruptedException => // Cancelled
                case NonFatal(e) => logWarning(s"Failed to connect to master $masterAddress", e)
              }
            }
          }))
        case None =>
          if (registerMasterFutures != null) {
            registerMasterFutures.foreach(_.cancel(true))
          }
          // We are retrying the initial registration against all configured masters.
          registerMasterFutures = tryRegisterAllMasters()
      }
      // We have exceeded the initial registration retry threshold.
      // All retries from now on should use a higher interval.
      if (connectionAttemptCount == INITIAL_REGISTRATION_RETRIES) {
        registrationRetryTimer.foreach(_.cancel(true))
        registrationRetryTimer = Some(
          forwordMessageScheduler.scheduleAtFixedRate(new Runnable {
            override def run(): Unit = Utils.tryLogNonFatalError {
              self.send(ReregisterWithMaster)
            }
          }, PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS,
          PROLONGED_REGISTRATION_RETRY_INTERVAL_SECONDS,
          TimeUnit.SECONDS))
      }
    } else {
      logError("All masters are unresponsive! Giving up.")
      System.exit(1)
    }
  }
}
/**
 * Cancel the last registration retry, or do nothing if no retry is pending.
 * 取消上次重试的注册,或者如果没有重试,什么也不做
 */
~~~ # 源码提取说明:Worker.scala
~~~ # 196行~ 271行
// NOTE(review): method body is elided in this excerpt ("... ...").
private def cancelLastRegistrationRetry(): Unit = ... ...
/**
 * Start registering with the master.
 * Worker's registerWithMaster method registers this Worker with the Master: it calls
 * tryRegisterAllMasters, which sends a RegisterWorker message to every master.
 */
private def registerWithMaster() {
registrationRetryTimer match {
case None =>
registered = false
// tryRegisterAllMasters sends the RegisterWorker message to all masters.
registerMasterFutures = tryRegisterAllMasters()
connectionAttemptCount = 0
// Scheduled task that periodically sends ReregisterWithMaster to self until registered.
registrationRetryTimer = Some(forwordMessageScheduler.scheduleAtFixedRate(
new Runnable {
override def run(): Unit = Utils.tryLogNonFatalError {
Option(self).foreach(_.send(ReregisterWithMaster))
}
},
INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
INITIAL_REGISTRATION_RETRY_INTERVAL_SECONDS,
TimeUnit.SECONDS))
case Some(_) =>
// A retry is already scheduled; do not spawn another one.
logInfo("Not spawning another attempt to register with the master, since there is an attempt scheduled already.")
}
}
/** Send a RegisterWorker message describing this worker to the given master endpoint. */
private def sendRegisterMessageToMaster(masterEndpoint: RpcEndpointRef): Unit = {
  masterEndpoint.send(RegisterWorker(
    workerId,
    host,
    port,
    self,
    cores,
    memory,
    workerWebUiUrl,
    masterEndpoint.address))
}
// Message dispatch for this RPC endpoint.
// NOTE(review): most handler bodies are elided in this excerpt; only the cases are shown.
override def receive: PartialFunction[Any, Unit] = synchronized {
case msg: RegisterWorkerResponse => handleRegisterResponse(msg)
// On SendHeartbeat, the Worker forwards a Heartbeat message to the Master.
case SendHeartbeat =>
if (connected) { sendToMaster(Heartbeat(workerId, self)) }
case WorkDirCleanup =>
case MasterChanged(masterRef, masterWebUiUrl) =>
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
case executorStateChanged @ ExecutorStateChanged(appId, execId, state, message, exitStatus) =>
// Launch the driver program.
case LaunchDriver(driverId, driverDesc) =>
case KillDriver(driverId) =>
case driverStateChanged @ DriverStateChanged(driverId, state, exception) => handleDriverStateChanged(driverStateChanged)
case ReregisterWithMaster => reregisterWithMaster()
case ApplicationFinished(id) =>
}
// Handles ask-style (request/reply) messages.
// NOTE(review): the RequestWorkerState handler body is elided in this excerpt.
override def receiveAndReply(context: RpcCallContext): PartialFunction[Any, Unit] = {
  case RequestWorkerState =>
}
// Called when the connection to a remote address is lost; body elided in this excerpt.
override def onDisconnected(remoteAddress: RpcAddress): Unit = {
... ...
}
// Reaction to losing the master connection; body elided in this excerpt.
private def masterDisconnected() {
... ...
}
/** Build a unique worker id of the form worker-&lt;timestamp&gt;-&lt;host&gt;-&lt;port&gt;. */
private def generateWorkerId(): String =
  s"worker-${createDateFormat.format(new Date)}-$host-$port"
// Teardown of the Worker endpoint. The shutdown order below is deliberate: background
// executors are stopped before the runners and services they interact with.
override def onStop() {
cleanupThreadExecutor.shutdownNow()
metricsSystem.report()
// Stop any pending master-registration retries before shutting down the thread pools.
cancelLastRegistrationRetry()
forwordMessageScheduler.shutdownNow()
registerMasterThreadPool.shutdownNow()
// Kill all executors and drivers still owned by this worker.
executors.values.foreach(_.kill())
drivers.values.foreach(_.kill())
shuffleService.stop()
webUi.stop()
metricsSystem.stop()
}
}
~~~ # 源码提取说明:Worker.scala
~~~ # 272行~329行
private[deploy] object Worker extends Logging {
  val SYSTEM_NAME = "sparkWorker"
  val ENDPOINT_NAME = "Worker"

  /**
   * The Worker node is also started via a main function, which:
   *  1. creates a SparkConf,
   *  2. parses the Worker arguments,
   *  3. creates and starts the RpcEnv and registers the Worker endpoint with it.
   */
  def main(argStrings: Array[String]) {
    Utils.initDaemon(log)
    val conf = new SparkConf
    val args = new WorkerArguments(argStrings, conf)
    val rpcEnv = startRpcEnvAndEndpoint(args.host, args.port, args.webUiPort, args.cores,
      args.memory, args.masters, args.workDir, conf = conf)
    rpcEnv.awaitTermination()
  }

  /** Create the worker RpcEnv and register the Worker endpoint on it. */
  def startRpcEnvAndEndpoint(
      host: String,
      port: Int,
      webUiPort: Int,
      cores: Int,
      memory: Int,
      masterUrls: Array[String],
      workDir: String,
      workerNumber: Option[Int] = None,
      conf: SparkConf = new SparkConf): RpcEnv = {
    // The LocalSparkCluster runs multiple local sparkWorkerX RPC Environments.
    val systemName = SYSTEM_NAME + workerNumber.map(_.toString).getOrElse("")
    val securityMgr = new SecurityManager(conf)
    val rpcEnv = RpcEnv.create(systemName, host, port, conf, securityMgr)
    val masterAddresses = masterUrls.map(RpcAddress.fromSparkURL(_))
    // setupEndpoint triggers the Worker's onStart() before it begins receiving messages.
    rpcEnv.setupEndpoint(ENDPOINT_NAME, new Worker(rpcEnv, webUiPort, cores, memory,
      masterAddresses, ENDPOINT_NAME, workDir, conf, securityMgr))
    rpcEnv
  }

  /** Returns true if the command's javaOpts request node-local SSL configuration. */
  def isUseLocalNodeSSLConfig(cmd: Command): Boolean = {
    val pattern = """\-Dspark\.ssl\.useNodeLocalConf\=(.+)""".r
    val result = cmd.javaOpts.collectFirst {
      case pattern(_result) => _result.toBoolean
    }
    result.getOrElse(false)
  }

  /**
   * If node-local SSL configuration is requested, replace any -Dspark.ssl.* java options in
   * the command with the spark.ssl.* settings taken from this node's SparkConf.
   */
  def maybeUpdateSSLSettings(cmd: Command, conf: SparkConf): Command = {
    val prefix = "spark.ssl."
    val useNLC = "spark.ssl.useNodeLocalConf"
    if (isUseLocalNodeSSLConfig(cmd)) {
      val newJavaOpts = cmd.javaOpts.filter(opt => !opt.startsWith(s"-D$prefix")) ++
        conf.getAll.collect { case (key, value) if key.startsWith(prefix) => s"-D$key=$value" } :+
        s"-D$useNLC=true"
      cmd.copy(javaOpts = newJavaOpts)
    } else {
      cmd
    }
  }
}
二、worker启动流程:总结
### --- 小结:
~~~ Master、Worker都是RpcEndpoint,实现了 RpcEndpoint 接口。主要任务收发、处理消息;
~~~ Master、Worker的生命周期都遵循:constructor -> onStart -> receive* -> onStop
~~~ 在Master的onStart方法中最重要的事情是:执行恢复
~~~ 在Worker的onStart方法中最重要的事情是:向master注册

Walter Savage Landor: "I strove with none, for none was worth my strife. Nature I loved and, next to Nature, Art; I warm'd both hands before the fire of life; It sinks, and I am ready to depart."
——W. S. Landor
分类:
bdv018-spark.v03
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?