
/**
 * Finishes master recovery: discards workers and applications still marked
 * UNKNOWN, deals with drivers that no worker claimed, then switches the
 * master to ALIVE and triggers a scheduling pass.
 *
 * Does nothing unless the current state is RECOVERING, so a repeated call
 * is a no-op.
 */
private def completeRecovery(): Unit = {
  // Ensure "only-once" recovery semantics using a short synchronization period.
  if (state != RecoveryState.RECOVERING) { return }
  state = RecoveryState.COMPLETING_RECOVERY

  // Cleanup (per the original author's note): entries are removed from the
  // in-memory structures, from related components' caches, and from
  // persistent storage.
  // Kill off any workers and apps that didn't respond to us.
  // `filter` builds a separate collection before `foreach` runs, so the
  // callbacks can presumably mutate `workers` / `apps` safely; do not swap in
  // `withFilter` or a view without checking that.
  workers.filter(_.state == WorkerState.UNKNOWN).foreach(removeWorker)
  apps.filter(_.state == ApplicationState.UNKNOWN).foreach(finishApplication)

  // Reschedule drivers which were not claimed by any workers: supervised
  // drivers are launched again, unsupervised ones are removed with ERROR.
  drivers.filter(_.worker.isEmpty).foreach { d =>
    logWarning(s"Driver ${d.id} was not found after master recovery")
    if (d.desc.supervise) {
      logWarning(s"Re-launching ${d.id}")
      // The original logged the relaunch but never performed it.
      // NOTE(review): assumes a `relaunchDriver` helper exists on this class; confirm.
      relaunchDriver(d)
    } else {
      removeDriver(d.id, DriverState.ERROR, None)
      logWarning(s"Did not re-launch ${d.id} because it was not supervised")
    }
  }

  state = RecoveryState.ALIVE
  // Run a scheduling pass now that the set of applications/workers has changed.
  schedule()
  logInfo("Recovery complete - resuming operations!")
}


posted @ 2019-11-25 23:19  二黑诶  阅读(294)  评论(0)  编辑  收藏  举报