Spark Source Code: The TaskScheduler Initialization Process, and How the ClientActor Sends the Application Registration Info to the Master
Spark version: 1.3. This post walks through SparkContext.createTaskScheduler, the TaskScheduler initialization process, and the ClientActor's registration with the Master.

1. TaskScheduler initialization: SparkContext.createTaskScheduler

```scala
// In SparkContext
/**
 * Create a task scheduler based on a given master URL.
 * Return a 2-tuple of the scheduler backend and the task scheduler.
 */
private def createTaskScheduler(
    sc: SparkContext,
    master: String): (SchedulerBackend, TaskScheduler) = {
  // Regular expression used for local[N] and local[*] master formats.
  // Only one of the pattern variables is listed here; the others cover
  // local[N, maxRetries], a local Spark cluster of [N, cores, memory],
  // Spark deploy clusters, Mesos clusters, and Simr clusters.
  val LOCAL_N_REGEX = """local\[([0-9]+|\*)\]""".r
  // When running locally, don't try to re-execute tasks on failure:
  // in local mode a failed task is simply not retried.
  val MAX_LOCAL_TASK_FAILURES = 1

  master match {
    case "local" => ...

    // Spark standalone mode
    case SPARK_REGEX(sparkUrl) =>
      // Create a TaskSchedulerImpl
      val scheduler = new TaskSchedulerImpl(sc)
      val masterUrls = sparkUrl.split(",").map("spark://" + _)
      // Create a SparkDeploySchedulerBackend -- how exactly is it created?
      // See 1.1 below.
      val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls)
      // Call initialize to set up the scheduler; see 1.2 below for the
      // initialize method and the TaskScheduler initialization process.
      scheduler.initialize(backend)
      (backend, scheduler)

    case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) => ...
  }
}
```

1.1 val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls)

```scala
class SparkDeploySchedulerBackend(
    scheduler: TaskSchedulerImpl,
    sc: SparkContext,
    masters: Array[String])
  extends CoarseGrainedSchedulerBackend(scheduler, sc.env.actorSystem) // note: an ActorSystem is passed to the parent here
  with AppClientListener
  with Logging {
  ...
  override def start() {
    // First call the parent class's start method, which creates the
    // DriverActor used to communicate with Executors and send them tasks.
    // See 1.1.1 below.
    super.start()

    // Prepare some parameters; they are then wrapped in an object that is
    // sent to the Master.
    val driverUrl = ...
    // Important: CoarseGrainedExecutorBackend is the class that will later
    // implement the Executor.
    // Wrap the task launch information in a Command.
    val command = Command("org.apache.spark.executor.CoarseGrainedExecutorBackend",
      args, sc.executorEnvs, classPathEntries ++ testingClassPath, libraryPathEntries,
      javaOpts)
    // Final wrapping: put the command and the application's resource
    // requirements into an ApplicationDescription.
    val appDesc = new ApplicationDescription(sc.appName, maxCores, sc.executorMemory,
      command, appUIAddress, sc.eventLogDir, sc.eventLogCodec)
    // Create an AppClient, passing the ApplicationDescription in through the
    // primary constructor.
    client = new AppClient(sc.env.actorSystem, masters, appDesc, this, conf)
    // Then call AppClient's start method, which creates a ClientActor. Just
    // like the Master and Worker actors, it registers with the Master from
    // preStart; it communicates with the Master and sends it the application
    // information. See 1.1.2 below.
    client.start()
    ...
  }
}
```

1.1.1 super.start()

```scala
// super.start() calls CoarseGrainedSchedulerBackend.start
// (the coarse-grained scheduler backend)
class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val actorSystem: ActorSystem)
  extends ExecutorAllocationClient with SchedulerBackend with Logging {
  ...
  override def start() {
    ...
    // (prashant) send conf instead of properties
    // Using the ActorSystem that was passed in when this backend was created,
    // create the DriverActor on the Driver side. It is used to interact with
    // Executors and to send tasks to them.
    driverActor = actorSystem.actorOf(
      Props(new DriverActor(properties)), name = CoarseGrainedSchedulerBackend.ACTOR_NAME)
  }
  ...
}
```
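Before moving on: the actorOf call above is plain Akka (classic actors). Here is a minimal, self-contained sketch of that pattern — not Spark code; ToyDriverActor, LaunchTask, and the message payload are stand-ins invented for illustration — showing how an ActorSystem builds a named actor from Props and how messages are fired at it with `!`:

```scala
import akka.actor.{Actor, ActorSystem, Props}

// Hypothetical stand-in for the messages the real DriverActor sends to Executors.
case class LaunchTask(serializedTask: String)

// Hypothetical stand-in for Spark's DriverActor; it only prints what it
// would otherwise ship to an Executor.
class ToyDriverActor(properties: Seq[(String, String)]) extends Actor {
  def receive = {
    case LaunchTask(task) => println(s"DriverActor: launching '$task'")
  }
}

object DriverActorDemo extends App {
  val actorSystem = ActorSystem("sparkDriver")
  // Same shape as CoarseGrainedSchedulerBackend.start: wrap the constructor
  // call in Props and register the actor under a well-known name.
  val driverActor = actorSystem.actorOf(
    Props(new ToyDriverActor(Seq.empty)), name = "CoarseGrainedScheduler")
  driverActor ! LaunchTask("task-0") // fire-and-forget, asynchronous
  Thread.sleep(500)                  // crude: give the message time to be processed
  actorSystem.terminate()            // Akka 2.4+; Akka 2.3 used shutdown()
}
```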
1.2 scheduler.initialize(backend) — the TaskScheduler initialization process

The class doc of TaskSchedulerImpl describes it like this:

```scala
/**
 * Schedules tasks for multiple types of clusters by acting through a SchedulerBackend.
 * It can also work with a local setup by using a LocalBackend and setting isLocal to true.
 * It handles common logic, like determining a scheduling order across jobs, waking up to launch
 * speculative tasks, etc.
 *
 * Clients should first call initialize() and start(), then submit task sets through the
 * runTasks method.
 *
 * THREADING: SchedulerBackends and task-submitting clients can call this class from multiple
 * threads, so it needs locks in public API methods to maintain its state. In addition, some
 * SchedulerBackends synchronize on themselves when they want to send events here, and then
 * acquire a lock on us, so we need to make sure that we don't try to lock the backend while
 * we are holding a lock on ourselves.
 */
// TaskSchedulerImpl source
class TaskSchedulerImpl(
    val sc: SparkContext,
    val maxTaskFailures: Int,
    isLocal: Boolean = false)
  extends TaskScheduler with Logging {

  def initialize(backend: SchedulerBackend) {
    this.backend = backend
    // temporarily set rootPool name to empty
    // rootPool is the root scheduling pool; its minShare and weight are both 0 here
    rootPool = new Pool("", schedulingMode, 0, 0)
    // Create a schedulable builder, which decides the order in which queued
    // task sets are offered to workers: first-in-first-out or fair scheduling
    schedulableBuilder = {
      schedulingMode match {
        case SchedulingMode.FIFO => new FIFOSchedulableBuilder(rootPool)
        case SchedulingMode.FAIR => new FairSchedulableBuilder(rootPool, conf)
      }
    }
    // Build the scheduling pools
    schedulableBuilder.buildPools()
  }
}
```

(Which of the two modes is used is driven by configuration; see the short note at the end of this post.)

1.1.2 client.start() — the ClientActor registers with the Master

```scala
private[spark] class AppClient(
    actorSystem: ActorSystem,
    masterUrls: Array[String],
    appDescription: ApplicationDescription,
    listener: AppClientListener,
    conf: SparkConf)
  extends Logging {
  ...
  // TODO: ClientActor lifecycle method
  override def preStart() {
    context.system.eventStream.subscribe(self, classOf[RemotingLifecycleEvent])
    try {
      // TODO: the ClientActor registers with the Master
      registerWithMaster()
    } catch {
      case e: Exception =>
        logWarning("Failed to connect to master", e)
        markDisconnected()
        context.stop(self)
    }
  }

  def registerWithMaster() {
    // Register with the Master
    tryRegisterAllMasters()
    ...
  }

  def tryRegisterAllMasters() {
    for (masterAkkaUrl <- masterAkkaUrls) {
      // Loop over all Master addresses and establish a connection to each one
      val actor = context.actorSelection(masterAkkaUrl)
      // With a reference to the Master's actor, send it the application
      // registration request; all parameters are already wrapped in
      // appDescription. Continue to 1.1.3 below.
      actor ! RegisterApplication(appDescription)
    }
  }
  ...
}
```

1.1.3 The Master handles RegisterApplication

```scala
class Master(...) {
  // TODO: the application registration message sent by the ClientActor
  case RegisterApplication(description) => {
    if (state == RecoveryState.STANDBY) {
      // ignore, don't send response
    } else {
      // Build the application info
      val app = createApplication(description, sender)
      // Store the application info in memory
      registerApplication(app)
      // Save it with the persistence engine
      persistenceEngine.addApplication(app)
      // The Master sends the ClientActor a registration-success message, i.e.
      // it sends back the appId and the Master's URL. The ClientActor then
      // updates its master URL and invokes the listener to start monitoring
      // how the application runs; with that, application registration is done.
      sender ! RegisteredApplication(app.id, masterUrl)
      // Important: the Master now schedules resources, i.e. decides on which
      // Workers to launch the application. schedule() is called whenever the
      // cluster's resources change: a new Worker registers, or an application
      // is submitted.
      schedule()
    }
  }
}
```
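Taken together, 1.1.2 and 1.1.3 are one asynchronous Akka request/reply. The sketch below replays that round trip inside a single local ActorSystem (the real AppClient resolves the remote Master with context.actorSelection; ToyMaster, ToyClientActor, and the simplified message case classes are hypothetical stand-ins, not Spark's actual classes):

```scala
import akka.actor.{Actor, ActorRef, ActorSystem, Props}

// Simplified stand-ins for Spark's deploy messages.
case class RegisterApplication(appDescription: String)
case class RegisteredApplication(appId: String, masterUrl: String)

// Stand-in for the Master: "registers" the app and replies to the sender.
class ToyMaster extends Actor {
  def receive = {
    case RegisterApplication(desc) =>
      println(s"Master: registering application '$desc'")
      sender() ! RegisteredApplication("app-20150101-0001", "spark://master:7077")
  }
}

// Stand-in for the ClientActor: registers from preStart, just as the real
// ClientActor does, then handles the Master's reply.
class ToyClientActor(master: ActorRef) extends Actor {
  override def preStart(): Unit = master ! RegisterApplication("my-app")
  def receive = {
    case RegisteredApplication(appId, masterUrl) =>
      println(s"Client: registered as $appId with master at $masterUrl")
      context.system.terminate() // Akka 2.4+; Akka 2.3 used shutdown()
  }
}

object RegistrationDemo extends App {
  val system = ActorSystem("demo")
  val master = system.actorOf(Props[ToyMaster], "toy-master")
  system.actorOf(Props(new ToyClientActor(master)), "toy-client")
}
```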
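A closing note on section 1.2: which SchedulableBuilder initialize creates is driven by configuration. Assuming the standard spark.scheduler.mode setting (FIFO is the default), a user requests fair scheduling like this:

```scala
import org.apache.spark.{SparkConf, SparkContext}

// Select the task scheduling mode before the SparkContext (and hence the
// TaskScheduler) is created; "FAIR" enables the FairSchedulableBuilder
// path shown in 1.2, "FIFO" is the default.
val conf = new SparkConf()
  .setMaster("local[*]")
  .setAppName("scheduling-mode-demo")
  .set("spark.scheduler.mode", "FAIR")
val sc = new SparkContext(conf)
```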