

  • bin/spark-submit
if [ -z "${SPARK_HOME}" ]; then
  export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"

# disable randomized hash for string in Python 3.3+
exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"
  • org.apache.spark.deploy.SparkSubmit
  def main(args: Array[String]): Unit = {
    val appArgs = new SparkSubmitArguments(args)
    if (appArgs.verbose) {
      // scalastyle:off println
      // scalastyle:on println
    appArgs.action match {
      case SparkSubmitAction.SUBMIT => submit(appArgs)
      case SparkSubmitAction.KILL => kill(appArgs)
      case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs)
  • org.apache.spark.deploy.SparkSubmit#submit
  private def submit(args: SparkSubmitArguments): Unit = {
    val (childArgs, childClasspath, sysProps, childMainClass) = prepareSubmitEnvironment(args)

    def doRunMain(): Unit = {
      if (args.proxyUser != null) {
    	runMain(childArgs, childClasspath, sysProps, childMainClass, args.verbose)
      } else {
        runMain(childArgs, childClasspath, sysProps, childMainClass, args.verbose)

  • org.apache.spark.deploy.SparkSubmit#prepareSubmitEnvironment


   * Prepare the environment for submitting an application.
   * This returns a 4-tuple:
   *   (1) the arguments for the child process,
   *   (2) a list of classpath entries for the child,
   *   (3) a map of system properties, and
   *   (4) the main class for the child
   * Exposed for testing.
1. 核心思想就是返回子进程需要的参数、子进程需要的classpath、系统属性(system properties)和mainClass。
2. 此处我们只关注yarn-client和yarn-cluster两种模式的流程,mesos和standalone暂不做分析。
    // Set the cluster manager
    val clusterManager: Int = args.master match {
      case m if m.startsWith("yarn") => YARN
      case m if m.startsWith("spark") => STANDALONE
      case m if m.startsWith("mesos") => MESOS
      case m if m.startsWith("local") => LOCAL
      case _ => printErrorAndExit("Master must start with yarn, spark, mesos, or local"); -1

    // Set the deploy mode; default is client mode
    var deployMode: Int = args.deployMode match {
      case "client" | null => CLIENT
      case "cluster" => CLUSTER
      case _ => printErrorAndExit("Deploy mode must be either client or cluster"); -1
  • 以上代码用于判断集群类型和部署方式:集群类型分为四种,分别为YARN/STANDALONE/MESOS/LOCAL;部署方式分为两种,分别为CLIENT/CLUSTER。
    // In client mode, launch the application main class directly
    // In addition, add the main application jar and any added jars (if any) to the classpath
    if (deployMode == CLIENT) {
      childMainClass = args.mainClass
      if (isUserJar(args.primaryResource)) {
        childClasspath += args.primaryResource
      if (args.jars != null) { childClasspath ++= args.jars.split(",") }
      if (args.childArgs != null) { childArgs ++= args.childArgs }
  • CLIENT模式下,直接使用application自己的mainClass。mainClass要么通过--class指定,要么从jar包的Manifest(Main-Class属性)中读取:

    mainClass = jar.getManifest.getMainAttributes.getValue("Main-Class")


      // In yarn-cluster mode, use yarn.Client as a wrapper around the user class
   if (isYarnCluster) {
     childMainClass = "org.apache.spark.deploy.yarn.Client"
     if (args.isPython) {
       childArgs += ("--primary-py-file", args.primaryResource)
       if (args.pyFiles != null) {
         childArgs += ("--py-files", args.pyFiles)
       childArgs += ("--class", "org.apache.spark.deploy.PythonRunner")
     } else if (args.isR) {
       val mainFile = new Path(args.primaryResource).getName
       childArgs += ("--primary-r-file", mainFile)
       childArgs += ("--class", "org.apache.spark.deploy.RRunner")
     } else {
       if (args.primaryResource != SPARK_INTERNAL) {
         childArgs += ("--jar", args.primaryResource)
       childArgs += ("--class", args.mainClass)
     if (args.childArgs != null) {
       args.childArgs.foreach { arg => childArgs += ("--arg", arg) }
  • CLUSTER模式下,childMainClass被指定为 org.apache.spark.deploy.yarn.Client,相当于对application本身的mainClass做了一层封装。

  • application自己的mainClass会以--class参数的方式传递到 org.apache.spark.deploy.yarn.Client的main方法中。在Client中,会根据是否传入了该参数(即userClass是否为null)来区分CLUSTER和CLIENT模式:

    def isClusterMode: Boolean = userClass != null

  • 为什么需要在Client类中还要区分CLUSTER和CLIENT呢?后面会有详细的介绍。

  • 至此,mainClass和classPath都已经生成,下一步就是通过runMain来执行:

   * Run the main method of the child class using the provided launch environment.
   * Note that this main class will not be the one provided by the user if we're
   * running cluster deploy mode or python applications.
  private def runMain(
      childArgs: Seq[String],
      childClasspath: Seq[String],
      sysProps: Map[String, String],
      childMainClass: String,
      verbose: Boolean): Unit = {
    // scalastyle:off println
    if (verbose) {
      printStream.println(s"Main class:\n$childMainClass")
      printStream.println(s"System properties:\n${sysProps.mkString("\n")}")
      printStream.println(s"Classpath elements:\n${childClasspath.mkString("\n")}")
    // scalastyle:on println

    val loader =
      if (sysProps.getOrElse("spark.driver.userClassPathFirst", "false").toBoolean) {
        new ChildFirstURLClassLoader(new Array[URL](0),
      } else {
        new MutableURLClassLoader(new Array[URL](0),

    for (jar <- childClasspath) {
      addJarToClasspath(jar, loader)

    for ((key, value) <- sysProps) {
      System.setProperty(key, value)

    var mainClass: Class[_] = null

    try {
      mainClass = Utils.classForName(childMainClass)
    } catch {
      case e: ClassNotFoundException =>
        if (childMainClass.contains("thriftserver")) {
          // scalastyle:off println
          printStream.println(s"Failed to load main class $childMainClass.")
          printStream.println("You need to build Spark with -Phive and -Phive-thriftserver.")
          // scalastyle:on println
      case e: NoClassDefFoundError =>
        if (e.getMessage.contains("org/apache/hadoop/hive")) {
          // scalastyle:off println
          printStream.println(s"Failed to load hive class.")
          printStream.println("You need to build Spark with -Phive and -Phive-thriftserver.")
          // scalastyle:on println

    // SPARK-4170
    if (classOf[scala.App].isAssignableFrom(mainClass)) {
      printWarning("Subclasses of scala.App may not work correctly. Use a main() method instead.")

    val mainMethod = mainClass.getMethod("main", new Array[String](0).getClass)
    if (!Modifier.isStatic(mainMethod.getModifiers)) {
      throw new IllegalStateException("The main method in the given main class must be static")

    def findCause(t: Throwable): Throwable = t match {
      case e: UndeclaredThrowableException =>
        if (e.getCause() != null) findCause(e.getCause()) else e
      case e: InvocationTargetException =>
        if (e.getCause() != null) findCause(e.getCause()) else e
      case e: Throwable =>

    try {
      mainMethod.invoke(null, childArgs.toArray)
    } catch {
      case t: Throwable =>
        findCause(t) match {
          case SparkUserAppException(exitCode) =>

          case t: Throwable =>
            throw t
  • CLIENT模式下,application的mainClass直接开始执行,一般会初始化SparkConf、创建SparkContext,并生成DAGScheduler、TaskScheduler和YarnClientSchedulerBackend;随后在YarnClientSchedulerBackend的start方法中启动org.apache.spark.deploy.yarn.Client:
    override def start() {
    val driverHost = conf.get("spark.driver.host")
    val driverPort = conf.get("spark.driver.port")
    val hostport = driverHost + ":" + driverPort
    sc.ui.foreach { ui => conf.set("spark.driver.appUIAddress", ui.appUIAddress) }

    val argsArrayBuf = new ArrayBuffer[String]()
    argsArrayBuf += ("--arg", hostport)
    argsArrayBuf ++= getExtraClientArguments

    logDebug("ClientArguments called with: " + argsArrayBuf.mkString(" "))
    val args = new ClientArguments(argsArrayBuf.toArray, conf)
    totalExpectedExecutors = args.numExecutors
    client = new Client(args, conf)
    appId = client.submitApplication()

    // SPARK-8687: Ensure all necessary properties have already been set before
    // we initialize our driver scheduler backend, which serves these properties
    // to the executors


    // SPARK-8851: In yarn-client mode, the AM still does the credentials refresh. The driver
    // reads the credentials from HDFS, just like the executors and updates its own credentials
    // cache.
    if (conf.contains("spark.yarn.credentials.file")) {
    monitorThread = asyncMonitorApplication()
  • CLUSTER模式下,在执行runMain的时候,会调用 org.apache.spark.deploy.yarn.Client的main方法:
    def main(argStrings: Array[String]) {
    if (!sys.props.contains("SPARK_SUBMIT")) {
      logWarning("WARNING: This client is deprecated and will be removed in a " +
        "future version of Spark. Use ./bin/spark-submit with \"--master yarn\"")

    // Set an env variable indicating we are running in YARN mode.
    // Note that any env variable with the SPARK_ prefix gets propagated to all (remote) processes
    System.setProperty("SPARK_YARN_MODE", "true")
    val sparkConf = new SparkConf

    val args = new ClientArguments(argStrings, sparkConf)
    // to maintain backwards-compatibility
    if (!Utils.isDynamicAllocationEnabled(sparkConf)) {
      sparkConf.setIfMissing("spark.executor.instances", args.numExecutors.toString)
    new Client(args, sparkConf).run()
   * Submit an application to the ResourceManager.
   * If set spark.yarn.submit.waitAppCompletion to true, it will stay alive
   * reporting the application's status until the application has exited for any reason.
   * Otherwise, the client process will exit after submission.
   * If the application finishes with a failed, killed, or undefined status,
   * throw an appropriate SparkException.
  def run(): Unit = {
    this.appId = submitApplication()
    if (!launcherBackend.isConnected() && fireAndForget) {
      val report = getApplicationReport(appId)
      val state = report.getYarnApplicationState
      logInfo(s"Application report for $appId (state: $state)")
      if (state == YarnApplicationState.FAILED || state == YarnApplicationState.KILLED) {
        throw new SparkException(s"Application $appId finished with status: $state")
    } else {
      val (yarnApplicationState, finalApplicationStatus) = monitorApplication(appId)
      if (yarnApplicationState == YarnApplicationState.FAILED ||
        finalApplicationStatus == FinalApplicationStatus.FAILED) {
        throw new SparkException(s"Application $appId finished with failed status")
      if (yarnApplicationState == YarnApplicationState.KILLED ||
        finalApplicationStatus == FinalApplicationStatus.KILLED) {
        throw new SparkException(s"Application $appId is killed")
      if (finalApplicationStatus == FinalApplicationStatus.UNDEFINED) {
        throw new SparkException(s"The final status of application $appId is undefined")

  • 两种模式最终都会调用submitApplication方法,将应用提交到YARN;具体过程会在另外一篇博客中详细讲解。

