Spark Source Code (12): A Detailed Walkthrough of the spark-submit Flow
Jobs are usually submitted with something like spark-submit --class org.apache.spark.examples.SparkPi ...
Below we use this SparkPi example to trace through the source code.
Finding the class with the main function
Let's follow a concrete spark command line and take a close look at the Spark source:
cat /opt/spark/spark/bin/spark-submit
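In a typical Spark 3.x distribution the tail of this script is just a thin wrapper that hands everything to spark-class (paths may differ in your install; shown here for reference):

  if [ -z "${SPARK_HOME}" ]; then
    source "$(dirname "$0")"/find-spark-home
  fi
  exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkSubmit "$@"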
That gives us the class being executed: org.apache.spark.deploy.SparkSubmit. Let's keep following it.
org.apache.spark.deploy.SparkSubmit, line 1017
override def main(args: Array[String]): Unit = {
  val submit = new SparkSubmit() {
    self =>
    ......
    override def doSubmit(args: Array[String]): Unit = {
      try {
        super.doSubmit(args) /* still just delegates to the parent class's method */
      } catch {
        case e: SparkUserAppException =>
          exitFn(e.exitCode)
      }
    }
    ......
  }
  submit.doSubmit(args) /* the central method */
}
org.apache.spark.deploy.SparkSubmit, line 80
def doSubmit(args: Array[String]): Unit = {
  val uninitLog = initializeLogIfNecessary(true, silent = true)
  val appArgs = parseArguments(args) /* parse the command-line arguments */
  if (appArgs.verbose) {
    logInfo(appArgs.toString)
  }
  appArgs.action match { /* is the action a submit, a kill, ... */
    case SparkSubmitAction.SUBMIT => submit(appArgs, uninitLog)
    case SparkSubmitAction.KILL => kill(appArgs)
    case SparkSubmitAction.REQUEST_STATUS => requestStatus(appArgs)
    case SparkSubmitAction.PRINT_VERSION => printVersion()
  }
}
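Where does appArgs.action come from? Here is a hedged sketch of the resolution in SparkSubmitArguments, condensed from the Spark 3.x source (the CLI flags are the real ones; the surrounding code is compressed):

  // --kill <submissionId>   sets action = SparkSubmitAction.KILL
  // --status <submissionId> sets action = SparkSubmitAction.REQUEST_STATUS
  // --version               sets action = SparkSubmitAction.PRINT_VERSION
  // with none of these flags, the action falls back to SUBMIT:
  action = Option(action).getOrElse(SparkSubmitAction.SUBMIT)

So a plain spark-submit --class ... invocation always takes the SUBMIT branch above.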
org.apache.spark.deploy.SparkSubmit, line 156
private def submit(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
  def doRunMain(): Unit = {
    if (args.proxyUser != null) { /* was a proxy user specified? */
      val proxyUser = UserGroupInformation.createProxyUser(args.proxyUser,
        UserGroupInformation.getCurrentUser())
      try {
        proxyUser.doAs(new PrivilegedExceptionAction[Unit]() {
          override def run(): Unit = {
            runMain(args, uninitLog)
          }
        })
      } catch {
        case e: Exception =>
          if (e.getStackTrace().length == 0) {
            error(s"ERROR: ${e.getClass().getName()}: ${e.getMessage()}")
          } else {
            throw e
          }
      }
    } else {
      runMain(args, uninitLog) /* the method that is ultimately called */
    }
  }
  if (args.isStandaloneCluster && args.useRest) { /* check the cluster deploy mode */
    try {
      logInfo("Running Spark using the REST application submission protocol.")
      doRunMain() /* still ends up in doRunMain, defined above */
    } catch {
      case e: SubmitRestConnectionException =>
        logWarning(s"Master endpoint ${args.master} was not a REST server. " +
          "Falling back to legacy submission gateway instead.")
        args.useRest = false
        submit(args, false)
    }
  } else {
    doRunMain() /* still ends up in doRunMain, defined above */
  }
}
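A note on the standalone-cluster branch: a standalone master exposes a REST submission endpoint (by default on port 6066, when the master's REST server is enabled) alongside the legacy RPC gateway (by default on port 7077). A hedged usage example that would take this path (host name and jar path are placeholders):

  spark-submit --master spark://master-host:6066 --deploy-mode cluster \
    --class org.apache.spark.examples.SparkPi /path/to/spark-examples.jar

If the endpoint turns out not to speak REST, the SubmitRestConnectionException handler above flips useRest to false and resubmits through the legacy gateway.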
org.apache.spark.deploy.SparkSubmit, line 893
private def runMain(args: SparkSubmitArguments, uninitLog: Boolean): Unit = {
  /* internally this works out the cluster mode and what to actually run */
  val (childArgs, childClasspath, sparkConf, childMainClass) = prepareSubmitEnvironment(args)
  if (uninitLog) {
    Logging.uninitialize()
  }
  if (args.verbose) {
    logInfo(s"Main class:\n$childMainClass")
    logInfo(s"Arguments:\n${childArgs.mkString("\n")}")
    logInfo(s"Spark config:\n${Utils.redact(sparkConf.getAll.toMap).mkString("\n")}")
    logInfo(s"Classpath elements:\n${childClasspath.mkString("\n")}")
    logInfo("\n")
  }
  val loader = getSubmitClassLoader(sparkConf) /* obtain the class loader */
  for (jar <- childClasspath) {
    addJarToClasspath(jar, loader) /* add each jar to the classpath */
  }
  var mainClass: Class[_] = null
  try {
    mainClass = Utils.classForName(childMainClass) /* load the class */
  } catch {
    ......
  }
  /* is the class a subclass of SparkApplication? */
  val app: SparkApplication =
    if (classOf[SparkApplication].isAssignableFrom(mainClass)) {
      mainClass.getConstructor().newInstance().asInstanceOf[SparkApplication]
    } else {
      new JavaMainApplication(mainClass) /* wrap a plain main class as a SparkApplication */
    }
  ......
  try {
    app.start(childArgs.toArray, sparkConf) /* start it; for a plain main class the implementation is JavaMainApplication */
  } catch {
    case t: Throwable =>
      throw findCause(t)
  } finally {
    if (args.master.startsWith("k8s") && !isShell(args.primaryResource) &&
        !isSqlShell(args.mainClass) && !isThriftServer(args.mainClass)) {
      try {
        SparkContext.getActive.foreach(_.stop())
      } catch {
        case e: Throwable => logError(s"Failed to close SparkContext: $e")
      }
    }
  }
}
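What does prepareSubmitEnvironment hand back as childMainClass? A hedged sketch of the selection, condensed from the Spark 3.x source (the class names are the real ones; the control flow is simplified):

  // client mode: run the user's class directly in this JVM
  if (deployMode == CLIENT) {
    childMainClass = args.mainClass
  } else if (isYarnCluster) {
    childMainClass = "org.apache.spark.deploy.yarn.YarnClusterApplication"
  } else if (isStandaloneCluster) {
    // REST path -> RestSubmissionClientApp, legacy path -> ClientApp
    childMainClass = if (args.useRest) REST_CLUSTER_SUBMIT_CLASS else STANDALONE_CLUSTER_SUBMIT_CLASS
  } else if (isKubernetesCluster) {
    childMainClass = "org.apache.spark.deploy.k8s.submit.KubernetesClientApplication"
  }

So in cluster modes the class started by runMain is not the user class at all but a cluster-specific client application; only in client mode does JavaMainApplication invoke the user's main directly, as shown next.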
org.apache.spark.deploy.JavaMainApplication, line 41
override def start(args: Array[String], conf: SparkConf): Unit = {
  val mainMethod = klass.getMethod("main", new Array[String](0).getClass) /* look up the class's main method via reflection */
  if (!Modifier.isStatic(mainMethod.getModifiers)) {
    throw new IllegalStateException("The main method in the given main class must be static")
  }
  val sysProps = conf.getAll.toMap
  sysProps.foreach { case (k, v) =>
    sys.props(k) = v
  }
  mainMethod.invoke(null, args) // a plain, unadorned invoke kicks off the user's main
}
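To make the reflection step concrete, here is a minimal, self-contained sketch of the same pattern (HelloApp is a hypothetical stand-in for the user's main class, not anything in Spark):

  import java.lang.reflect.Modifier

  object HelloApp {
    def main(args: Array[String]): Unit =
      println(s"HelloApp invoked with: ${args.mkString(", ")}")
  }

  object ReflectDemo {
    def main(args: Array[String]): Unit = {
      // A Scala object compiles to a class with a static main forwarder,
      // so the same lookup JavaMainApplication performs works here too
      val klass = Class.forName("HelloApp")
      val mainMethod = klass.getMethod("main", classOf[Array[String]])
      require(Modifier.isStatic(mainMethod.getModifiers), "main must be static")
      mainMethod.invoke(null, Array("a", "b")) // prints: HelloApp invoked with: a, b
    }
  }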
After years of laying bricks without ever grasping the craft, I turn to the source code, hoping to find the true scriptures there.