Spark-submit脚本解读
#!/usr/bin/env bash # # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # NOTE: Any changes in this file must be reflected in SparkSubmitDriverBootstrapper.scala! #Spark的安装目录 export SPARK_HOME="$(cd `dirname $0`/..; pwd)" #将参数已数组的形式赋值给ORIG_ARGS ORIG_ARGS=("$@") #根据不同的参数项,把对应的参数值赋给对应的环境变量
while (($#)); do if [ "$1" = "--deploy-mode" ]; then SPARK_SUBMIT_DEPLOY_MODE=$2 elif [ "$1" = "--properties-file" ]; then SPARK_SUBMIT_PROPERTIES_FILE=$2 elif [ "$1" = "--driver-memory" ]; then export SPARK_SUBMIT_DRIVER_MEMORY=$2 elif [ "$1" = "--driver-library-path" ]; then export SPARK_SUBMIT_LIBRARY_PATH=$2 elif [ "$1" = "--driver-class-path" ]; then export SPARK_SUBMIT_CLASSPATH=$2 elif [ "$1" = "--driver-java-options" ]; then export SPARK_SUBMIT_OPTS=$2 fi shift done #定义一些默认的变量,会被用户的自定义参数覆盖 # :- 同 nvl DEFAULT_PROPERTIES_FILE="$SPARK_HOME/conf/spark-defaults.conf" export SPARK_SUBMIT_DEPLOY_MODE=${SPARK_SUBMIT_DEPLOY_MODE:-"client"} export SPARK_SUBMIT_PROPERTIES_FILE=${SPARK_SUBMIT_PROPERTIES_FILE:-"$DEFAULT_PROPERTIES_FILE"} # For client mode, the driver will be launched in the same JVM that launches # SparkSubmit, so we may need to read the properties file for any extra class # paths, library paths, java options and memory early on. Otherwise, it will # be too late by the time the driver JVM has started. #从spark-defaults.conf文件中获取"spark.driver.extra*\|spark.driver.memory" 两个变量的值 if [[ "$SPARK_SUBMIT_DEPLOY_MODE" == "client" && -f "$SPARK_SUBMIT_PROPERTIES_FILE" ]]; then # Parse the properties file only if the special configs exist contains_special_configs=$( grep -e "spark.driver.extra*\|spark.driver.memory" "$SPARK_SUBMIT_PROPERTIES_FILE" | \ grep -v "^[[:space:]]*#" ) if [ -n "$contains_special_configs" ]; then export SPARK_SUBMIT_BOOTSTRAP_DRIVER=1 fi fi #将参数传递spark-class
#exec命令在执行时会把当前的shell process关闭,然后换到后面的命令继续执行 exec $SPARK_HOME/bin/spark-class org.apache.spark.deploy.SparkSubmit "${ORIG_ARGS[@]}"