A walkthrough of the spark-shell startup script
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

#
# Shell script for starting the Spark Shell REPL

# Determine whether we are running under Cygwin
cygwin=false
case "`uname`" in
  CYGWIN*) cygwin=true;;
esac

# Enter posix mode for bash
set -o posix

## Global script variables

# Resolve FWDIR to the Spark installation directory
FWDIR="$(cd `dirname $0`/..; pwd)"

# Define the help function.
# It reuses spark-submit's --help output, filtering out spark-submit's own Usage lines:
# Usage: spark-submit [options] <app jar | python file> [app arguments]
# Usage: spark-submit --kill [submission ID] --master [spark://...]
# Usage: spark-submit --status [submission ID] --master [spark://...]
function usage() {
  echo "Usage: ./bin/spark-shell [options]"
  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
  exit 0
}

if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
  usage
fi

# Source utils.sh, which sorts the command-line arguments, checks the validity of
# some of them, and assigns the following two variables:
#
# SUBMISSION_OPTS, which contains:
#   key-value options:
#     --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
#     --conf | --properties-file | --driver-memory | --driver-java-options | \
#     --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
#     --total-executor-cores | --executor-cores | --queue | --num-executors | --archives
#   flag (non key-value) options:
#     --verbose | -v | --supervise
#   (each key-value option is checked to make sure a value actually follows it)
#
# APPLICATION_OPTS, which contains every argument not captured by SUBMISSION_OPTS
source $FWDIR/bin/utils.sh

# Register the usage function defined above so utils.sh can print help on bad input
SUBMIT_USAGE_FUNCTION=usage

# Call gatherSparkSubmitOpts from utils.sh to sort the arguments
gatherSparkSubmitOpts "$@"

# Main function: runs spark-submit with --class org.apache.spark.repl.Main
function main() {
  if $cygwin; then
    # Workaround for issue involving JLine and Cygwin
    # (see http://sourceforge.net/p/jline/bugs/40/).
    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
    # (see https://github.com/sbt/sbt/issues/562).
    stty -icanon min 1 -echo > /dev/null 2>&1
    export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
    stty icanon echo > /dev/null 2>&1
  else
    export SPARK_SUBMIT_OPTS
    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
  fi
}

# Copy restore-TTY-on-exit functions from Scala script so spark-shell exits properly even in
# binary distribution of Spark where Scala is not installed
exit_status=127
saved_stty=""

# restore stty settings (echo in particular)
function restoreSttySettings() {
  stty $saved_stty
  saved_stty=""
}

function onExit() {
  if [[ "$saved_stty" != "" ]]; then
    restoreSttySettings
  fi
  exit $exit_status
}

# to reenable echo if we are interrupted before completing.
trap onExit INT

# save terminal settings
saved_stty=$(stty -g 2>/dev/null)
# clear on error so we don't later try to restore them
if [[ ! $? ]]; then
  saved_stty=""
fi

main "$@"

# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
exit_status=$?
onExit
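To make the argument handling concrete, here is a hypothetical trace; the option values and application arguments below are invented for illustration, not taken from the script. Running

./bin/spark-shell --master local[4] --driver-memory 2g --verbose -i init.scala

would be split by gatherSparkSubmitOpts into

SUBMISSION_OPTS  = (--master local[4] --driver-memory 2g --verbose)
APPLICATION_OPTS = (-i init.scala)

so main() would end up executing, roughly:

$FWDIR/bin/spark-submit --class org.apache.spark.repl.Main \
  --master local[4] --driver-memory 2g --verbose \
  spark-shell -i init.scala

The literal word spark-shell sits in the position where spark-submit normally expects the application jar or Python file; everything after it is handed to the REPL as application arguments.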
Contents of the utils.sh script:
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Gather all spark-submit options into SUBMISSION_OPTS
function gatherSparkSubmitOpts() {

  if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
    echo "Function for printing usage of $0 is not set." 1>&2
    echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
    exit 1
  fi

  # NOTE: If you add or remove spark-submit options,
  # modify NOT ONLY this script but also SparkSubmitArguments.scala
  SUBMISSION_OPTS=()
  APPLICATION_OPTS=()
  while (($#)); do
    case "$1" in
      --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
      --conf | --properties-file | --driver-memory | --driver-java-options | \
      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
      --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
        if [[ $# -lt 2 ]]; then
          "$SUBMIT_USAGE_FUNCTION"
          exit 1;
        fi
        SUBMISSION_OPTS+=("$1"); shift
        SUBMISSION_OPTS+=("$1"); shift
        ;;

      --verbose | -v | --supervise)
        SUBMISSION_OPTS+=("$1"); shift
        ;;

      *)
        APPLICATION_OPTS+=("$1"); shift
        ;;
    esac
  done

  export SUBMISSION_OPTS
  export APPLICATION_OPTS
}
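To watch gatherSparkSubmitOpts work in isolation, it can be exercised from a small scratch script. The sketch below is illustrative only: it assumes utils.sh is reachable at ./bin/utils.sh relative to the current directory, and the sample arguments (app.py, --input, /tmp/data) are made up.

#!/usr/bin/env bash
# test_gather_opts.sh -- quick check of how arguments are split (illustrative sketch)

# gatherSparkSubmitOpts refuses to run unless a usage function has been registered
function usage() {
  echo "Usage: ./test_gather_opts.sh [options]"
  exit 0
}
SUBMIT_USAGE_FUNCTION=usage

# Assumed location of utils.sh; adjust to your Spark installation
source ./bin/utils.sh

gatherSparkSubmitOpts --master "local[2]" --name demo --supervise app.py --input /tmp/data

echo "SUBMISSION_OPTS : ${SUBMISSION_OPTS[@]}"
echo "APPLICATION_OPTS: ${APPLICATION_OPTS[@]}"

# Expected output:
#   SUBMISSION_OPTS : --master local[2] --name demo --supervise
#   APPLICATION_OPTS: app.py --input /tmp/data

Here --master and --name are consumed together with the value that follows them, --supervise is taken alone as a flag, and everything else falls through the catch-all case into APPLICATION_OPTS.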