storm源码剖析(1):storm脚本
今天看了一下storm的命令行脚本${STORM_HOME}/bin/storm,现在将剖析过程整理一下,作为记录。注:使用的storm版本为0.8.0。
${STORM_HOME}/bin/storm文件是用python写的,该文件写得相当精简和清晰。
首先,命令的运行从main()方法开始,main()方法主要是解析输入的命令和命令携带的参数以及读取默认配置和配置文件配置。
if __name__ == "__main__":
main()
def main():
if len(sys.argv) <= 1:
print_usage()
sys.exit(-1) global CONFIG_OPTS config_list, args = parse_config_opts(sys.argv[1:]) //读取配置项和参数(因为命令行以storm
//开头,故这里除去argv[0]),"-c"指定配置项,其他为参数
parse_config(config_list)
COMMAND = args[0] //提取执行命令(第一项为命令)
ARGS = args[1:] //提取命令携带的参数
(COMMANDS.get(COMMAND, "help"))(*ARGS) //查询COMMANDS字典,获取要执行的命令对应的方法,调用该方法
以命令storm jar xxx.jar MAINCLASS arg1 arg2 为例,则上面的命令执行jar(xxx.jar, MAINCLASS, arg1, arg2)方法
下面重点分析一下命令执行方法,还是以jar()方法为例吧
def jar(jarfile, klass, *args): """Syntax: [storm jar topology-jar-path class ...] Runs the main method of class with the specified arguments. The storm jars and configs in ~/.storm are put on the classpath. The process is configured so that StormSubmitter (http://nathanmarz.github.com/storm/doc/backtype/storm/StormSubmitter.html) will upload the jar at topology-jar-path when the topology is submitted. """ exec_storm_class( klass, jvmtype="-client", extrajars=[jarfile, CONF_DIR, STORM_DIR + "/bin"], args=args, jvmopts=["-Dstorm.jar=" + jarfile]) def exec_storm_class(klass, jvmtype="-server", jvmopts=[], extrajars=[], args=[], fork=False): all_args = [ "java", jvmtype, get_config_opts(), "-Dstorm.home=" + STORM_DIR, "-Djava.library.path=" + confvalue("java.library.path", extrajars), "-cp", get_classpath(extrajars), ] + jvmopts + [klass] + list(args) print "Running: " + " ".join(all_args) if fork: os.spawnvp(os.P_WAIT, "java", all_args) else: os.execvp("java", all_args) # replaces the current process and never returns
实际执行的是exec_storm_class(klass, "-client", jvmopts, extrajars, args, fork=False),exec_storm_class()是所有storm命令的真正执行者。此方法的执行分两个关键的步骤:1.构建all_args这个list;2.使用系统调用启动java进程。下面分别对它们进行剖析:
1.构建all_args
all_args = [ "java", jvmtype, get_config_opts(), "-Dstorm.home=" + STORM_DIR, "-Djava.library.path=" + confvalue("java.library.path", extrajars), "-cp", get_classpath(extrajars), ] + jvmopts + [klass] + list(args) def get_config_opts():
"""
设置-Dstorm.options变量
"""
global CONFIG_OPTS return "-Dstorm.options=" + (','.join(CONFIG_OPTS)).replace(' ', "%%%%") def confvalue(name, extrapaths):
"""
启动进程“java -client backtype.storm.command.config_value $name”,来获取配置
""" command = [ "java", "-client", get_config_opts(), "-cp", get_classpath(extrapaths), "backtype.storm.command.config_value", name ] p = sub.Popen(command, stdout=sub.PIPE) //打开管道 output, errors = p.communicate() //从管道中读取输出和错误 lines = output.split("\n") for line in lines: tokens = line.split(" ") if tokens[0] == "VALUE:": return " ".join(tokens[1:]) return "" def get_classpath(extrajars):
"""
将STORM_DIR、STORM_DIR/lib目录下的jar文件以及extrajars中给出的路径用":"拼接起来,构建出classpath的值
""" ret = get_jars_full(STORM_DIR) ret.extend(get_jars_full(STORM_DIR + "/lib")) ret.extend(extrajars) return normclasspath(":".join(ret))
重点关注confvalue方法,该方法使用subprocess模块启动java进程“java -client backtype.storm.command.config_value $name”来获取$name的配置值。
注:subprocess - Subprocesses with accessible I/O streams. This module allows you to spawn processes, connect to their input/output/error pipes, and obtain their return codes.
backtype.storm.command.config_value是backtype/storm/command/config_value.clj生成的。
config_value.clj代码如下:
(ns backtype.storm.command.config-value (:use [backtype.storm config log]) (:gen-class)) (defn -main [^String name] (let [conf (read-storm-config)] (println "VALUE:" (conf name)) ))
(ns backtype.storm.command.config-value //指定命名空间
(:use [backtype.storm config log]) //引入了config.clj和log.clj
(defn -main [^String name]
(let [conf (read-storm-config)]
(println "VALUE:" (conf name))
)) //config_value.clj的main方法是执行config.clj的read-storm-config读取配置,然后将name对应的配置值以"VALUE:"为前缀打印出来。
read-storm-config函数就是在config.clj中实现的:
(defn read-storm-config []
(clojurify-structure (Utils/readStormConfig)))
其中Utils/readStormConfig表示使用了java的backtype.storm.utils包中的Utils类的readStormConfig方法,其定义如下:
public static Map readStormConfig() { Map ret = readDefaultConfig(); Map storm = findAndReadConfigFile("storm.yaml", false); //读取storm.yaml文件 ret.putAll(storm); ret.putAll(readCommandLineOpts()); return ret; } public static Map readDefaultConfig() { return findAndReadConfigFile("defaults.yaml", true); //读取defaults.yaml文件 } public static Map findAndReadConfigFile(String name) { return findAndReadConfigFile(name, true); } public static Map findAndReadConfigFile(String name, boolean mustExist) { try { List<URL> resources = findResources(name); if(resources.isEmpty()) { if(mustExist) throw new RuntimeException("Could not find config file on classpath " + name); else return new HashMap(); } if(resources.size() > 1) { throw new RuntimeException("Found multiple " + name + " resources. You're probably bundling the Storm jars with your topology jar."); } URL resource = resources.get(0); Yaml yaml = new Yaml(); Map ret = (Map) yaml.load(new InputStreamReader(resource.openStream())); if(ret==null) ret = new HashMap(); return new HashMap(ret); } catch (IOException e) { throw new RuntimeException(e); } }
可见storm的默认配置来自defaults.yaml,storm.yaml中的配置会覆盖默认值,命令行配置项(readCommandLineOpts)又会覆盖前两者。贴上一个测试的最终执行进程信息:
Running: java -client -Dstorm.options= -Dstorm.home=/opt/storm/storm-0.8.0 -Djava.library.path=/usr/local/lib:/opt/local/lib:/usr/lib -cp /opt/storm/storm-0.8.0/storm-0.8.0.jar:/opt/storm/storm-0.8.0/lib/jgrapht-0.8.3.jar:/opt/storm/storm-0.8.0/lib/servlet-api-2.5-20081211.jar:/opt/storm/storm-0.8.0/lib/curator-client-1.0.1.jar:/opt/storm/storm-0.8.0/lib/tools.cli-0.2.2.jar:/opt/storm/storm-0.8.0/lib/clout-0.4.1.jar:/opt/storm/storm-0.8.0/lib/guava-10.0.1.jar:/opt/storm/storm-0.8.0/lib/jsr305-1.3.9.jar:/opt/storm/storm-0.8.0/lib/jetty-util-6.1.26.jar:/opt/storm/storm-0.8.0/lib/commons-logging-1.1.1.jar:/opt/storm/storm-0.8.0/lib/curator-framework-1.0.1.jar:/opt/storm/storm-0.8.0/lib/commons-exec-1.1.jar:/opt/storm/storm-0.8.0/lib/kryo-2.17.jar:/opt/storm/storm-0.8.0/lib/ring-servlet-0.3.11.jar:/opt/storm/storm-0.8.0/lib/math.numeric-tower-0.0.1.jar:/opt/storm/storm-0.8.0/lib/reflectasm-1.07-shaded.jar:/opt/storm/storm-0.8.0/lib/ring-jetty-adapter-0.3.11.jar:/opt/storm/storm-0.8.0/lib/jline-0.9.94.jar:/opt/storm/storm-0.8.0/lib/httpcore-4.1.jar:/opt/storm/storm-0.8.0/lib/jetty-6.1.26.jar:/opt/storm/storm-0.8.0/lib/slf4j-log4j12-1.5.8.jar:/opt/storm/storm-0.8.0/lib/commons-fileupload-1.2.1.jar:/opt/storm/storm-0.8.0/lib/slf4j-api-1.5.8.jar:/opt/storm/storm-0.8.0/lib/clojure-1.4.0.jar:/opt/storm/storm-0.8.0/lib/json-simple-1.1.jar:/opt/storm/storm-0.8.0/lib/asm-4.0.jar:/opt/storm/storm-0.8.0/lib/ring-core-0.3.10.jar:/opt/storm/storm-0.8.0/lib/commons-io-1.4.jar:/opt/storm/storm-0.8.0/lib/junit-3.8.1.jar:/opt/storm/storm-0.8.0/lib/httpclient-4.1.1.jar:/opt/storm/storm-0.8.0/lib/disruptor-2.10.1.jar:/opt/storm/storm-0.8.0/lib/tools.logging-0.2.3.jar:/opt/storm/storm-0.8.0/lib/tools.macro-0.1.0.jar:/opt/storm/storm-0.8.0/lib/commons-codec-1.4.jar:/opt/storm/storm-0.8.0/lib/minlog-1.2.jar:/opt/storm/storm-0.8.0/lib/joda-time-2.0.jar:/opt/storm/storm-0.8.0/lib/snakeyaml-1.9.jar:/opt/storm/storm-0.8.0/lib/commons-lang-2.5.jar:/opt/storm/storm-0.8.0/lib/log4j-1.2.16.jar:/opt/
storm/storm-0.8.0/lib/servlet-api-2.5.jar:/opt/storm/storm-0.8.0/lib/hiccup-0.3.6.jar:/opt/storm/storm-0.8.0/lib/zookeeper-3.3.3.jar:/opt/storm/storm-0.8.0/lib/core.incubator-0.1.0.jar:/opt/storm/storm-0.8.0/lib/carbonite-1.5.0.jar:/opt/storm/storm-0.8.0/lib/libthrift7-0.7.0.jar:/opt/storm/storm-0.8.0/lib/objenesis-1.2.jar:/opt/storm/storm-0.8.0/lib/clj-time-0.4.1.jar:/opt/storm/storm-0.8.0/lib/compojure-0.6.4.jar:/opt/storm/storm-0.8.0/lib/jzmq-2.1.0.jar:xxx.jar:/home/storm/.storm:/opt/storm/storm-0.8.0/bin -Dstorm.jar=xxx.jar Test arg1
2.使用系统调用启动java进程
if fork: os.spawnvp(os.P_WAIT, "java", all_args) else: os.execvp("java", all_args) # replaces the current process and never returns
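此处根据fork参数选择启动方式:fork为True时使用os.spawnvp(os.P_WAIT, ...)创建子进程并等待其结束;否则使用os.execvp,用java进程替换当前进程且不再返回。jar命令调用时fork为默认值False,故实际使用的是exec。至于fork和exec的区别,可以参考http://www.cnblogs.com/jerryshao2015/p/4432060.html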