解析GenericOptionsParser
hadoop源代码分析(4)-org.apache.hadoop.util包-GenericOptionsParser类【原创】
一 准备
hadoop版本:1.0.3,GenericOptionsParser所在的包:org.apache.hadoop.util
学习方法:先了解GenericOptionsParser这个解析器的使用场景,再从构造函数入手,理解整个类的工作流程。
时间:2013-02-21
二 GenericOptionsParser功能描述
GenericOptionsParser是hadoop框架中解析命令行参数的基本类。它能够辨别一些标准的命令行参数,能够使应用程序轻易地指定namenode,jobtracker,以及其他额外的配置资源。一般它使用方法如下:
// Help text for the generic options: one line per supported option,
// followed by the general command line syntax.
out.println("Generic options supported are");
out.println(
    "-conf <configuration file> specify an application configuration file");
out.println("-D <property=value> use value for given property");
out.println("-fs <local|namenode:port> specify a namenode");
out.println("-jt <local|jobtracker:port> specify a job tracker");
out.println("-files <comma separated list of files> specify comma separated"
    + " files to be copied to the map reduce cluster");
out.println("-libjars <comma separated list of jars> specify comma separated"
    + " jar files to include in the classpath.");
out.println("-archives <comma separated list of archives> specify comma"
    + " separated archives to be unarchived on the compute machines.\n");
out.println("The general command line syntax is");
out.println("bin/hadoop command [genericOptions] [commandOptions]\n");
三 使用示例:
$ bin/hadoop dfs -fs darwin:8020 -ls /data
    list /data directory in dfs with namenode darwin:8020

$ bin/hadoop dfs -D fs.default.name=darwin:8020 -ls /data
    list /data directory in dfs with namenode darwin:8020

$ bin/hadoop dfs -conf hadoop-site.xml -ls /data
    list /data directory in dfs with conf specified in hadoop-site.xml

$ bin/hadoop job -D mapred.job.tracker=darwin:50020 -submit job.xml
    submit a job to job tracker darwin:50020

$ bin/hadoop job -jt darwin:50020 -submit job.xml
    submit a job to job tracker darwin:50020

$ bin/hadoop job -jt local -submit job.xml
    submit a job to local runner

$ bin/hadoop jar -libjars testlib.jar -archives test.tgz -files file.txt inputjar args
    job submission with libjars, files and archives
四 GenericOptionsParser主要方法、属性分析
对GenericOptionsParser这个类的分析从构造函数开始:它有多个构造函数,但真正的处理都是在parseGeneralOptions(options, conf, args)这个函数中完成的。
/**
 * Creates a GenericOptionsParser that parses the given options together
 * with the basic Hadoop options.
 * The resulting command line object can be obtained through
 * getCommandLine().
 *
 * @param conf the configuration to modify
 * @param options options built by the caller
 * @param args User-specified arguments
 * @throws IOException if thrown while parsing and applying the options
 */
public GenericOptionsParser(
    Configuration conf,
    Options options,
    String[] args) throws IOException {
  // Parsing runs first and works on the conf argument; the field is
  // assigned only after parsing has returned.
  parseGeneralOptions(options, conf, args);
  this.conf = conf;
}
parseGeneralOptions(options, conf, args)这个函数解析用户指定的参数,获取基本选项以及根据需要修改配置。它首先指定每个通用选项的属性,然后解析选项,参数,把它转化为命令行对象(CommandLine),紧接着把设定好的命令行参数写入系统配置,源代码如下:
1 /** 2 * 解析用户指定的参数,获取基本选项以及根据需要修改配置 3 * Parse the user-specified options, get the generic options, and modify 4 * configuration accordingly 5 * @param conf Configuration to be modified 6 * @param args User-specified arguments 7 * @return Command-specific arguments 8 */ 9 private String[] parseGeneralOptions(Options opts, Configuration conf, 10 String[] args) throws IOException { 11 // 指定每个通用选项的属性 12 opts = buildGeneralOptions(opts); 13 CommandLineParser parser = new GnuParser(); 14 try { 15 // 解析选项,参数,获取命令行 16 commandLine = parser.parse(opts, args, true); 17 // 根据用户指定的参数(commandLine)修改系统的配置 18 processGeneralOptions(conf, commandLine); 19 return commandLine.getArgs(); 20 } catch(ParseException e) { 21 LOG.warn("options parsing failed: "+e.getMessage()); 22 23 HelpFormatter formatter = new HelpFormatter(); 24 formatter.printHelp("general options are: ", opts); 25 } 26 return args; 27 }
processGeneralOptions函数的作用是修改配置。它利用CommandLine对象的相关方法来完成这一工作,CommandLine这个类包含了处理选项、选项描述以及选项值的方法。源代码如下:
1 /** 2 * 根据用户指定的参数修改配置 3 * Modify configuration according user-specified generic options 4 * @param conf Configuration to be modified 5 * @param line User-specified generic options 6 */ 7 private void processGeneralOptions(Configuration conf, 8 CommandLine line) throws IOException { 9 if (line.hasOption("fs")) { 10 // 设置NAMENODE的ip 11 FileSystem.setDefaultUri(conf, line.getOptionValue("fs")); 12 } 13 14 if (line.hasOption("jt")) { 15 conf.set("mapred.job.tracker", line.getOptionValue("jt")); 16 } 17 if (line.hasOption("conf")) { 18 String[] values = line.getOptionValues("conf"); 19 for(String value : values) { 20 // 新增配置文件,除非是final属性,不然新配置文件会覆盖旧的配置文件 21 conf.addResource(new Path(value)); 22 } 23 } 24 if (line.hasOption("libjars")) { 25 conf.set("tmpjars", 26 validateFiles(line.getOptionValue("libjars"), conf)); 27 //setting libjars in client classpath 28 URL[] libjars = getLibJars(conf); 29 if(libjars!=null && libjars.length>0) { 30 conf.setClassLoader(new URLClassLoader(libjars, conf.getClassLoader())); 31 Thread.currentThread().setContextClassLoader( 32 new URLClassLoader(libjars, 33 Thread.currentThread().getContextClassLoader())); 34 } 35 } 36 if (line.hasOption("files")) { 37 conf.set("tmpfiles", 38 validateFiles(line.getOptionValue("files"), conf)); 39 } 40 if (line.hasOption("archives")) { 41 conf.set("tmparchives", 42 validateFiles(line.getOptionValue("archives"), conf)); 43 } 44 if (line.hasOption('D')) { 45 String[] property = line.getOptionValues('D'); 46 for(String prop : property) { 47 String[] keyval = prop.split("=", 2); 48 if (keyval.length == 2) { 49 conf.set(keyval[0], keyval[1]); 50 } 51 } 52 } 53 conf.setBoolean("mapred.used.genericoptionsparser", true); 54 55 // tokensFile 56 if(line.hasOption("tokenCacheFile")) { 57 String fileName = line.getOptionValue("tokenCacheFile"); 58 // check if the local file exists 59 try 60 { 61 FileSystem localFs = FileSystem.getLocal(conf); 62 Path p = new Path(fileName); 63 if (!localFs.exists(p)) { 64 throw new 
FileNotFoundException("File "+fileName+" does not exist."); 65 } 66 67 LOG.debug("setting conf tokensFile: " + fileName); 68 conf.set("mapreduce.job.credentials.json", 69 localFs.makeQualified(p).toString()); 70 } catch (IOException e) { 71 throw new RuntimeException(e); 72 } 73 } 74 }
五 GenericOptionsParser相关类、接口简述
跟这个类相关的类是:Options,Option,CommandLine,FileSystem。