org.apache.hadoop.conf-Configuration
终于遇到第一块硬骨头
Hadoop没有直接使用java.util.Properties来管理配置文件,而是在它之上封装了一套自己的配置文件管理系统和API(从下面的源码可以看到,Configuration内部仍然用Properties对象保存解析后的键–值对)。
1 package org.apache.hadoop.conf; 2 3 import java.io.BufferedInputStream; 4 import java.io.DataInput; 5 import java.io.DataOutput; 6 import java.io.File; 7 import java.io.FileInputStream; 8 import java.io.IOException; 9 import java.io.InputStream; 10 import java.io.InputStreamReader; 11 import java.io.OutputStream; 12 import java.io.Reader; 13 import java.io.Writer; 14 import java.net.URL; 15 import java.util.ArrayList; 16 import java.util.Collection; 17 import java.util.Enumeration; 18 import java.util.HashMap; 19 import java.util.HashSet; 20 import java.util.Iterator; 21 import java.util.List; 22 import java.util.ListIterator; 23 import java.util.Map; 24 import java.util.Properties; 25 import java.util.Set; 26 import java.util.StringTokenizer; 27 import java.util.WeakHashMap; 28 import java.util.concurrent.CopyOnWriteArrayList; 29 import java.util.regex.Matcher; 30 import java.util.regex.Pattern; 31 //引入了IO的流类 32 //引入了网络编程类,用来封装或获取网络资源 33 //引入了工具包中的集合类。其中StringTokenier在hadoop中用的非常多,特别是在mapreduce编程中,经常需要这个工具类来处理数据。 34 //它起着分词器的作用。 35 //关于CopyOnWriteArrayList,之前没接触过,具体推荐 http://blog.csdn.net/imzoer/article/details/9751591,就是实现了线程安全 36 //关于regex.Matcher和regex.Pattern,看regex可知就是正则类,具体用法不太一样 http://ningtukun.blog.163.com/blog/static/186541445201292984311656/ 37 import javax.xml.parsers.DocumentBuilder; 38 import javax.xml.parsers.DocumentBuilderFactory; 39 import javax.xml.parsers.ParserConfigurationException; 40 import javax.xml.transform.Transformer; 41 import javax.xml.transform.TransformerFactory; 42 import javax.xml.transform.dom.DOMSource; 43 import javax.xml.transform.stream.StreamResult; 44 45 import org.apache.commons.logging.Log; 46 import org.apache.commons.logging.LogFactory; 47 import org.apache.hadoop.fs.FileSystem; 48 import org.apache.hadoop.fs.Path; 49 import org.apache.hadoop.io.Writable; 50 import org.apache.hadoop.io.WritableUtils; 51 import org.apache.hadoop.util.StringUtils; 52 import org.codehaus.jackson.JsonFactory; 53 import 
org.codehaus.jackson.JsonGenerator; 54 import org.w3c.dom.DOMException; 55 import org.w3c.dom.Document; 56 import org.w3c.dom.Element; 57 import org.w3c.dom.Node; 58 import org.w3c.dom.NodeList; 59 import org.w3c.dom.Text; 60 import org.xml.sax.SAXException; 61 62 /** 63 * Provides access to configuration parameters. 64 * 65 * <h4 id="Resources">Resources</h4> 66 * 67 * <p>Configurations are specified by resources. A resource contains a set of 68 * name/value pairs as XML data. Each resource is named by either a 69 * <code>String</code> or by a {@link Path}. If named by a <code>String</code>, 70 * then the classpath is examined for a file with that name. If named by a 71 * <code>Path</code>, then the local filesystem is examined directly, without 72 * referring to the classpath. 73 * 74 * <p>Unless explicitly turned off, Hadoop by default specifies two 75 * resources, loaded in-order from the classpath: <ol> 76 * <li><tt><a href="{@docRoot}/../core-default.html">core-default.xml</a> 77 * </tt>: Read-only defaults for hadoop.</li> 78 * <li><tt>core-site.xml</tt>: Site-specific configuration for a given hadoop 79 * installation.</li> 80 * </ol> 81 * Applications may add additional resources, which are loaded 82 * subsequent to these resources in the order they are added. 83 * 84 * <h4 id="FinalParams">Final Parameters</h4> 85 * 86 * <p>Configuration parameters may be declared <i>final</i>. 87 * Once a resource declares a value final, no subsequently-loaded 88 * resource can alter that value. 89 * For example, one might define a final parameter with: 90 * <tt><pre> 91 * <property> 92 * <name>dfs.client.buffer.dir</name> 93 * <value>/tmp/hadoop/dfs/client</value> 94 * <b><final>true</final></b> 95 * </property></pre></tt> 96 * 97 * Administrators typically define parameters as final in 98 * <tt>core-site.xml</tt> for values that user applications may not alter. 
99 * 100 * <h4 id="VariableExpansion">Variable Expansion</h4> 101 * 102 * <p>Value strings are first processed for <i>variable expansion</i>. The 103 * available properties are:<ol> 104 * <li>Other properties defined in this Configuration; and, if a name is 105 * undefined here,</li> 106 * <li>Properties in {@link System#getProperties()}.</li> 107 * </ol> 108 * 109 * <p>For example, if a configuration resource contains the following property 110 * definitions: 111 * <tt><pre> 112 * <property> 113 * <name>basedir</name> 114 * <value>/user/${<i>user.name</i>}</value> 115 * </property> 116 * 117 * <property> 118 * <name>tempdir</name> 119 * <value>${<i>basedir</i>}/tmp</value> 120 * </property></pre></tt> 121 * 122 * When <tt>conf.get("tempdir")</tt> is called, then <tt>${<i>basedir</i>}</tt> 123 * will be resolved to another property in this Configuration, while 124 * <tt>${<i>user.name</i>}</tt> would then ordinarily be resolved to the value 125 * of the System property with that name. 126 */ 127 //一大段注释。第一句说这个类是用来提供访问配置中属性的渠道 128 //第二段话说配置文件的构造,XML文件中键值对。 129 //第三段话说除非主动关闭,否则hadoop会默认加载一些默认的配置文件。并且可以“重载” 130 //第四段说了配置文件中属性标记为final会怎么样 131 //第五段说配置文件中的属性可以用变量来表示,可以不是具体的实值。还举了个例子。 132 public class Configuration implements Iterable<Map.Entry<String,String>>, 133 Writable { 134 //实现了Iterable和Writable接口。 135 //实现Iterable接口,可以调用Iterator()方法进行迭代 136 //关于Map.Entry类,之前不了解。就是map的一种方便的遍历工具类 137 //实现了Writable接口。hadoop没有采用Java的序列化(具体原因不解释),而是引入了自己的序列化系统,所有的 138 //序列化对象都要实现writable接口。以后会遇到。 139 private static final Log LOG = 140 LogFactory.getLog(Configuration.class); 141 //创建了一个日志类。并做了初始化 142 private boolean quietmode = true; 143 //布尔变量quietmode,“安静模式”,用来确定加载配置的时候日志的某些动作, 144 //当为true的时候则在加载解析配置文件的过程中不输出日志信息,反之...... 145 /** 146 * List of configuration resources. 147 */ 148 private ArrayList<Object> resources = new ArrayList<Object>(); 149 //保存了所有通过addResource()方法添加的Configuration对象的资源 150 /** 151 * List of configuration parameters marked <b>final</b>. 
152 */ 153 private Set<String> finalParameters = new HashSet<String>(); 154 //用来保存所有在配置文件中已经被声明为final的键–值对的键 155 private boolean loadDefaults = true; 156 //是否加载默认的配置资源 157 /** 158 * Configuration objects 159 */ 160 private static final WeakHashMap<Configuration,Object> REGISTRY = 161 new WeakHashMap<Configuration,Object>(); 162 //REGISTRY是一个WeakHashMap的变量,key为Configuration,value为Object, 163 //可以看出这个对象存储了不同对象的多个配置信息,弱HashMap可以自动清除不在正常使用的键对应的条目, 164 //发现如果这个值是null会重新加载默认的配置文件中的信息 165 /** 166 * List of default Resources. Resources are loaded in the order of the list 167 * entries 168 */ 169 private static final CopyOnWriteArrayList<String> defaultResources = 170 new CopyOnWriteArrayList<String>(); 171 //存放的是默认的配置信息,通过方法addDefaultResource()可以添加系统的默认资源 172 //存储配置文件的名字而不是配置文件的全路径 173 /** 174 * Flag to indicate if the storage of resource which updates a key needs 175 * to be stored for each key 176 */ 177 private boolean storeResource; 178 //是否需要更新配置文件的标识 179 /** 180 * Stores the mapping of key to the resource which modifies or loads 181 * the key most recently 182 */ 183 private HashMap<String, String> updatingResource; 184 //保存所有需要更新的配置文件 185 static{ 186 //print deprecation warning if hadoop-site.xml is found in classpath 187 ClassLoader cL = Thread.currentThread().getContextClassLoader(); 188 if (cL == null) { 189 cL = Configuration.class.getClassLoader(); 190 } 191 if(cL.getResource("hadoop-site.xml")!=null) { 192 LOG.warn("DEPRECATED: hadoop-site.xml found in the classpath. " + 193 "Usage of hadoop-site.xml is deprecated. 
Instead use core-site.xml, " 194 + "mapred-site.xml and hdfs-site.xml to override properties of " + 195 "core-default.xml, mapred-default.xml and hdfs-default.xml " + 196 "respectively"); 197 } 198 addDefaultResource("core-default.xml"); 199 addDefaultResource("core-site.xml"); 200 } 201 //静态代码块。加载了两个核心配置文件,一个是默认的,一个是用户自己配置的。 202 //如果加载了hadoop-site.xml,则发个警告信息,说它不推荐用,以及推荐用哪个 203 private Properties properties; 204 //Hadoop配置文件解析后的键–值对,都存放在properties中 205 private Properties overlay; 206 //变量overlay用于记录通过set()方式改变的配置项。也就是说,出现在overlay中的键–值对是应用设置的, 207 //而不是通过对配置资源解析得到的 208 private ClassLoader classLoader; 209 { 210 classLoader = Thread.currentThread().getContextClassLoader(); 211 if (classLoader == null) { 212 classLoader = Configuration.class.getClassLoader(); 213 } 214 } 215 //定义了一个类加载器,并做了初始化。非静态代码块 216 /** A new configuration. */ 217 public Configuration() { 218 this(true); 219 } 220 //相当于空构造方法,或者说调用空构造方法的时候默认调用默认配置文件 221 /** A new configuration where the behavior of reading from the default 222 * resources can be turned off. 223 * 224 * If the parameter {@code loadDefaults} is false, the new instance 225 * will not load resources from the default files. 
226 * @param loadDefaults specifies whether to load from the default files 227 */ 228 public Configuration(boolean loadDefaults) { 229 this.loadDefaults = loadDefaults; 230 if (LOG.isDebugEnabled()) { 231 LOG.debug(StringUtils.stringifyException(new IOException("config()"))); 232 } 233 synchronized(Configuration.class) { 234 REGISTRY.put(this, null); 235 } 236 this.storeResource = false; 237 } 238 //构造方法。参数是是否加载默认配置文件 239 /** 240 * A new configuration with the same settings and additional facility for 241 * storage of resource to each key which loads or updates 242 * the key most recently 243 * @param other the configuration from which to clone settings 244 * @param storeResource flag to indicate if the storage of resource to 245 * each key is to be stored 246 */ 247 private Configuration(Configuration other, boolean storeResource) { 248 this(other); 249 this.loadDefaults = other.loadDefaults; 250 this.storeResource = storeResource; 251 if (storeResource) { 252 updatingResource = new HashMap<String, String>(); 253 } 254 } 255 //构造方法。加载了一个新的配置文件和现有的配置文件具有相同的一些配置并有一些新的配置 256 /** 257 * A new configuration with the same settings cloned from another. 258 * 259 * @param other the configuration from which to clone settings. 260 */ 261 @SuppressWarnings("unchecked") 262 public Configuration(Configuration other) { 263 if (LOG.isDebugEnabled()) { 264 LOG.debug(StringUtils.stringifyException 265 (new IOException("config(config)"))); 266 } 267 268 this.resources = (ArrayList)other.resources.clone(); 269 synchronized(other) { 270 if (other.properties != null) { 271 this.properties = (Properties)other.properties.clone(); 272 } 273 274 if (other.overlay!=null) { 275 this.overlay = (Properties)other.overlay.clone(); 276 } 277 } 278 279 this.finalParameters = new HashSet<String>(other.finalParameters); 280 synchronized(Configuration.class) { 281 REGISTRY.put(this, null); 282 } 283 } 284 //构造方法。加载了一个全新的配置文件,并克隆了其属性 285 /** 286 * Add a default resource. 
Resources are loaded in the order of the resources 287 * added. 288 * @param name file name. File should be present in the classpath. 289 */ 290 public static synchronized void addDefaultResource(String name) { 291 if(!defaultResources.contains(name)) { 292 defaultResources.add(name); 293 for(Configuration conf : REGISTRY.keySet()) { 294 if(conf.loadDefaults) { 295 conf.reloadConfiguration(); 296 } 297 } 298 } 299 } 300 //加载默认配置文件方法 301 /** 302 * Add a configuration resource. 303 * 304 * The properties of this resource will override properties of previously 305 * added resources, unless they were marked <a href="#Final">final</a>. 306 * 307 * @param name resource to be added, the classpath is examined for a file 308 * with that name. 309 */ 310 public void addResource(String name) { 311 addResourceObject(name); 312 } 313 314 /** 315 * Add a configuration resource. 316 * 317 * The properties of this resource will override properties of previously 318 * added resources, unless they were marked <a href="#Final">final</a>. 319 * 320 * @param url url of the resource to be added, the local filesystem is 321 * examined directly to find the resource, without referring to 322 * the classpath. 323 */ 324 public void addResource(URL url) { 325 addResourceObject(url); 326 } 327 328 /** 329 * Add a configuration resource. 330 * 331 * The properties of this resource will override properties of previously 332 * added resources, unless they were marked <a href="#Final">final</a>. 333 * 334 * @param file file-path of resource to be added, the local filesystem is 335 * examined directly to find the resource, without referring to 336 * the classpath. 337 */ 338 public void addResource(Path file) { 339 addResourceObject(file); 340 } 341 342 /** 343 * Add a configuration resource. 344 * 345 * The properties of this resource will override properties of previously 346 * added resources, unless they were marked <a href="#Final">final</a>. 
347 * 348 * @param in InputStream to deserialize the object from. 349 */ 350 public void addResource(InputStream in) { 351 addResourceObject(in); 352 } 353 //Hadoop在创建配置类的时候,考虑了三种资源: 354 // 355 //URL资源(网络资源,指的是一个链接); 356 // 357 //CLASSPATH资源(String形式); 358 // 359 //Hadoop文件系统中的Path资源(该资源是基于Hadoop的FileSystem的,使用斜线“/”作为分隔符,如果是绝对路径,应该以“/”开始) 360 /** 361 * Reload configuration from previously added resources. 362 * 363 * This method will clear all the configuration read from the added 364 * resources, and final parameters. This will make the resources to 365 * be read again before accessing the values. Values that are added 366 * via set methods will overlay values read from the resources. 367 */ 368 public synchronized void reloadConfiguration() { 369 properties = null; // trigger reload 370 finalParameters.clear(); // clear site-limits 371 } 372 //重新加载先前加载过的配置资源。加载前会先清空。加了线程并发关键字哦 373 private synchronized void addResourceObject(Object resource) { 374 resources.add(resource); // add to resources 375 reloadConfiguration(); 376 } 377 //注意resource是Object类型。会触发配置的重新加载 378 private static Pattern varPat = Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}"); 379 private static int MAX_SUBST = 20; 380 381 private String substituteVars(String expr) { 382 if (expr == null) { 383 return null; 384 } 385 Matcher match = varPat.matcher(""); 386 String eval = expr; 387 for(int s=0; s<MAX_SUBST; s++) { 388 match.reset(eval); 389 if (!match.find()) { 390 return eval; 391 } 392 String var = match.group(); 393 var = var.substring(2, var.length()-1); // remove ${ .. 
} 394 String val = null; 395 try { 396 val = System.getProperty(var); 397 } catch(SecurityException se) { 398 LOG.warn("Unexpected SecurityException in Configuration", se); 399 } 400 if (val == null) { 401 val = getRaw(var); 402 } 403 if (val == null) { 404 return eval; // return literal ${var}: var is unbound 405 } 406 // substitute 407 eval = eval.substring(0, match.start())+val+eval.substring(match.end()); 408 } 409 throw new IllegalStateException("Variable substitution depth too large: " 410 + MAX_SUBST + " " + expr); 411 } 412 //属性扩展属性和方法。 413 //varPat对含有环境变量的值的进行转换的正则表达式对象; 414 //MAX_SUBST是循环次数,为了避免死循环; 415 //当循环次数过多的时候抛异常。 416 /** 417 * Get the value of the <code>name</code> property, <code>null</code> if 418 * no such property exists. 419 * 420 * Values are processed for <a href="#VariableExpansion">variable expansion</a> 421 * before being returned. 422 * 423 * @param name the property name. 424 * @return the value of the <code>name</code> property, 425 * or null if no such property exists. 426 */ 427 public String get(String name) { 428 return substituteVars(getProps().getProperty(name)); 429 } 430 431 /** 432 * Get the value of the <code>name</code> property, without doing 433 * <a href="#VariableExpansion">variable expansion</a>. 434 * 435 * @param name the property name. 436 * @return the value of the <code>name</code> property, 437 * or null if no such property exists. 438 */ 439 public String getRaw(String name) { 440 return getProps().getProperty(name); 441 } 442 //两种根据名称取得属性值的方法。没有这个属性就返回NULL。 443 //区别是第一个会进行属性扩展,第二个不会 444 /** 445 * Set the <code>value</code> of the <code>name</code> property. 446 * 447 * @param name property name. 448 * @param value property value. 449 */ 450 public void set(String name, String value) { 451 getOverlay().setProperty(name, value); 452 getProps().setProperty(name, value); 453 } 454 455 /** 456 * Sets a property if it is currently unset. 
457 * @param name the property name 458 * @param value the new value 459 */ 460 public void setIfUnset(String name, String value) { 461 if (get(name) == null) { 462 set(name, value); 463 } 464 } 465 //两种根据名称设置值的方法。区别:set()直接把值写入overlay和properties;setIfUnset()只有在get(name)返回null(该属性尚未设置)时才调用set() 466 private synchronized Properties getOverlay() { 467 if (overlay==null){ 468 overlay=new Properties(); 469 } 470 return overlay; 471 } 472 473 /** 474 * Get the value of the <code>name</code> property. If no such property 475 * exists, then <code>defaultValue</code> is returned. 476 * 477 * @param name property name. 478 * @param defaultValue default value. 479 * @return property value, or <code>defaultValue</code> if the property 480 * doesn't exist. 481 */ 482 public String get(String name, String defaultValue) { 483 return substituteVars(getProps().getProperty(name, defaultValue)); 484 } 485 486 /** 487 * Get the value of the <code>name</code> property as an <code>int</code>. 488 * 489 * If no such property exists, or if the specified value is not a valid 490 * <code>int</code>, then <code>defaultValue</code> is returned. 491 * 492 * @param name property name. 493 * @param defaultValue default value. 494 * @return property value as an <code>int</code>, 495 * or <code>defaultValue</code>. 496 */ 497 public int getInt(String name, int defaultValue) { 498 String valueString = get(name); 499 if (valueString == null) 500 return defaultValue; 501 try { 502 String hexString = getHexDigits(valueString); 503 if (hexString != null) { 504 return Integer.parseInt(hexString, 16); 505 } 506 return Integer.parseInt(valueString); 507 } catch (NumberFormatException e) { 508 return defaultValue; 509 } 510 } 511 512 /** 513 * Set the value of the <code>name</code> property to an <code>int</code>. 514 * 515 * @param name property name. 516 * @param value <code>int</code> value of the property.
517 */ 518 public void setInt(String name, int value) { 519 set(name, Integer.toString(value)); 520 } 521 522 523 /** 524 * Get the value of the <code>name</code> property as a <code>long</code>. 525 * If no such property is specified, or if the specified value is not a valid 526 * <code>long</code>, then <code>defaultValue</code> is returned. 527 * 528 * @param name property name. 529 * @param defaultValue default value. 530 * @return property value as a <code>long</code>, 531 * or <code>defaultValue</code>. 532 */ 533 public long getLong(String name, long defaultValue) { 534 String valueString = get(name); 535 if (valueString == null) 536 return defaultValue; 537 try { 538 String hexString = getHexDigits(valueString); 539 if (hexString != null) { 540 return Long.parseLong(hexString, 16); 541 } 542 return Long.parseLong(valueString); 543 } catch (NumberFormatException e) { 544 return defaultValue; 545 } 546 } 547 548 private String getHexDigits(String value) { 549 boolean negative = false; 550 String str = value; 551 String hexString = null; 552 if (value.startsWith("-")) { 553 negative = true; 554 str = value.substring(1); 555 } 556 if (str.startsWith("0x") || str.startsWith("0X")) { 557 hexString = str.substring(2); 558 if (negative) { 559 hexString = "-" + hexString; 560 } 561 return hexString; 562 } 563 return null; 564 } 565 566 /** 567 * Set the value of the <code>name</code> property to a <code>long</code>. 568 * 569 * @param name property name. 570 * @param value <code>long</code> value of the property. 571 */ 572 public void setLong(String name, long value) { 573 set(name, Long.toString(value)); 574 } 575 576 /** 577 * Get the value of the <code>name</code> property as a <code>float</code>. 578 * If no such property is specified, or if the specified value is not a valid 579 * <code>float</code>, then <code>defaultValue</code> is returned. 580 * 581 * @param name property name. 582 * @param defaultValue default value. 
583 * @return property value as a <code>float</code>, 584 * or <code>defaultValue</code>. 585 */ 586 public float getFloat(String name, float defaultValue) { 587 String valueString = get(name); 588 if (valueString == null) 589 return defaultValue; 590 try { 591 return Float.parseFloat(valueString); 592 } catch (NumberFormatException e) { 593 return defaultValue; 594 } 595 } 596 /** 597 * Set the value of the <code>name</code> property to a <code>float</code>. 598 * 599 * @param name property name. 600 * @param value property value. 601 */ 602 public void setFloat(String name, float value) { 603 set(name,Float.toString(value)); 604 } 605 606 /** 607 * Get the value of the <code>name</code> property as a <code>boolean</code>. 608 * If no such property is specified, or if the specified value is not a valid 609 * <code>boolean</code>, then <code>defaultValue</code> is returned. 610 * 611 * @param name property name. 612 * @param defaultValue default value. 613 * @return property value as a <code>boolean</code>, 614 * or <code>defaultValue</code>. 615 */ 616 public boolean getBoolean(String name, boolean defaultValue) { 617 String valueString = get(name); 618 if ("true".equals(valueString)) 619 return true; 620 else if ("false".equals(valueString)) 621 return false; 622 else return defaultValue; 623 } 624 625 /** 626 * Set the value of the <code>name</code> property to a <code>boolean</code>. 627 * 628 * @param name property name. 629 * @param value <code>boolean</code> value of the property. 630 */ 631 public void setBoolean(String name, boolean value) { 632 set(name, Boolean.toString(value)); 633 } 634 635 /** 636 * Set the given property, if it is currently unset. 637 * @param name property name 638 * @param value new value 639 */ 640 public void setBooleanIfUnset(String name, boolean value) { 641 setIfUnset(name, Boolean.toString(value)); 642 } 643 644 /** 645 * Set the value of the <code>name</code> property to the given type. 
This 646 * is equivalent to <code>set(<name>, value.toString())</code>. 647 * @param name property name 648 * @param value new value 649 */ 650 public <T extends Enum<T>> void setEnum(String name, T value) { 651 set(name, value.toString()); 652 } 653 654 /** 655 * Return value matching this enumerated type. 656 * @param name Property name 657 * @param defaultValue Value returned if no mapping exists 658 * @throws IllegalArgumentException If mapping is illegal for the type 659 * provided 660 */ 661 public <T extends Enum<T>> T getEnum(String name, T defaultValue) { 662 final String val = get(name); 663 return null == val 664 ? defaultValue 665 : Enum.valueOf(defaultValue.getDeclaringClass(), val); 666 } 667 //hadoop常用类型的get/set方法,其中有的做了转换。 668 /** 669 * A class that represents a set of positive integer ranges. It parses 670 * strings of the form: "2-3,5,7-" where ranges are separated by comma and 671 * the lower/upper bounds are separated by dash. Either the lower or upper 672 * bound may be omitted meaning all values up to or over. So the string 673 * above means 2, 3, 5, and 7, 8, 9, ... 
674 */ 675 public static class IntegerRanges { 676 private static class Range { 677 int start; 678 int end; 679 } 680 681 List<Range> ranges = new ArrayList<Range>(); 682 683 public IntegerRanges() { 684 } 685 686 public IntegerRanges(String newValue) { 687 StringTokenizer itr = new StringTokenizer(newValue, ","); 688 while (itr.hasMoreTokens()) { 689 String rng = itr.nextToken().trim(); 690 String[] parts = rng.split("-", 3); 691 if (parts.length < 1 || parts.length > 2) { 692 throw new IllegalArgumentException("integer range badly formed: " + 693 rng); 694 } 695 Range r = new Range(); 696 r.start = convertToInt(parts[0], 0); 697 if (parts.length == 2) { 698 r.end = convertToInt(parts[1], Integer.MAX_VALUE); 699 } else { 700 r.end = r.start; 701 } 702 if (r.start > r.end) { 703 throw new IllegalArgumentException("IntegerRange from " + r.start + 704 " to " + r.end + " is invalid"); 705 } 706 ranges.add(r); 707 } 708 } 709 //嵌套类。所以org.apache.hadoop.conf包中有四个类。 710 //解释一种字符串表示的整数范围。注释中有例子。 711 /** 712 * Convert a string to an int treating empty strings as the default value. 713 * @param value the string value 714 * @param defaultValue the value for if the string is empty 715 * @return the desired integer 716 */ 717 private static int convertToInt(String value, int defaultValue) { 718 String trim = value.trim(); 719 if (trim.length() == 0) { 720 return defaultValue; 721 } 722 return Integer.parseInt(trim); 723 } 724 //字符串转整数方法。如果是空的就是用默认值 725 /** 726 * Is the given value in the set of ranges 727 * @param value the value to check 728 * @return is the value in the ranges? 
729 */ 730 public boolean isIncluded(int value) { 731 for(Range r: ranges) { 732 if (r.start <= value && value <= r.end) { 733 return true; 734 } 735 } 736 return false; 737 } 738 //不解释 739 @Override 740 public String toString() { 741 StringBuffer result = new StringBuffer(); 742 boolean first = true; 743 for(Range r: ranges) { 744 if (first) { 745 first = false; 746 } else { 747 result.append(','); 748 } 749 result.append(r.start); 750 result.append('-'); 751 result.append(r.end); 752 } 753 return result.toString(); 754 } 755 } 756 //重写了toString方法 757 /** 758 * Parse the given attribute as a set of integer ranges 759 * @param name the attribute name 760 * @param defaultValue the default value if it is not set 761 * @return a new set of ranges from the configured value 762 */ 763 public IntegerRanges getRange(String name, String defaultValue) { 764 return new IntegerRanges(get(name, defaultValue)); 765 } 766 767 /** 768 * Get the comma delimited values of the <code>name</code> property as 769 * a collection of <code>String</code>s. 770 * If no such property is specified then empty collection is returned. 771 * <p> 772 * This is an optimized version of {@link #getStrings(String)} 773 * 774 * @param name property name. 775 * @return property value as a collection of <code>String</code>s. 776 */ 777 public Collection<String> getStringCollection(String name) { 778 String valueString = get(name); 779 return StringUtils.getStringCollection(valueString); 780 } 781 782 /** 783 * Get the comma delimited values of the <code>name</code> property as 784 * an array of <code>String</code>s. 785 * If no such property is specified then <code>null</code> is returned. 786 * 787 * @param name property name. 788 * @return property value as an array of <code>String</code>s, 789 * or <code>null</code>. 
790 */ 791 public String[] getStrings(String name) { 792 String valueString = get(name); 793 return StringUtils.getStrings(valueString); 794 } 795 796 /** 797 * Get the comma delimited values of the <code>name</code> property as 798 * an array of <code>String</code>s. 799 * If no such property is specified then default value is returned. 800 * 801 * @param name property name. 802 * @param defaultValue The default value 803 * @return property value as an array of <code>String</code>s, 804 * or default value. 805 */ 806 public String[] getStrings(String name, String... defaultValue) { 807 String valueString = get(name); 808 if (valueString == null) { 809 return defaultValue; 810 } else { 811 return StringUtils.getStrings(valueString); 812 } 813 } 814 815 /** 816 * Set the array of string values for the <code>name</code> property as 817 * as comma delimited values. 818 * 819 * @param name property name. 820 * @param values The values 821 */ 822 public void setStrings(String name, String... values) { 823 set(name, StringUtils.arrayToString(values)); 824 } 825 826 /** 827 * Load a class by name. 828 * 829 * @param name the class name. 830 * @return the class object. 831 * @throws ClassNotFoundException if the class is not found. 832 */ 833 public Class<?> getClassByName(String name) throws ClassNotFoundException { 834 return Class.forName(name, true, classLoader); 835 } 836 837 /** 838 * Get the value of the <code>name</code> property 839 * as an array of <code>Class</code>. 840 * The value of the property specifies a list of comma separated class names. 841 * If no such property is specified, then <code>defaultValue</code> is 842 * returned. 843 * 844 * @param name the property name. 845 * @param defaultValue default value. 846 * @return property value as a <code>Class[]</code>, 847 * or <code>defaultValue</code>. 848 */ 849 public Class<?>[] getClasses(String name, Class<?> ... 
defaultValue) { 850 String[] classnames = getStrings(name); 851 if (classnames == null) 852 return defaultValue; 853 try { 854 Class<?>[] classes = new Class<?>[classnames.length]; 855 for(int i = 0; i < classnames.length; i++) { 856 classes[i] = getClassByName(classnames[i]); 857 } 858 return classes; 859 } catch (ClassNotFoundException e) { 860 throw new RuntimeException(e); 861 } 862 } 863 864 /** 865 * Get the value of the <code>name</code> property as a <code>Class</code>. 866 * If no such property is specified, then <code>defaultValue</code> is 867 * returned. 868 * 869 * @param name the class name. 870 * @param defaultValue default value. 871 * @return property value as a <code>Class</code>, 872 * or <code>defaultValue</code>. 873 */ 874 public Class<?> getClass(String name, Class<?> defaultValue) { 875 String valueString = get(name); 876 if (valueString == null) 877 return defaultValue; 878 try { 879 return getClassByName(valueString); 880 } catch (ClassNotFoundException e) { 881 throw new RuntimeException(e); 882 } 883 } 884 885 /** 886 * Get the value of the <code>name</code> property as a <code>Class</code> 887 * implementing the interface specified by <code>xface</code>. 888 * 889 * If no such property is specified, then <code>defaultValue</code> is 890 * returned. 891 * 892 * An exception is thrown if the returned class does not implement the named 893 * interface. 894 * 895 * @param name the class name. 896 * @param defaultValue default value. 897 * @param xface the interface implemented by the named class. 898 * @return property value as a <code>Class</code>, 899 * or <code>defaultValue</code>. 900 */ 901 public <U> Class<? extends U> getClass(String name, 902 Class<? 
extends U> defaultValue, 903 Class<U> xface) { 904 try { 905 Class<?> theClass = getClass(name, defaultValue); 906 if (theClass != null && !xface.isAssignableFrom(theClass)) 907 throw new RuntimeException(theClass+" not "+xface.getName()); 908 else if (theClass != null) 909 return theClass.asSubclass(xface); 910 else 911 return null; 912 } catch (Exception e) { 913 throw new RuntimeException(e); 914 } 915 } 916 917 /** 918 * Set the value of the <code>name</code> property to the name of a 919 * <code>theClass</code> implementing the given interface <code>xface</code>. 920 * 921 * An exception is thrown if <code>theClass</code> does not implement the 922 * interface <code>xface</code>. 923 * 924 * @param name property name. 925 * @param theClass property value. 926 * @param xface the interface implemented by the named class. 927 */ 928 public void setClass(String name, Class<?> theClass, Class<?> xface) { 929 if (!xface.isAssignableFrom(theClass)) 930 throw new RuntimeException(theClass+" not "+xface.getName()); 931 set(name, theClass.getName()); 932 } 933 934 /** 935 * Get a local file under a directory named by <i>dirsProp</i> with 936 * the given <i>path</i>. If <i>dirsProp</i> contains multiple directories, 937 * then one is chosen based on <i>path</i>'s hash code. If the selected 938 * directory does not exist, an attempt is made to create it. 939 * 940 * @param dirsProp directory in which to locate the file. 941 * @param path file-path. 942 * @return local file under the directory with the given path. 
943 */ 944 public Path getLocalPath(String dirsProp, String path) 945 throws IOException { 946 String[] dirs = getStrings(dirsProp); 947 int hashCode = path.hashCode(); 948 FileSystem fs = FileSystem.getLocal(this); 949 for (int i = 0; i < dirs.length; i++) { // try each local dir 950 int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; 951 Path file = new Path(dirs[index], path); 952 Path dir = file.getParent(); 953 if (fs.mkdirs(dir) || fs.exists(dir)) { 954 return file; 955 } 956 } 957 LOG.warn("Could not make " + path + 958 " in local directories from " + dirsProp); 959 for(int i=0; i < dirs.length; i++) { 960 int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; 961 LOG.warn(dirsProp + "[" + index + "]=" + dirs[index]); 962 } 963 throw new IOException("No valid local directories in property: "+dirsProp); 964 } 965 //从指定路径中获取需要的本地路径。刚开始有点迷惑,注释读的迷迷糊糊,但是网上对这个方法介绍不多 966 //后来查到它的作用时才理解了。它大部分用在taskTracker上,当任务分发时用到的,因为配置文件要拷贝 967 //到本地,临时输出也在本地,不在HDFS上,所以就跟Local文件系统有了交集。下面的这个方法也一样。 968 /** 969 * Get a local file name under a directory named in <i>dirsProp</i> with 970 * the given <i>path</i>. If <i>dirsProp</i> contains multiple directories, 971 * then one is chosen based on <i>path</i>'s hash code. If the selected 972 * directory does not exist, an attempt is made to create it. 973 * 974 * @param dirsProp directory in which to locate the file. 975 * @param path file-path. 976 * @return local file under the directory with the given path. 
977 */ 978 public File getFile(String dirsProp, String path) 979 throws IOException { 980 String[] dirs = getStrings(dirsProp); 981 int hashCode = path.hashCode(); 982 for (int i = 0; i < dirs.length; i++) { // try each local dir 983 int index = (hashCode+i & Integer.MAX_VALUE) % dirs.length; 984 File file = new File(dirs[index], path); 985 File dir = file.getParentFile(); 986 if (dir.exists() || dir.mkdirs()) { 987 return file; 988 } 989 } 990 throw new IOException("No valid local directories in property: "+dirsProp); 991 } 992 //跟上一个方法差不多,只不过路径变成了文件。 993 /** 994 * Get the {@link URL} for the named resource. 995 * 996 * @param name resource name. 997 * @return the url for the named resource. 998 */ 999 public URL getResource(String name) { 1000 return classLoader.getResource(name); 1001 } 1002 //根据名称获取资源URL链接 1003 /** 1004 * Get an input stream attached to the configuration resource with the 1005 * given <code>name</code>. 1006 * 1007 * @param name configuration resource name. 1008 * @return an input stream attached to the resource. 1009 */ 1010 public InputStream getConfResourceAsInputStream(String name) { 1011 try { 1012 URL url= getResource(name); 1013 1014 if (url == null) { 1015 LOG.info(name + " not found"); 1016 return null; 1017 } else { 1018 LOG.info("found resource " + name + " at " + url); 1019 } 1020 1021 return url.openStream(); 1022 } catch (Exception e) { 1023 return null; 1024 } 1025 } 1026 //从上个方法获取的URL链接开启一个输入流。从这可以看出这两个方法都是用在配置文件上的 1027 //看起来是废话,但是读第一个方法的时候不太确定。 1028 /** 1029 * Get a {@link Reader} attached to the configuration resource with the 1030 * given <code>name</code>. 1031 * 1032 * @param name configuration resource name. 1033 * @return a reader attached to the resource. 
1034 */ 1035 public Reader getConfResourceAsReader(String name) { 1036 try { 1037 URL url= getResource(name); 1038 1039 if (url == null) { 1040 LOG.info(name + " not found"); 1041 return null; 1042 } else { 1043 LOG.info("found resource " + name + " at " + url); 1044 } 1045 1046 return new InputStreamReader(url.openStream()); 1047 } catch (Exception e) { 1048 return null; 1049 } 1050 } 1051 //同上,做了封装 1052 private synchronized Properties getProps() { 1053 if (properties == null) { 1054 properties = new Properties(); 1055 loadResources(properties, resources, quietmode); 1056 if (overlay!= null) { 1057 properties.putAll(overlay); 1058 if (storeResource) { 1059 for (Map.Entry<Object,Object> item: overlay.entrySet()) { 1060 updatingResource.put((String) item.getKey(), "Unknown"); 1061 } 1062 } 1063 } 1064 } 1065 return properties; 1066 } 1067 //加载所有键值对,如果为空,则重新加载,包括已经更改过的。 1068 //最后一个if包含的代码不是很懂,求大牛解释 1069 /** 1070 * Return the number of keys in the configuration. 1071 * 1072 * @return number of keys in the configuration. 1073 */ 1074 public int size() { 1075 return getProps().size(); 1076 } 1077 //获取所有键值对的数量 1078 /** 1079 * Clears all keys from the configuration. 1080 */ 1081 public void clear() { 1082 getProps().clear(); 1083 getOverlay().clear(); 1084 } 1085 //清空 1086 /** 1087 * Get an {@link Iterator} to go through the list of <code>String</code> 1088 * key-value pairs in the configuration. 1089 * 1090 * @return an iterator over the entries. 1091 */ 1092 public Iterator<Map.Entry<String, String>> iterator() { 1093 // Get a copy of just the string to string pairs. After the old object 1094 // methods that allow non-strings to be put into configurations are removed, 1095 // we could replace properties with a Map<String,String> and get rid of this 1096 // code. 
1097 Map<String,String> result = new HashMap<String,String>(); 1098 for(Map.Entry<Object,Object> item: getProps().entrySet()) { 1099 if (item.getKey() instanceof String && 1100 item.getValue() instanceof String) { 1101 result.put((String) item.getKey(), (String) item.getValue()); 1102 } 1103 } 1104 return result.entrySet().iterator(); 1105 } 1106 //获取所有键值对的迭代接口 1107 private void loadResources(Properties properties, 1108 ArrayList resources, 1109 boolean quiet) { 1110 if(loadDefaults) { 1111 for (String resource : defaultResources) { 1112 loadResource(properties, resource, quiet); 1113 } 1114 1115 //support the hadoop-site.xml as a deprecated case 1116 if(getResource("hadoop-site.xml")!=null) { 1117 loadResource(properties, "hadoop-site.xml", quiet); 1118 } 1119 } 1120 1121 for (Object resource : resources) { 1122 loadResource(properties, resource, quiet); 1123 } 1124 } 1125 1126 private void loadResource(Properties properties, Object name, boolean quiet) { 1127 try { 1128 DocumentBuilderFactory docBuilderFactory 1129 = DocumentBuilderFactory.newInstance(); 1130 //ignore all comments inside the xml file 1131 docBuilderFactory.setIgnoringComments(true); 1132 1133 //allow includes in the xml file 1134 docBuilderFactory.setNamespaceAware(true); 1135 try { 1136 docBuilderFactory.setXIncludeAware(true); 1137 } catch (UnsupportedOperationException e) { 1138 LOG.error("Failed to set setXIncludeAware(true) for parser " 1139 + docBuilderFactory 1140 + ":" + e, 1141 e); 1142 } 1143 DocumentBuilder builder = docBuilderFactory.newDocumentBuilder(); 1144 Document doc = null; 1145 Element root = null; 1146 1147 if (name instanceof URL) { // an URL resource 1148 URL url = (URL)name; 1149 if (url != null) { 1150 if (!quiet) { 1151 LOG.info("parsing " + url); 1152 } 1153 doc = builder.parse(url.toString()); 1154 } 1155 } else if (name instanceof String) { // a CLASSPATH resource 1156 URL url = getResource((String)name); 1157 if (url != null) { 1158 if (!quiet) { 1159 
LOG.info("parsing " + url); 1160 } 1161 doc = builder.parse(url.toString()); 1162 } 1163 } else if (name instanceof Path) { // a file resource 1164 // Can't use FileSystem API or we get an infinite loop 1165 // since FileSystem uses Configuration API. Use java.io.File instead. 1166 File file = new File(((Path)name).toUri().getPath()) 1167 .getAbsoluteFile(); 1168 if (file.exists()) { 1169 if (!quiet) { 1170 LOG.info("parsing " + file); 1171 } 1172 InputStream in = new BufferedInputStream(new FileInputStream(file)); 1173 try { 1174 doc = builder.parse(in); 1175 } finally { 1176 in.close(); 1177 } 1178 } 1179 } else if (name instanceof InputStream) { 1180 try { 1181 doc = builder.parse((InputStream)name); 1182 } finally { 1183 ((InputStream)name).close(); 1184 } 1185 } else if (name instanceof Element) { 1186 root = (Element)name; 1187 } 1188 1189 if (doc == null && root == null) { 1190 if (quiet) 1191 return; 1192 throw new RuntimeException(name + " not found"); 1193 } 1194 1195 if (root == null) { 1196 root = doc.getDocumentElement(); 1197 } 1198 if (!"configuration".equals(root.getTagName())) 1199 LOG.fatal("bad conf file: top-level element not <configuration>"); 1200 NodeList props = root.getChildNodes(); 1201 for (int i = 0; i < props.getLength(); i++) { 1202 Node propNode = props.item(i); 1203 if (!(propNode instanceof Element)) 1204 continue; 1205 Element prop = (Element)propNode; 1206 if ("configuration".equals(prop.getTagName())) { 1207 loadResource(properties, prop, quiet); 1208 continue; 1209 } 1210 if (!"property".equals(prop.getTagName())) 1211 LOG.warn("bad conf file: element not <property>"); 1212 NodeList fields = prop.getChildNodes(); 1213 String attr = null; 1214 String value = null; 1215 boolean finalParameter = false; 1216 for (int j = 0; j < fields.getLength(); j++) { 1217 Node fieldNode = fields.item(j); 1218 if (!(fieldNode instanceof Element)) 1219 continue; 1220 Element field = (Element)fieldNode; 1221 if ("name".equals(field.getTagName()) && 
field.hasChildNodes()) 1222 attr = ((Text)field.getFirstChild()).getData().trim(); 1223 if ("value".equals(field.getTagName()) && field.hasChildNodes()) 1224 value = ((Text)field.getFirstChild()).getData(); 1225 if ("final".equals(field.getTagName()) && field.hasChildNodes()) 1226 finalParameter = "true".equals(((Text)field.getFirstChild()).getData()); 1227 } 1228 1229 // Ignore this parameter if it has already been marked as 'final' 1230 if (attr != null) { 1231 if (value != null) { 1232 if (!finalParameters.contains(attr)) { 1233 properties.setProperty(attr, value); 1234 if (storeResource) { 1235 updatingResource.put(attr, name.toString()); 1236 } 1237 } else if (!value.equals(properties.getProperty(attr))) { 1238 LOG.warn(name+":a attempt to override final parameter: "+attr 1239 +"; Ignoring."); 1240 } 1241 } 1242 if (finalParameter) { 1243 finalParameters.add(attr); 1244 } 1245 } 1246 } 1247 1248 } catch (IOException e) { 1249 LOG.fatal("error parsing conf file: " + e); 1250 throw new RuntimeException(e); 1251 } catch (DOMException e) { 1252 LOG.fatal("error parsing conf file: " + e); 1253 throw new RuntimeException(e); 1254 } catch (SAXException e) { 1255 LOG.fatal("error parsing conf file: " + e); 1256 throw new RuntimeException(e); 1257 } catch (ParserConfigurationException e) { 1258 LOG.fatal("error parsing conf file: " + e); 1259 throw new RuntimeException(e); 1260 } 1261 } 1262 //从配置文件中获取所有键值对 1263 /** 1264 * Write out the non-default properties in this configuration to the give 1265 * {@link OutputStream}. 1266 * 1267 * @param out the output stream to write to. 
1268 */ 1269 public void writeXml(OutputStream out) throws IOException { 1270 Properties properties = getProps(); 1271 try { 1272 Document doc = 1273 DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument(); 1274 Element conf = doc.createElement("configuration"); 1275 doc.appendChild(conf); 1276 conf.appendChild(doc.createTextNode("\n")); 1277 for (Enumeration e = properties.keys(); e.hasMoreElements();) { 1278 String name = (String)e.nextElement(); 1279 Object object = properties.get(name); 1280 String value = null; 1281 if (object instanceof String) { 1282 value = (String) object; 1283 }else { 1284 continue; 1285 } 1286 Element propNode = doc.createElement("property"); 1287 conf.appendChild(propNode); 1288 1289 Element nameNode = doc.createElement("name"); 1290 nameNode.appendChild(doc.createTextNode(name)); 1291 propNode.appendChild(nameNode); 1292 1293 Element valueNode = doc.createElement("value"); 1294 valueNode.appendChild(doc.createTextNode(value)); 1295 propNode.appendChild(valueNode); 1296 1297 conf.appendChild(doc.createTextNode("\n")); 1298 } 1299 1300 DOMSource source = new DOMSource(doc); 1301 StreamResult result = new StreamResult(out); 1302 TransformerFactory transFactory = TransformerFactory.newInstance(); 1303 Transformer transformer = transFactory.newTransformer(); 1304 transformer.transform(source, result); 1305 } catch (Exception e) { 1306 throw new RuntimeException(e); 1307 } 1308 } 1309 //把所有非默认配置的配置按XML的形式输出到给定的输出流上 1310 /** 1311 * Writes out all the parameters and their properties (final and resource) to 1312 * the given {@link Writer} 1313 * The format of the output would be 1314 * { "properties" : [ {key1,value1,key1.isFinal,key1.resource}, {key2,value2, 1315 * key2.isFinal,key2.resource}... ] } 1316 * It does not output the parameters of the configuration object which is 1317 * loaded from an input stream. 
1318 * @param out the Writer to write to 1319 * @throws IOException 1320 */ 1321 public static void dumpConfiguration(Configuration conf, 1322 Writer out) throws IOException { 1323 Configuration config = new Configuration(conf,true); 1324 config.reloadConfiguration(); 1325 JsonFactory dumpFactory = new JsonFactory(); 1326 JsonGenerator dumpGenerator = dumpFactory.createJsonGenerator(out); 1327 dumpGenerator.writeStartObject(); 1328 dumpGenerator.writeFieldName("properties"); 1329 dumpGenerator.writeStartArray(); 1330 dumpGenerator.flush(); 1331 for (Map.Entry<Object,Object> item: config.getProps().entrySet()) { 1332 dumpGenerator.writeStartObject(); 1333 dumpGenerator.writeStringField("key", (String) item.getKey()); 1334 dumpGenerator.writeStringField("value", 1335 config.get((String) item.getKey())); 1336 dumpGenerator.writeBooleanField("isFinal", 1337 config.finalParameters.contains(item.getKey())); 1338 dumpGenerator.writeStringField("resource", 1339 config.updatingResource.get(item.getKey())); 1340 dumpGenerator.writeEndObject(); 1341 } 1342 dumpGenerator.writeEndArray(); 1343 dumpGenerator.writeEndObject(); 1344 dumpGenerator.flush(); 1345 } 1346 //把所有配置属性按某种格式导出来到指定输出流上 1347 //注释中说不包括通过输入流配置的属性,就是说只导出文件的,从几个用到的地方也只看到了文件 1348 //这里不太确定。 1349 /** 1350 * Get the {@link ClassLoader} for this job. 1351 * 1352 * @return the correct class loader. 1353 */ 1354 public ClassLoader getClassLoader() { 1355 return classLoader; 1356 } 1357 1358 /** 1359 * Set the class loader that will be used to load the various objects. 1360 * 1361 * @param classLoader the new class loader. 
1362 */ 1363 public void setClassLoader(ClassLoader classLoader) { 1364 this.classLoader = classLoader; 1365 } 1366 1367 @Override 1368 public String toString() { 1369 StringBuffer sb = new StringBuffer(); 1370 sb.append("Configuration: "); 1371 if(loadDefaults) { 1372 toString(defaultResources, sb); 1373 if(resources.size()>0) { 1374 sb.append(", "); 1375 } 1376 } 1377 toString(resources, sb); 1378 return sb.toString(); 1379 } 1380 1381 private void toString(List resources, StringBuffer sb) { 1382 ListIterator i = resources.listIterator(); 1383 while (i.hasNext()) { 1384 if (i.nextIndex() != 0) { 1385 sb.append(", "); 1386 } 1387 sb.append(i.next()); 1388 } 1389 } 1390 1391 /** 1392 * Set the quietness-mode. 1393 * 1394 * In the quiet-mode, error and informational messages might not be logged. 1395 * 1396 * @param quietmode <code>true</code> to set quiet-mode on, <code>false</code> 1397 * to turn it off. 1398 */ 1399 public synchronized void setQuietMode(boolean quietmode) { 1400 this.quietmode = quietmode; 1401 } 1402 1403 /** For debugging. List non-default properties to the terminal and exit. 
*/
  public static void main(String[] args) throws Exception {
    new Configuration().writeXml(System.out);
  }

  /**
   * Replace the contents of this configuration with the key/value pairs read
   * from <code>in</code>: a variable-length int count followed by that many
   * (key, value) string pairs.
   *
   * @param in the input to read from.
   * @throws IOException if reading from <code>in</code> fails.
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    // Drop every existing entry first so only the transmitted pairs remain.
    clear();
    int size = WritableUtils.readVInt(in);
    for (int i = 0; i < size; ++i) {
      String key = org.apache.hadoop.io.Text.readString(in);
      String value = org.apache.hadoop.io.Text.readString(in);
      set(key, value);
    }
  }
  // Author's note (corrected): this is the deserialization method; it rebuilds
  // the configuration properties from the incoming stream. clear() is called
  // before set(); clear() is a method of this class and in turn clears the
  // underlying property tables, so all previous settings are wiped.

  /**
   * Write this configuration to <code>out</code>: a variable-length int count
   * followed by every (key, value) pair as strings.
   *
   * @param out the output to write to.
   * @throws IOException if writing to <code>out</code> fails.
   */
  @Override
  public void write(DataOutput out) throws IOException {
    Properties props = getProps();
    WritableUtils.writeVInt(out, props.size());
    for (Map.Entry<Object, Object> item: props.entrySet()) {
      org.apache.hadoop.io.Text.writeString(out, (String) item.getKey());
      org.apache.hadoop.io.Text.writeString(out, (String) item.getValue());
    }
  }
  // Author's note: this is the serialization method; it writes the object to
  // the outgoing stream.
  /**
   * Get the key/value pairs whose key matches the regex. A key matches when
   * the pattern is found anywhere in it, not only when it matches the whole
   * key.
   *
   * @param regex the regular expression to look for in the keys.
   * @return Map&lt;String,String&gt; with matching keys
   */
  public Map<String,String> getValByRegex(String regex) {
    Pattern p = Pattern.compile(regex);

    Map<String,String> result = new HashMap<String,String>();

    for (Map.Entry<Object,Object> item: getProps().entrySet()) {
      if (item.getKey() instanceof String &&
          item.getValue() instanceof String) {
        Matcher m = p.matcher((String)item.getKey());
        if (m.find()) { // match
          result.put((String) item.getKey(), (String) item.getValue());
        }
      }
    }
    return result;
  }
}
// Author's note: getValByRegex returns the key/value pairs selected by a regular expression.
MapReduce的简单执行流程:用户作业执行JobClient.runJob(conf)后,作业会在Hadoop集群上启动。启动之后,JobClient实例会向JobTracker获取JobId,客户端会将作业执行所需的作业资源复制到HDFS上,然后将作业提交给JobTracker。JobTracker在本地初始化作业,再从HDFS上的作业资源中获取作业输入的分片信息,根据这些信息将作业分割成多个任务,然后分配给在与JobTracker的心跳通信中请求任务的TaskTracker。TaskTracker接收到新的任务之后,会先从HDFS上获取作业资源,包括作业配置信息和本任务的输入分片,然后在本地启动一个JVM并执行任务。任务结束后将结果写回HDFS,并向JobTracker报告。
有些东西我说得不太明白,这里借鉴别人的文章:
http://anyoneking.com/archives/212
http://f.dataguru.cn/thread-258563-1-1.html
原来JDK的源码包就在JDK的安装文件夹中,只要把src.zip解压,就能在Eclipse中通过Attach Source关联源码了。我真是井底之蛙。
即使这样逐个看它的变量和方法,也给人一种雾里看花,不识庐山真面目的感觉。
只有真正熟悉了Hadoop的启动过程,以及该过程中对配置API的调用和相关处理,才能彻底理解这个类。
我还不够格,在网上查了资料,也只是跟着别人的思路走,期待后面通过自己的体验和调试来融会贯通。
欲为大树,何与草争;心若不动,风又奈何。