SAX解析xml (按文档顺序遍历XML各节点)
本文参考 http://yangjunfeng.iteye.com/blog/401377
1. books.xml
<?xml version="1.0" encoding="UTF-8"?>
<bk:books count="3" xmlns:bk="http://test.org/books">
	<!--books's comment -->
	<bk:book id="1">
		<bk:name>Thinking in JAVA</bk:name>
	</bk:book>
	<bk:book id="2">
		<bk:name>Core JAVA2</bk:name>
	</bk:book>
	<bk:book id="3">
		<bk:name>C++ primer</bk:name>
		<addr:address xmlns:addr="http://test.org/address">
			<addr:state>China</addr:state>
			<addr:city>ShangHai</addr:city>
		</addr:address>
	</bk:book>
</bk:books>
xml文件基本结构:
<books>Text文本节点("\n\t"换行+制表符)
<book>Text文本节点("\n\t\t"换行+2个制表符)
<name>Text文本节点("Thinking in JAVA")</name>Text文本节点("\n\t"换行+制表符)
</book>
.......
.......
</books>
2. 使用SAX api 解析xml
package sax.parsing.example;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.Reader;

import org.testng.annotations.Test;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;

import bsh.This;


/**
 * A {@link ContentHandler} that traces every SAX callback it receives to
 * {@code System.out}.
 *
 * <p>Element callbacks are prefixed with {@link #frontBlankCount} blanks, a
 * counter that is incremented on each start tag and decremented on each end
 * tag. Text is printed with control characters escaped so that whitespace-only
 * text nodes are visible in the trace.
 */
class MyContentHandler implements ContentHandler {

    /** Current element nesting depth; used as the number of leading blanks. */
    int frontBlankCount = 0;

    /**
     * Builds a string consisting of {@code count} blanks.
     *
     * @param count number of blanks to produce; values {@code <= 0} yield an empty string
     * @return the padding string
     */
    public String toBlankString(int count) {
        StringBuilder padding = new StringBuilder();
        for (int i = 0; i < count; i++) {
            padding.append(" ");
        }
        return padding.toString();
    }

    /**
     * Renders a slice of a character buffer with backslash, CR, LF, TAB and
     * double-quote characters replaced by their Java escape sequences.
     *
     * @param ch     buffer handed over by the parser
     * @param start  index of the first character to render
     * @param length number of characters to render
     * @return the escaped text
     */
    private static String escape(char[] ch, int start, int length) {
        StringBuilder escaped = new StringBuilder();
        for (int i = start; i < start + length; i++) {
            switch (ch[i]) {
                case '\\': escaped.append("\\\\"); break;
                case '\r': escaped.append("\\r"); break;
                case '\n': escaped.append("\\n"); break;
                case '\t': escaped.append("\\t"); break;
                case '\"': escaped.append("\\\""); break;
                default: escaped.append(ch[i]);
            }
        }
        return escaped.toString();
    }

    /** Prints the line/column and system/public identifiers exposed by the locator. */
    @Override
    public void setDocumentLocator(Locator locator) {
        System.out.println(this.toBlankString(this.frontBlankCount)
                + ">>> set document_locator : (lineNumber = " + locator.getLineNumber()
                + ", columnNumber = " + locator.getColumnNumber()
                + ", systemId = " + locator.getSystemId()
                + ". publicId = " + locator.getPublicId()
                + ")"
        );
    }

    /** Prints a marker for the start of the document. */
    @Override
    public void startDocument() throws SAXException {
        System.out.println(this.toBlankString(frontBlankCount) + ">>> start document");
    }

    /** Prints a marker for the end of the document (no trailing newline). */
    @Override
    public void endDocument() throws SAXException {
        System.out.print(this.toBlankString(frontBlankCount) + ">>> end document");
    }

    /** Prints the namespace declaration that is coming into scope, on a new line. */
    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        System.out.println("\n" + this.toBlankString(this.frontBlankCount)
                + ">>> start prefix_mapping : xmlns:" + prefix + "=\"" + uri + "\"");
    }

    /** Prints the namespace prefix that is going out of scope, on a new line. */
    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
        System.out.print("\n" + this.toBlankString(this.frontBlankCount)
                + ">>> end prefix_mapping : " + prefix);
    }

    /** Prints the qualified name and namespace URI, then increases the nesting depth. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts)
            throws SAXException {
        // Post-increment: the start tag is printed at the parent's depth.
        System.out.print(this.toBlankString(this.frontBlankCount++) + ">>> start element : "
                + qName + "(" + uri + ")"
        );
    }

    /** Decreases the nesting depth, then prints the qualified name and namespace URI. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // Pre-decrement: the end tag lines up with its matching start tag.
        System.out.print(this.toBlankString(--this.frontBlankCount) + ">>> end element : "
                + qName + "(" + uri + ")"
        );
    }

    /**
     * Handles text nodes: prints the length of the reported chunk followed by
     * its escaped content, terminated by a newline.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        System.out.println("\t>>> characters(" + length + "): " + escape(ch, start, length));
    }

    /**
     * Prints the length and escaped content of a chunk the parser reported as
     * ignorable whitespace.
     */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        System.out.print(this.toBlankString(frontBlankCount)
                + ">>> ignorable whitespace(" + length + "): " + escape(ch, start, length));
    }

    /** Prints the target and data of a processing instruction, both quoted. */
    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        // The closing quote after data was missing in the original message.
        System.out.print(this.toBlankString(this.frontBlankCount)
                + ">>> process instruction : (target = \"" + target + "\", data = \"" + data + "\")");
    }

    /** Prints the name of an entity the parser skipped. */
    @Override
    public void skippedEntity(String name) throws SAXException {
        System.out.print(this.toBlankString(this.frontBlankCount) + ">>> skipped_entity : " + name);
    }
}

/**
 * Parses {@code books.xml} with a SAX {@link XMLReader} and traces the
 * callbacks through {@link MyContentHandler}.
 */
public class SaxTest {

    /**
     * Runs the parser over {@code src/sax/parsing/example/books.xml}.
     *
     * @throws SAXException          if the reader cannot be created or configured, or parsing fails
     * @throws FileNotFoundException if the XML file does not exist
     * @throws IOException           if reading or closing the file fails
     */
    @Test
    public void test() throws SAXException, FileNotFoundException, IOException {
        XMLReader reader = XMLReaderFactory.createXMLReader();
        // NOTE(review): validation is requested although the sample document
        // declares no DTD; the recorded run shows "[Error] ... no grammar found"
        // lines, presumably for that reason. Kept as-is so the trace stays comparable.
        reader.setFeature("http://xml.org/sax/features/validation", true);
        reader.setFeature("http://xml.org/sax/features/namespaces", true);
        reader.setContentHandler(new MyContentHandler());

        FileInputStream in = new FileInputStream("src/sax/parsing/example/books.xml");
        try {
            reader.parse(new InputSource(in));
        } finally {
            // The original never closed the stream; release the file handle
            // whether or not parsing succeeds.
            in.close();
        }
    }
}
输出结果:
>>> set document_locator : (lineNumber = 1, columnNumber = 1, systemId = null. publicId = null)
>>> start document
[Error] :5:10: Document is invalid: no grammar found.
[Error] :5:10: Document root element "bk:books", must match DOCTYPE root "null".
>>> start prefix_mapping : xmlns:bk="http://test.org/books"
>>> start element : bk:books(http://test.org/books) >>> characters(2): \n\t
>>> start element : bk:book(http://test.org/books) >>> characters(3): \n\t\t
>>> start element : bk:name(http://test.org/books) >>> characters(16): Thinking in JAVA
>>> end element : bk:name(http://test.org/books) >>> characters(2): \n\t
>>> end element : bk:book(http://test.org/books) >>> characters(2): \n\t
>>> start element : bk:book(http://test.org/books) >>> characters(3): \n\t\t
>>> start element : bk:name(http://test.org/books) >>> characters(10): Core JAVA2
>>> end element : bk:name(http://test.org/books) >>> characters(2): \n\t
>>> end element : bk:book(http://test.org/books) >>> characters(2): \n\t
>>> start element : bk:book(http://test.org/books) >>> characters(3): \n\t\t
>>> start element : bk:name(http://test.org/books) >>> characters(10): C++ primer
>>> end element : bk:name(http://test.org/books) >>> characters(3): \n\t\t
>>> start prefix_mapping : xmlns:addr="http://test.org/address"
>>> start element : addr:address(http://test.org/address) >>> characters(4): \n\t\t\t
>>> start element : addr:state(http://test.org/address) >>> characters(5): China
>>> end element : addr:state(http://test.org/address) >>> characters(4): \n\t\t\t
>>> start element : addr:city(http://test.org/address) >>> characters(8): ShangHai
>>> end element : addr:city(http://test.org/address) >>> characters(3): \n\t\t
>>> end element : addr:address(http://test.org/address)
>>> end prefix_mapping : addr >>> characters(2): \n\t
>>> end element : bk:book(http://test.org/books) >>> characters(1): \n
>>> end element : bk:books(http://test.org/books)
>>> end prefix_mapping : bk>>> end document