DOM4J介绍与代码示例
DOM4J是dom4j.org出品的一个开源XML解析包。Dom4j是一个易用的、开源的库,用于XML,XPath和XSLT。它应用于Java平台,采用了Java集合框架并完全支持DOM,SAX和JAXP。
DOM4J下载jar包:http://downloads.sourceforge.net/dom4j/dom4j-1.6.1.jar
JAXEN(对XPath的支持):http://dist.codehaus.org/jaxen/distributions/jaxen-1.1.1.zip
1.DOM4J主要接口
DOM4J主要接口都在org.dom4j这个包里定义。
-Node为所有的dom4j中XML节点定义了多态行为;
-Branch为能够包含子节点的节点如XML元素(Element)和文档(Document)定义了一个公共的行为;
|-Element 定义XML 元素;
|-Document定义了XML文档;
-DocumentType 定义XML DOCTYPE声明;
-Entity定义 XML entity;
-Attribute定义了XML的属性;
-ProcessingInstruction 定义 XML 处理指令;
-CharacterData是一个标识接口,标识基于字符的节点。如CDATA,Comment, Text;
|- CDATA 定义了XML CDATA 区域;
|-Text 定义XML 文本节点;
|- Comment 定义了XML注释的行为;
项目结构
maven代码
<!-- Maven build descriptor for the Dom4jTest sample project. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>lqy</groupId>
  <artifactId>Dom4jTest</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>Dom4jTest</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- Test-scope only. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- XML parsing / generation library used by all samples. -->
    <dependency>
      <groupId>dom4j</groupId>
      <artifactId>dom4j</artifactId>
      <version>1.6.1</version>
    </dependency>
    <!-- XPath engine required by selectNodes / selectSingleNode. -->
    <dependency>
      <groupId>jaxen</groupId>
      <artifactId>jaxen</artifactId>
      <version>1.1.4</version>
    </dependency>
  </dependencies>
</project>
1.创建XML文档
package lqy.Dom4jTest;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

/**
 * Demonstrates two ways of creating an XML document with dom4j and how to
 * save the result to a file.
 */
public class XmlGen {

    /**
     * Builds the sample "students" document through dom4j API calls:
     * <ol>
     * <li>obtain a Document: {@code DocumentHelper.createDocument()}</li>
     * <li>add a processing instruction:
     *     {@code document.addProcessingInstruction("xml-stylesheet", inMap)}</li>
     * <li>add an element: {@code document.addElement("students")}</li>
     * <li>add a comment: {@code studentsElement.addComment("An Student Catalog")}</li>
     * <li>add an attribute: {@code stuElement.addAttribute("sn", "01")}</li>
     * <li>set text content: {@code ageElement.setText("18")}</li>
     * </ol>
     *
     * @return the generated document
     */
    public Document generateDocumentByMethod() {
        Document document = DocumentHelper.createDocument();

        // Processing instruction: <?xml-stylesheet type="text/xsl" href="students.xsl"?>
        Map<String, String> inMap = new HashMap<String, String>();
        inMap.put("type", "text/xsl");
        inMap.put("href", "students.xsl");
        document.addProcessingInstruction("xml-stylesheet", inMap);

        // Root element with a leading comment
        Element studentsElement = document.addElement("students");
        studentsElement.addComment("An Student Catalog");

        // First student
        Element stuElement = studentsElement.addElement("student");
        stuElement.addAttribute("sn", "01");
        Element nameElement = stuElement.addElement("name");
        nameElement.setText("sam");
        Element ageElement = stuElement.addElement("age");
        ageElement.setText("18");

        // Second student
        Element anotherStuElement = studentsElement.addElement("student");
        anotherStuElement.addAttribute("sn", "02");
        Element anotherNameElement = anotherStuElement.addElement("name");
        anotherNameElement.setText("lin");
        Element anotherAgeElement = anotherStuElement.addElement("age");
        anotherAgeElement.setText("20");

        return document;
    }

    /**
     * Builds the same sample document by parsing an XML string with
     * {@code DocumentHelper.parseText(text)}.
     *
     * @return the parsed document, or {@code null} if parsing failed
     *         (the stack trace is printed in that case)
     */
    public Document generateDocumentByString() {
        String text = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
                + "<?xml-stylesheet type=\"text/xsl\" href=\"students.xsl\"?>"
                + "<students><!--An Student Catalog--> <student sn=\"01\">"
                + "<name>sam</name><age>18</age></student><student sn=\"02\">"
                + "<name>lin</name><age>20</age></student></students>";
        Document document = null;
        try {
            document = DocumentHelper.parseText(text);
        } catch (DocumentException e) {
            e.printStackTrace();
        }
        return document;
    }

    /**
     * Writes the document to a file, pretty-printed.
     *
     * <p>The file is written through a byte stream so that XMLWriter encodes
     * the characters with the encoding of the OutputFormat (the same one that
     * appears in the XML declaration). A FileWriter would use the platform
     * default charset instead, which can disagree with the declaration.
     *
     * @param document  the document to write
     * @param outputXml the target file
     */
    public void saveDocument(Document document, File outputXml) {
        // Pretty-printed output; OutputFormat.createCompactFormat() gives compact output.
        OutputFormat format = OutputFormat.createPrettyPrint();
        // To write in another encoding: format.setEncoding("GBK");

        OutputStream stream = null;
        try {
            stream = new FileOutputStream(outputXml);
            XMLWriter output = new XMLWriter(stream, format);
            output.write(document);
            // Push buffered characters to the stream before it is closed below.
            output.flush();
        } catch (IOException e) {
            System.out.println(e.getMessage());
        } finally {
            // Always release the file handle, even when writing failed.
            if (stream != null) {
                try {
                    stream.close();
                } catch (IOException e) {
                    System.out.println(e.getMessage());
                }
            }
        }
    }

    public static void main(String[] argv) {
        XmlGen dom4j = new XmlGen();
        Document document = null;
        // Build through API calls
        document = dom4j.generateDocumentByMethod();
        // Alternative: build from a string
        // document = dom4j.generateDocumentByString();
        dom4j.saveDocument(document, new File("students-gen.xml"));
    }
}
生成students-gen.xml如下
<?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet type="text/xsl" href="students.xsl"?> <students> <!--An Student Catalog--> <student sn="01"> <name>sam</name> <age>18</age> </student> <student sn="02"> <name>lin</name> <age>20</age> </student> </students>
方法generateDocumentByMethod()通过调用方法构建xml文档:
1.使用DocumentHelper得到Document实例
Document document = DocumentHelper.createDocument();
2.创建Processing Instruction
document.addProcessingInstruction("xml-stylesheet", inMap);
3.创建元素Element
Element studentsElement = document.addElement("students");
4.为元素添加注释Comment
studentsElement.addComment("An Student Catalog");
5.为元素添加属性
stuElement.addAttribute("sn", "01");
6.为元素添加文本值Text
ageElement.setText("18");
方法generateDocumentByString()通过字符串转换直接构建xml文档,使用DocumentHelper.parseText()来实现.
document = DocumentHelper.parseText(text);
方法saveDocument(Document document, File outputXml)将文档输出到文件保存,可指定字符编码,可指定格式化输出。
2.修改XML文档
package lqy.Dom4jTest;

import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Iterator;
import java.util.List;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.dom4j.io.XMLWriter;

/**
 * Modifies an existing XML document. XPath is used to locate the elements
 * and attributes to change, which requires jaxen on the classpath.
 *
 * <p>The sample changes the {@code sn} attribute "01" to "001" and the
 * {@code name} text "sam" to "jeff" in students-gen.xml.
 */
public class XmlMod {

    /**
     * Reads the file, rewrites matching attributes and element text, and
     * saves the result as students-modified.xml.
     * <ol>
     * <li>A Document is obtained from the file with
     *     {@code new SAXReader().read(inputXml)}.</li>
     * <li>{@code document.selectNodes(xpath)} returns a List of matching
     *     nodes, which is then iterated.</li>
     * </ol>
     *
     * @param inputXml the XML file to read
     */
    public void modifyDocument(File inputXml) {
        try {
            SAXReader saxReader = new SAXReader();
            Document document = saxReader.read(inputXml);

            // Every sn attribute of a student: "01" becomes "001"
            List<?> attributes = document.selectNodes("//students/student/@sn");
            for (Object item : attributes) {
                Attribute attribute = (Attribute) item;
                if (attribute.getValue().equals("01")) {
                    attribute.setValue("001");
                }
            }

            // Every name child of a student: "sam" becomes "jeff"
            List<?> students = document.selectNodes("//students/student");
            for (Object item : students) {
                Element element = (Element) item;
                Iterator<?> names = element.elementIterator("name");
                while (names.hasNext()) {
                    Element nameElement = (Element) names.next();
                    if (nameElement.getText().equals("sam")) {
                        nameElement.setText("jeff");
                    }
                }
            }

            writeDocument(document, "students-modified.xml");
        } catch (DocumentException e) {
            System.out.println(e.getMessage());
        } catch (IOException e) {
            System.out.println(e.getMessage());
        }
    }

    /**
     * Same idea as {@link #modifyDocument(File)} but with
     * {@code selectSingleNode}, which returns only the first match when
     * several nodes match the XPath. The result is saved as
     * students-modified2.xml.
     *
     * @param inputXml the XML file to read
     */
    public void modifyDocument2(File inputXml) {
        SAXReader saxReader = new SAXReader();
        try {
            Document document = saxReader.read(inputXml);

            Node nodeAttr = document.selectSingleNode("//students/student/@sn");
            System.out.println(nodeAttr.getText());
            nodeAttr.setText("nodeAttr");

            Node nodeEle = document.selectSingleNode("//students/student");
            System.out.println(nodeEle.getText());
            nodeEle.setText("nodeEle");

            writeDocument(document, "students-modified2.xml");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the document to the named file with the default output format.
     *
     * <p>A byte stream is handed to XMLWriter so that it performs the
     * character encoding itself; a FileWriter would encode with the platform
     * default charset, which can disagree with the XML declaration. The
     * stream is closed even when writing fails.
     */
    private void writeDocument(Document document, String fileName) throws IOException {
        OutputStream stream = new FileOutputStream(new File(fileName));
        try {
            XMLWriter output = new XMLWriter(stream);
            output.write(document);
            // Push buffered characters to the stream before it is closed.
            output.flush();
        } finally {
            stream.close();
        }
    }

    public static void main(String[] argv) {
        XmlMod dom4jParser = new XmlMod();
        // dom4jParser.modifyDocument(new File("students-gen.xml"));
        dom4jParser.modifyDocument2(new File("students-gen.xml"));
    }
}
生成students-modified.xml
<?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet type="text/xsl" href="students.xsl"?><students> <!--An Student Catalog--> <student sn="001"> <name>jeff</name> <age>18</age> </student> <student sn="02"> <name>lin</name> <age>20</age> </student> </students>
生成students-modified2.xml
<?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet type="text/xsl" href="students.xsl"?><students> <!--An Student Catalog--> <student sn="nodeAttr"><name>sam</name><age>18</age>nodeEle</student> <student sn="02"> <name>lin</name> <age>20</age> </student> </students>
1.使用File定位文件资源,并基于此获得Document实例
SAXReader saxReader = new SAXReader();
Document document = saxReader.read(inputXml);
2.Document实例的selectNodes方法可以传入xpath,并返回一个List实例,基于此使用迭代器,完成特定的应用
List list = document.selectNodes("//students/student/@sn");
3.遍历XML文档
这里提供两种遍历方法,一种是基于迭代的遍历,一种是基于Visitor模式的遍历。这里只介绍基于迭代的遍历。
package lqy.Dom4jTest;

import java.io.File;
import java.util.Iterator;

import org.dom4j.Attribute;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Iterator-based traversal of an XML document: each Element hands out
 * iterators over its child elements ({@code elementIterator()}) and over its
 * attributes ({@code attributeIterator()}).
 */
public class XmlIterator {

    private File inputXml;

    public XmlIterator(File inputXml) {
        this.inputXml = inputXml;
    }

    /**
     * Parses the input file.
     *
     * @return the parsed document, or {@code null} when reading failed
     *         (the stack trace is printed in that case)
     */
    public Document getDocument() {
        Document parsed = null;
        try {
            SAXReader reader = new SAXReader();
            parsed = reader.read(inputXml);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return parsed;
    }

    /** Returns the root element of the freshly parsed input file. */
    public Element getRootElement() {
        Document parsed = getDocument();
        return parsed.getRootElement();
    }

    /**
     * Prints the children of the root element, their attributes and their
     * own children. Only two levels are listed; walking the whole tree
     * would need recursion.
     */
    public void traversalDocumentByIterator() {
        Iterator children = getRootElement().elementIterator();
        while (children.hasNext()) {
            System.out.println("======");
            Element child = (Element) children.next();
            System.out.println(child.getName());

            // Attributes of this child
            Iterator attributes = child.attributeIterator();
            while (attributes.hasNext()) {
                Attribute attr = (Attribute) attributes.next();
                System.out.println(attr.getName() + ":" + attr.getData());
            }

            // Elements directly below this child
            Iterator grandChildren = child.elementIterator();
            while (grandChildren.hasNext()) {
                Element grandChild = (Element) grandChildren.next();
                System.out.println(grandChild.getName() + ":" + grandChild.getText());
            }
        }
    }

    public static void main(String[] argv) {
        File source = new File("students-gen.xml");
        XmlIterator dom4jParser = new XmlIterator(source);
        dom4jParser.traversalDocumentByIterator();
    }
}
输出
4.通过Element找到节点
package lqy.Dom4jTest;

import java.io.File;
import java.util.Iterator;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

/**
 * Locates elements and attributes of students-read.xml, mostly by walking
 * down from the root element.
 */
public class XmlReadByElement {

    /**
     * Runs the four lookups in sequence.
     *
     * @param args unused
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        getOneEle();
        getEles();
        getAttrs();
        getOneAttr();
    }

    /** Parses students-read.xml from the working directory. */
    public static Document getDocument() throws Exception {
        File source = new File("students-read.xml");
        return new SAXReader().read(source);
    }

    /** Walks root -> lqy -> login one level at a time and prints what it finds. */
    public static void getOneEle() throws Exception {
        Element root = getDocument().getRootElement();
        Element lqy = root.element("lqy");
        System.out.println("arrt:" + lqy.attributeValue("aa"));
        Element login = lqy.element("login");
        System.out.println("text:" + login.getText());
    }

    /** Prints the sn attribute of every student child of the root. */
    public static void getEles() throws Exception {
        Element root = getDocument().getRootElement();
        List students = root.elements("student");
        for (Iterator cursor = students.iterator(); cursor.hasNext();) {
            Element student = (Element) cursor.next();
            System.out.println("arrt:" + student.attributeValue("sn"));
        }
    }

    /** Selects the login element by XPath and prints its sn attribute. */
    public static void getOneAttr() throws Exception {
        Node found = getDocument().selectSingleNode("//students/lqy/login");
        Element login = (Element) found;
        System.out.println("attr:" + login.attributeValue("sn"));
    }

    /** Selects all student elements by XPath and prints their sn attributes. */
    public static void getAttrs() throws Exception {
        List students = getDocument().selectNodes("//students/student");
        for (Iterator cursor = students.iterator(); cursor.hasNext();) {
            Element student = (Element) cursor.next();
            System.out.println("attr:" + student.attributeValue("sn"));
        }
    }
}
students-read.xml
<?xml version="1.0" encoding="UTF-8"?> <?xml-stylesheet type="text/xsl" href="students.xsl"?> <students> <!--An Student Catalog--> <student sn="01"> <name>sam</name> <age>18</age> </student> <student sn="02"> <name>lin</name> <age>20</age> </student> <lqy aa="123123123"> <login sn="03">luo</login> <pass>123456</pass> </lqy> </students>
输出结果
5.通过XPath找到节点
package lqy.Dom4jTest;

import java.io.File;
import java.util.Iterator;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

/**
 * Locates elements and attributes of students-read.xml with XPath
 * expressions (requires jaxen on the classpath).
 */
public class XmlReadByXPath {

    /**
     * Runs the four lookups in sequence.
     *
     * @param args unused
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        XmlReadByXPath.getOneEleByXPath();
        XmlReadByXPath.getElesByXPath();
        XmlReadByXPath.getAttrsByXPath();
        XmlReadByXPath.getOneAttrByXPath();
    }

    /** Parses students-read.xml from the working directory. */
    public static Document getDocument() throws Exception {
        SAXReader reader = new SAXReader();
        Document document = reader.read(new File("students-read.xml"));
        return document;
    }

    /**
     * Demonstrates {@code selectSingleNode} on an element path, on a path
     * with several matches, and on an attribute path, printing the text of
     * each selected node.
     */
    public static void getOneEleByXPath() throws Exception {
        Document document = getDocument();

        Node node1 = document.selectSingleNode("//students/lqy/login");
        String nodeText1 = node1.getText();
        System.out.println("nodeText1:" + nodeText1);

        // When several nodes match, only the first one is returned
        Node node2 = document.selectSingleNode("//students/student/name");
        String nodeText2 = node2.getText();
        System.out.println("nodeText2:" + nodeText2);

        // Print the text of the selected attribute node itself (node3),
        // not a node-type constant read through another variable.
        Node node3 = document.selectSingleNode("//students/student/@sn");
        String nodeText3 = node3.getText();
        System.out.println("nodeText3:" + nodeText3);
    }

    /** Prints the text of every student name element. */
    public static void getElesByXPath() throws Exception {
        Document document = getDocument();
        List<?> list = document.selectNodes("//students/student/name");
        for (Object item : list) {
            Element elm = (Element) item;
            System.out.println("elm:" + elm.getText());
        }
    }

    /** Selects the login element and prints its sn attribute. */
    public static void getOneAttrByXPath() throws Exception {
        Document document = getDocument();
        Node node1 = document.selectSingleNode("//students/lqy/login");
        Element elm = (Element) node1;
        System.out.println("attr:" + elm.attributeValue("sn"));
    }

    /** Selects all student elements and prints the sn attribute of each. */
    public static void getAttrsByXPath() throws Exception {
        Document document = getDocument();
        List<?> list = document.selectNodes("//students/student");
        for (Object item : list) {
            Element elm = (Element) item;
            System.out.println("attr:" + elm.attributeValue("sn"));
        }
    }
}
输出结果
6.XML转换成String
package lqy.Dom4jTest;

import java.io.File;
import java.util.Iterator;
import java.util.List;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;

/**
 * Shows how a whole document, its root element, or any single element can be
 * turned into an XML string with {@code asXML()}.
 */
public class XMLToString {

    /**
     * Reads students-read.xml and prints three serializations, each followed
     * by a separator line.
     *
     * @param args unused
     * @throws Exception if the file cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {
        File source = new File("students-read.xml");
        Document parsed = new SAXReader().read(source);

        // Whole document
        System.out.println(parsed.asXML());
        System.out.println("------------------docXmlText-------------------");

        // Root element only
        Element rootElement = parsed.getRootElement();
        System.out.println(rootElement.asXML());
        System.out.println("------------------rootXmlText-------------------");

        // A single child element
        Element lqyElement = rootElement.element("lqy");
        System.out.println(lqyElement.asXML());
        System.out.println("------------------memberXmlText-------------------");
    }
}
输出
参考
1.http://zhangjunhd.blog.51cto.com/113473/126310
2.http://www.blogjava.net/junglesong/archive/2008/02/21/181196.html