使用java原生API,DOM4J,JDOM和SAX解析XML文件

解析 XML 有两种方式: SAX 和 DOM 。它们各有利弊。

        DOM 是把 XML 文档全部装载到内存中,然后当成一棵树进行处理。其好处是当成树处理起来比较方便,但弊端是如果 XML 文件比较大,内存消耗也会比较大;

        SAX 是逐行扫描 XML 文档,逐行解析,而且可以在处理 XML 文档过程中的任意时刻中止处理过程,比如找到我们的目标节点,剩下的 XML 文档内容就可以不读了,直接结束。其弊端是操作起来相对不方便,而且对 XML 文档进行修改、新增、删除等操作时比较不方便。

        SAX 是事件驱动型 XML 解析的一个标准接口。它的工作原理是读到文档的开始与结束、标签元素的开始与结束、内容实体等地方时,触发相应的函数,我们就可以在相应的函数中进行我们所要进行的处理。

 

1.使用Java API进行DOM解析

 

 

只在根节点<addresslist>下面建立一个子节点<name>

<?xml version="1.0" encoding="UTF-8"?>
<addresslist>
	<linkman>
		<name>张三</name>
		<email>www.baidu.com</email>
	</linkman>
	
	<linkman>
		<name>李四</name>
		<email>www.sina.com</email>
	</linkman>
</addresslist>

 

import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

//Function        : 	DOM_demo
/**
 * Minimal JAXP DOM example: parses an XML file and prints the text of one
 * {@code <name>} element.
 */
public class DOM_demo {

	/**
	 * Parses the XML file and prints the text of the {@code <name>} element at index 1.
	 *
	 * @param args unused
	 * @throws Exception if the parser cannot be created, or the file cannot be read or parsed
	 */
	public static void main(String[] args) throws Exception{
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();	// create the factory
		DocumentBuilder builder = factory.newDocumentBuilder();		// create the builder
		// Parse failures (SAXException / IOException) are allowed to propagate.
		// Swallowing them would leave the document null and fail on the next
		// line with an uninformative NullPointerException.
		Document doc = builder.parse("/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name.xml");
		NodeList nl = doc.getElementsByTagName("name");		// look up all <name> elements
		// NodeList indices are zero-based, so item(1) is the second <name> in document order.
		System.out.println("姓名:"+nl.item(1).getFirstChild().getNodeValue());
	}

}

 

一些DOM操作,循环输出节点信息

import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Element;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

//Function        : 	DOM_demo
/**
 * JAXP DOM example: parses an XML file and prints the name and e-mail of
 * every {@code <linkman>} element.
 */
public class DOM_demo {

	/**
	 * Parses the XML file and prints, for each {@code <linkman>}, the text of its
	 * first {@code <name>} and first {@code <email>} descendant.
	 *
	 * @param args unused
	 * @throws Exception if the parser cannot be created, or the file cannot be read or parsed
	 */
	public static void main(String[] args) throws Exception{
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();	// create the factory
		DocumentBuilder builder = factory.newDocumentBuilder();		// create the builder
		// Parse failures (SAXException / IOException) are allowed to propagate.
		// Swallowing them would leave the document null and fail below with an
		// uninformative NullPointerException.
		Document doc = builder.parse("/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name.xml");

		NodeList lm = doc.getElementsByTagName("linkman");		// look up all <linkman> elements
		for(int i=0;i<lm.getLength();i++){
			Element e = (Element)lm.item(i);		// current <linkman>
			System.out.println("姓名:"+e.getElementsByTagName("name").item(0).getFirstChild().getNodeValue());
			System.out.println("邮箱:"+e.getElementsByTagName("email").item(0).getFirstChild().getNodeValue());
		}
	}

}

 

生成XML文件

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;

//Function        : 	DOM_demo
/**
 * JAXP DOM example: builds a small address-list document in memory and
 * serializes it to an XML file.
 */
public class DOM_demo {

	/**
	 * Builds {@code <addresslist><linkman><name/><email/></linkman></addresslist>}
	 * and writes it to the output file with UTF-8 encoding.
	 *
	 * @param args unused
	 * @throws Exception if the builder or transformer cannot be created, or the
	 *         document cannot be written
	 */
	public static void main(String[] args) throws Exception{
		// Create the factory and the builder, then start from an empty document.
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		DocumentBuilder builder = factory.newDocumentBuilder();
		Document doc = builder.newDocument();

		// Create the elements.
		Element addresslist = doc.createElement("addresslist");
		Element linkman = doc.createElement("linkman");
		Element name = doc.createElement("name");
		Element email = doc.createElement("email");
		// Give each leaf element a text node.
		name.appendChild(doc.createTextNode("王五"));
		email.appendChild(doc.createTextNode("www.soho.com"));
		// Wire up the parent/child relationships.
		linkman.appendChild(name);
		linkman.appendChild(email);
		addresslist.appendChild(linkman);
		doc.appendChild(addresslist);				// root element of the document

		// Serialize the document. Transformer failures are allowed to propagate:
		// swallowing TransformerConfigurationException would leave the transformer
		// null and crash on the next line, and swallowing TransformerException
		// would hide the fact that no file was written.
		TransformerFactory tf = TransformerFactory.newInstance();
		Transformer t = tf.newTransformer();
		t.setOutputProperty(OutputKeys.ENCODING, "UTF-8");		// output encoding
		DOMSource source = new DOMSource(doc);
		StreamResult result = new StreamResult(new File("/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name_output.xml")) ;
		t.transform(source, result);
	}

}

 

2.使用DOM4J解析XML

 

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

//Function        : 	DOM4JWriter
/**
 * dom4j example: builds a small address-list document and writes it to a
 * pretty-printed UTF-8 XML file.
 */
public class DOM4JWriter {

	/**
	 * Builds {@code <addresslist><linkman><name/><email/></linkman></addresslist>}
	 * and writes it to the output file. I/O failures are reported on stderr.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		Document doc = DocumentHelper.createDocument();		// empty document

		Element addresslist = doc.addElement("addresslist");	// root element
		Element linkman = addresslist.addElement("linkman");	// child of the root
		// name and email belong to the linkman, not to the root; attaching them
		// to addresslist would leave <linkman> empty in the output.
		Element name = linkman.addElement("name");
		Element email = linkman.addElement("email");
		name.setText("张三");
		email.setText("www.baidu.com");

		// Pretty-printed output, encoded as UTF-8.
		OutputFormat format = OutputFormat.createPrettyPrint();
		format.setEncoding("UTF-8");

		// try-with-resources closes the file even when writing fails.
		try (FileOutputStream out = new FileOutputStream(new File("/home/common/software/coding/HelloWord/JavaWeb/bin/name_out.xml"))) {
			XMLWriter writer = new XMLWriter(out, format);
			writer.write(doc);
			writer.flush();		// push buffered characters to the stream before it is closed
		}catch(IOException e){
			e.printStackTrace();
		}
	}

}

 

解析输出文件

import java.io.File;
import java.util.Iterator;

import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

//Function        : 	DOM4JReader
/**
 * dom4j example: reads the address-list XML file and prints the name and
 * e-mail of every {@code <linkman>} element.
 */
public class DOM4JReader {

	/**
	 * Reads the XML file and prints the {@code <name>} and {@code <email>} text of
	 * each {@code <linkman>} child of the root element. If the file cannot be
	 * read, the error is reported on stderr and nothing else is printed.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		File file = new File("/home/common/software/coding/HelloWord/JavaWeb/bin/name_out.xml");
		SAXReader reader = new SAXReader();			// SAX-backed dom4j reader
		Document doc;
		try{
			doc = reader.read(file);
		}catch(DocumentException e){
			e.printStackTrace();
			// Without a document there is nothing to print; continuing would
			// only dereference null.
			return;
		}
		Element root = doc.getRootElement();
		// Restrict the iteration to <linkman> children so that other children of
		// the root are not mistaken for contacts.
		Iterator<?> iter = root.elementIterator("linkman");
		while(iter.hasNext()){
			Element linkman = (Element) iter.next();
			System.out.println("姓名:"+linkman.elementText("name"));
			System.out.println("邮件:"+linkman.elementText("email"));
		}

	}

}

 

3.使用JDOM解析XML文件

 

JavaDOC的网址:http://www.jdom.org/docs/apidocs/index.html

 

import java.io.FileOutputStream;

import org.jdom2.Attribute;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.output.XMLOutputter;
import org.xml.sax.Attributes;

/**
 * JDOM2 example: builds a small address-list document, with an {@code id}
 * attribute on {@code <name>}, and writes it to a UTF-8 XML file.
 */
public class WriteXML {

	/**
	 * Builds the document and writes it to the output file. I/O failures are
	 * reported on stderr.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		// Create the elements.
		Element addresslist = new Element("addresslist");
		Element linkman = new Element("linkman");
		Element name = new Element("name");
		Element email = new Element("email");
		// Attribute to attach to <name>.
		Attribute id = new Attribute("id","zs");
		// The document, rooted at <addresslist>.
		Document doc = new Document(addresslist);
		// Element content.
		name.setText("张三");
		name.setAttribute(id);
		email.setText("www.baidu.com");
		// <name> and <email> are children of <linkman>, which is a child of the root.
		linkman.addContent(name);
		linkman.addContent(email);
		addresslist.addContent(linkman);
		// Serializer configured for UTF-8 output.
		XMLOutputter outputter = new XMLOutputter();
		outputter.setFormat(outputter.getFormat().setEncoding("UTF-8"));
		// try-with-resources closes the file whether or not writing succeeds.
		try (FileOutputStream stream = new FileOutputStream("/home/common/software/coding/HelloWord/JavaWeb/bin/address.xml")) {
			outputter.output(doc, stream);
		}catch(java.io.IOException e){
			e.printStackTrace();
		}
	}

}

 

import java.util.List;

import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.input.SAXBuilder;

//Function        : 	ReadXML
/**
 * JDOM2 example: reads the address-list XML file and prints each contact's
 * name, id attribute and e-mail.
 */
public class ReadXML {

	/**
	 * Parses the XML file and prints one record per {@code <linkman>} child of
	 * the root. A missing {@code <name>} element or {@code id} attribute is
	 * printed as {@code null} instead of raising a NullPointerException.
	 *
	 * @param args unused
	 * @throws Exception if the file cannot be read or parsed
	 */
	public static void main(String[] args) throws Exception{
		// SAX-backed JDOM builder.
		SAXBuilder builder = new SAXBuilder();
		Document read_doc = builder.build("/home/common/software/coding/HelloWord/JavaWeb/bin/address.xml");
		Element root = read_doc.getRootElement();
		// JDOM2 returns a typed list, so no raw type or cast is needed.
		List<Element> linkmen = root.getChildren("linkman");
		for(Element linkman : linkmen){
			String name = linkman.getChildText("name");
			// Guard the lookup chain: getChild returns null when <name> is absent,
			// and getAttributeValue returns null when the attribute is absent.
			Element nameElement = linkman.getChild("name");
			String id = (nameElement == null) ? null : nameElement.getAttributeValue("id");
			String email = linkman.getChildText("email");

			System.out.println("---------联系人---------");
			System.out.println("姓名:"+name+",编号:"+id);
			System.out.println("邮箱:"+email);
			System.out.println("------------------");
		}
	}

}

 

4.使用SAX解析XML文件

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that echoes the parsed document to standard output as it is
 * read: an XML declaration line, then each start tag with its attributes,
 * character data, and end tags.
 */
public class MySAX extends DefaultHandler{

	/** Prints a fixed XML declaration line when the document starts. */
	@Override
	public void startDocument() throws SAXException{
		System.out.println("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
	}

	/** Prints a closing message when the document ends. */
	@Override
	public void endDocument() throws SAXException{
		System.out.println("\n 文档读取结束。。。");
	}

	/**
	 * Prints the start tag, including every attribute as {@code qName="value"}.
	 *
	 * @param uri        namespace URI (unused)
	 * @param localName  local name (unused)
	 * @param name       qualified name, printed as the tag name
	 * @param attributes attributes of the element; may be null
	 */
	@Override
	public void startElement(String uri,String localName,String name,Attributes attributes) throws SAXException{
		System.out.print("<");
		System.out.print(name);
		if(attributes != null){
			for(int i=0;i<attributes.getLength();i++){
				System.out.print(" "+attributes.getQName(i)+"=\""+attributes.getValue(i)+"\"");
			}
		}
		// The tag must be closed whether or not attributes were supplied.
		System.out.print(">");
	}

	/**
	 * Prints a run of character data.
	 *
	 * <p>The method must be named {@code characters} to override the
	 * {@link DefaultHandler} callback; under any other name the parser never
	 * calls it and element text is silently dropped.
	 *
	 * @param ch     buffer holding the characters
	 * @param start  offset of the first character in {@code ch}
	 * @param length number of characters to print
	 */
	@Override
	public void characters(char[] ch,int start,int length) throws SAXException{
		System.out.print(new String(ch,start,length));
	}

	/**
	 * Misspelled predecessor of {@link #characters(char[], int, int)}, kept only
	 * so existing direct callers keep compiling.
	 *
	 * @deprecated use {@link #characters(char[], int, int)}
	 */
	@Deprecated
	public void character(char[] ch,int start,int length) throws SAXException{
		characters(ch,start,length);
	}

	/**
	 * Prints the end tag.
	 *
	 * @param uri       namespace URI (unused)
	 * @param localName local name (unused)
	 * @param name      qualified name, printed as the tag name
	 */
	@Override
	public void endElement(String uri,String localName,String name) throws SAXException{
		System.out.print("</");
		System.out.print(name);
		System.out.print(">");
	}

}

 

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

/**
 * Driver for the SAX example: runs a JAXP SAX parser over the sample XML
 * file, using a {@code MySAX} instance as the event handler.
 */
public class testSAX {

	/**
	 * Creates a SAX parser from the default factory and parses the sample file.
	 *
	 * @param args unused
	 * @throws Exception if the parser cannot be created or parsing fails
	 */
	public static void main(String[] args) throws Exception{
		// Obtain a parser from a freshly created factory.
		SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
		// Location of the document to read.
		String xmlPath = "/home/common/software/coding/HelloWord/JavaWeb/bin/dom_name.xml";
		// Parse it, sending every SAX event to the handler.
		saxParser.parse(xmlPath, new MySAX());
	}

}

 

posted @ 2016-03-31 21:47  tonglin0325  阅读(262)  评论(0编辑  收藏  举报