java解析XML

DOM解析XML

1. 遍历xml文档节点树

students.xml

<?xml version="1.0" encoding="utf-8" ?>

<students>
    <student sn="01"><!-- 在student与name父子元素节点之间的是一个文本节点('\n\t\t') -->
        <name>张三</name>
        <age>18</age>
        <score>100</score>
    </student>

    <student sn="02">
        <name>lisi</name>
        <age>20</age>
        <score>100</score>
    </student>
</students>

 

遍历xml文档树:

package dom.pasing;

import java.io.File;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

/**
 * Walks a DOM tree depth-first and prints every processing instruction,
 * element, attribute and text node it meets. Other node kinds are not printed,
 * but their children are still visited.
 */
public class DomPrinter {

    /**
     * Prints one line made of the label, a tab, the node name, a colon and the node value.
     *
     * @param nodeType label describing the kind of node being printed
     * @param node     node whose name and value are written to standard output
     */
    public static void printNodeInfo(String nodeType, Node node) {
        String line = nodeType + "\t" + node.getNodeName() + ":" + node.getNodeValue();
        System.out.println(line);
    }

    /**
     * Prints the given node (when it is one of the reported kinds) and then
     * recurses into each of its children in sibling order.
     *
     * @param node root of the subtree to print
     */
    public static void traverseNode(Node node) {
        short kind = node.getNodeType();

        if (kind == Node.PROCESSING_INSTRUCTION_NODE) {
            printNodeInfo("处理指令", node);
        } else if (kind == Node.ELEMENT_NODE) {
            printNodeInfo("元素", node);
            // Attributes are not children in the DOM tree, so they are listed here explicitly.
            NamedNodeMap attributes = node.getAttributes();
            int attributeCount = attributes.getLength();
            for (int index = 0; index < attributeCount; index++) {
                printNodeInfo("属性", attributes.item(index));
            }
        } else if (kind == Node.TEXT_NODE) {
            printNodeInfo("文本", node);
        }

        for (Node current = node.getFirstChild(); current != null; current = current.getNextSibling()) {
            traverseNode(current);
        }
    }

    /**
     * Parses {@code src/students.xml}, prints the XML declaration details and
     * then dumps the whole tree. Parser and I/O failures are reported via their stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

        try {
            DocumentBuilder parser = factory.newDocumentBuilder();
            Document doc = parser.parse(new File("src/students.xml"));

            String declaration = "version=" + doc.getXmlVersion() + "   encoding=" + doc.getXmlEncoding()
                    + "   standalone=" + doc.getXmlStandalone();
            System.out.println(declaration);

            traverseNode(doc);
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }
}

2. 获取students.xml文档student元素下的指定name、age元素的值

        // Parse src/students.xml into an in-memory DOM tree.
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();

        DocumentBuilder builder = builderFactory.newDocumentBuilder();

        Document document = builder.parse(new File("src/students.xml"));

        // Collect every <student> element in the document.
        NodeList nodeList = document.getElementsByTagName("student");

        for (int i=0; i<nodeList.getLength(); i++) {
            Element elementStu = (Element) nodeList.item(i);
            Node elementName = elementStu.getElementsByTagName("name").item(0);
            Node elementAge = elementStu.getElementsByTagName("age").item(0);

            // item(0) is null when the child element is missing, and an empty element
            // (<name/>) has no first child. Reading through getFirstChild().getNodeValue()
            // throws NullPointerException in both cases, so fall back to an empty string
            // and use getTextContent(), which also returns the complete text of the element.
            String name = (elementName == null) ? "" : elementName.getTextContent();
            String age = (elementAge == null) ? "" : elementAge.getTextContent();

            System.out.println("-----------学生信息--------------");
            System.out.println("姓名:" + name);
            System.out.println("年龄:" + age);
        }

3. 在DOM文档树上增删节点

            // Parse the existing document into a DOM tree.
            DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder builder = builderFactory.newDocumentBuilder();
            Document document = builder.parse(new File("src/students.xml"));


            // Create the nodes that make up a new <student sn="03"> entry.
            Element studentElem = document.createElement("student");
            Element nameElem = document.createElement("name");
            Element ageElem = document.createElement("age");

            Attr attr = document.createAttribute("sn");
            attr.setValue("03");
            Text txtName = document.createTextNode("王五");
            Text txtAge = document.createTextNode("19");

            // Each text node goes inside its own element. The name text used to be appended
            // to studentElem, which left <name> empty and put the text directly under <student>.
            nameElem.appendChild(txtName);
            ageElem.appendChild(txtAge);
            studentElem.setAttributeNode(attr);
            studentElem.appendChild(nameElem);
            studentElem.appendChild(ageElem);
            document.getDocumentElement().appendChild(studentElem); // add the new student element under the document's root element

            NodeList nodeList = document.getElementsByTagName("student");
            Node nodeDel = nodeList.item(0);
            nodeDel.getParentNode().removeChild(nodeDel); // remove the first student element


            DOMSource source = new DOMSource(document);
            StreamResult result = new StreamResult(new File("src/convert.xml"));

            TransformerFactory transformerFactory = TransformerFactory.newInstance();
            Transformer transformer = transformerFactory.newTransformer();
            transformer.transform(source, result); // save the DOM tree to src/convert.xml

 

dom4j解析XML

1. 生成XML文档

        /*
         * Build the response message and print it as an XML string.
         * The element tree created below is:
         *
         *      <validateResponse>
         *         <id>RDF-101</id>
         *         <desc>RDF Validation Result</desc>
         *         <content>...</content>
         *      </validateResponse>
         */
        Document document = DocumentHelper.createDocument();
        Element response = document.addElement("validateResponse");

        // Children are added in the order they should appear under the root element.
        Element idChild = response.addElement("id");
        idChild.setText("RDF-101");

        Element descChild = response.addElement("desc");
        descChild.setText("RDF Validation Result");

        Element contentChild = response.addElement("content");
        contentChild.setText("sjflafl速度就废了");

        String xml = document.asXML();
        System.out.println(xml);

2. 从格式化的XML字符串中取出指定元素的值

        // The XML message to query, assembled as a single string.
        String xmlString = "<validate type=\"rdfValidation\">"
                + "<id>RDF-101</id>"
                + "<desc>validate content</desc>"
                + "<schema>http://iec.ch/TC57/2005/CIM-schema-cim10#</schema>"
                + "<content>gfasdgadgdfhfjghjghkjkfkdhsgdasgdsagfdgfsdgsfhfgd</content>"
                + "</validate>";

        Document document = DocumentHelper.parseText(xmlString);

        // Look up the type attribute and three child elements by XPath.
        Attribute type = (Attribute) document.selectSingleNode("/validate/@type");
        Element id = (Element) document.selectSingleNode("/validate/id");
        Element schema = (Element) document.selectSingleNode("/validate/schema");
        Element content = (Element) document.selectSingleNode("/validate/content");

        // Print the four values, one per line, with a single println call.
        StringBuilder report = new StringBuilder();
        report.append(type.getText()).append("\n");
        report.append(id.getText()).append("\n");
        report.append(schema.getText()).append("\n");
        report.append(content.getText());
        System.out.println(report.toString());

 

SAX解析xml

1. 解析students.xml,解析器实现ContentHandler接口

package sax.parsing;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.*;

/**
 * Minimal SAX example: obtains an {@link XMLReader} from the JAXP factory,
 * registers {@code MyContentHandler} on it and parses the students document.
 */
public class SAXDemo {

    /**
     * Runs the demo.
     *
     * @param args unused
     * @throws Exception if the parser cannot be created or the document cannot be read or parsed
     */
    public static void main(String[] args) throws Exception {

        // SAX parser factory
        SAXParserFactory spf = SAXParserFactory.newInstance();

        // SAX parser
        SAXParser sparser = spf.newSAXParser();

        // Reader that drives the parse and delivers events to the handler
        XMLReader xReader = sparser.getXMLReader();

        xReader.setContentHandler(new MyContentHandler());

        // Parse the students document. This used to point at "src//book.xml", which does
        // not match the document this example describes or the output shown for it.
        xReader.parse("src/students.xml");
    }
}

/**
 * {@link ContentHandler} that echoes element start tags, end tags and character
 * data to standard output, one event per line. All other callbacks are
 * required by the interface but deliberately do nothing.
 */
class MyContentHandler implements ContentHandler {

    // ---- callbacks that produce output ----

    /** Prints the qualified name as an opening tag; attributes are not printed. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        String openTag = "<" + qName + ">";
        System.out.println(openTag);
    }

    /** Prints the qualified name as a closing tag. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        String closeTag = "</" + qName + ">";
        System.out.println(closeTag);
    }

    /** Prints the reported slice of the character buffer on its own line. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        String text = String.valueOf(ch, start, length);
        System.out.println(text);
    }

    // ---- callbacks with no behavior in this example ----

    @Override
    public void setDocumentLocator(Locator locator) {
        // no-op
    }

    @Override
    public void startDocument() throws SAXException {
        // no-op
    }

    @Override
    public void endDocument() throws SAXException {
        // no-op
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) throws SAXException {
        // no-op
    }

    @Override
    public void endPrefixMapping(String prefix) throws SAXException {
        // no-op
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
        // no-op
    }

    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        // no-op
    }

    @Override
    public void skippedEntity(String name) throws SAXException {
        // no-op
    }
}

输出结果:

<students>
  <student>
    <name>张三</name>
    <age>18</age>
    <score>100</score>
  </student>


  <student>
    <name>lisi</name>
    <age>20</age>
    <score>100</score>
  </student>
</students>

 

 

2. 解析器继承DefaultHandler类(默认解析类)

package sax.parsing;


import java.io.File;
import java.io.IOException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * SAX handler that re-prints the parsed document to standard output:
 * processing instructions, start tags with their attributes, character data and end tags.
 */
public class SaxPrinter extends DefaultHandler {

    /** Prints a fixed XML declaration line; the text is a literal, not taken from the parsed input. */
    @Override
    public void startDocument() throws SAXException {
        System.out.println("<?xml version='1.0' encoding='gb2312' ?>");
    }

    /** Prints the processing instruction's target and data, followed by a line break. */
    @Override
    public void processingInstruction(String target, String data) throws SAXException {
        String instruction = "<?" + target + " " + data + "?>";
        System.out.println(instruction);
    }

    /** Prints the opening tag with every attribute as {@code name="value"}, without a line break. */
    @Override
    public void startElement(String uri, String localName, String qname, Attributes attrs) throws SAXException {
        StringBuilder tag = new StringBuilder("<").append(qname);
        int attributeCount = attrs.getLength();
        for (int index = 0; index < attributeCount; index++) {
            // Two spaces separate each attribute from what precedes it.
            tag.append("  ").append(attrs.getQName(index))
               .append("=\"").append(attrs.getValue(index)).append("\"");
        }
        tag.append(">");
        System.out.print(tag.toString());
    }

    /** Prints the closing tag without a line break. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        String closeTag = "</" + qName + ">";
        System.out.print(closeTag);
    }

    /** Prints the reported slice of the character buffer as-is. */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        System.out.print(String.valueOf(ch, start, length));
    }

    /**
     * Parses {@code src/students.xml} with this handler. Parser and I/O
     * failures are reported via their stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        SAXParserFactory factory = SAXParserFactory.newInstance();
        try {
            SAXParser saxParser = factory.newSAXParser();
            File input = new File("src/students.xml");
            saxParser.parse(input, new SaxPrinter());
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }
}

输出结果:

<?xml version='1.0' encoding='gb2312' ?>
<students>
  <student sn="01">
    <name>张三</name>
    <age>18</age>
    <score>100</score>
  </student>


  <student sn="02">
    <name>lisi</name>
    <age>20</age>
    <score>100</score>
  </student>
</students>

 

 

3. 依据XSD对XML进行校验

src/students.xsd

<?xml version="1.0" encoding="utf-8" ?>

<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
    <xs:element name="students">
        <xs:complexType>
            <xs:sequence>
                <xs:element name="student" type="studentType" maxOccurs="unbounded" />
            </xs:sequence>
        </xs:complexType>
    </xs:element>
    
    <xs:complexType name="studentType">
        <xs:sequence>
            <xs:element name="name" type="xs:token" />
            <xs:element name="age" type="xs:positiveInteger" />
            <xs:element name="score" type="xs:float" />
        </xs:sequence>
        <xs:attribute name="sn" type="xs:token" />
    </xs:complexType>
</xs:schema>

src/students.xml

<?xml version="1.0" encoding="utf-8" ?>

<students>
    <student sn="01" attr_test="errorAttr"><!-- 在student与name父子元素节点之间的是一个文本节点('\n\t\t') -->
        <name>张三</name>
        <age>18</age>
        <score>100</score>
    </student>

    <elem_test1 attr_test1="errorAttr1" />
    
    <student sn="02">
        <name>lisi</name>
        <age>20</age>
        <score>100</score>
    </student>
    
    <elem_test2 attr_test1="errorAttr2" />
</students>

 

实现ErrorHandler接口,执行校验:

package sax.parsing;

import java.io.File;
import java.io.IOException;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import org.testng.annotations.Test;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

/**
 * Validates {@code src/students.xml} against {@code src/students.xsd} and acts
 * as the {@link ErrorHandler} that reports each problem. For every problem a
 * banner line goes to standard output and a detail line (severity, location,
 * message) goes to standard error. None of the callbacks rethrows.
 */
public class SchemaValidator implements ErrorHandler {

    @Override
    public void warning(SAXParseException exception) throws SAXException {
        report("触发警告:", "warning: ", exception);
    }

    @Override
    public void error(SAXParseException exception) throws SAXException {
        report("触发错误:", "error: ", exception);
    }

    @Override
    public void fatalError(SAXParseException exception) throws SAXException {
        report("触发致命错误:", "fatal error: ", exception);
    }

    /** Writes the banner to stdout, then the prefixed location and message to stderr. */
    private void report(String banner, String severityPrefix, SAXParseException problem) {
        System.out.println(banner);
        System.err.println(severityPrefix + getLocationString(problem) + ": " + problem.getMessage());
    }

    /**
     * Formats where the problem occurred as {@code line:column}, preceded by
     * the system id and two spaces when a system id is available.
     */
    private String getLocationString(SAXParseException ex) {
        String systemId = ex.getSystemId();
        String prefix = (systemId == null) ? "" : systemId + "  ";
        return prefix + ex.getLineNumber() + ':' + ex.getColumnNumber();
    }

    /**
     * Obtains a validator from a compiled schema instance and validates the
     * students document with it. Schema and I/O failures are reported via
     * their stack trace.
     */
    //@Test
    public void validatorFromSchema() {

        // Schema factory for W3C XML Schema
        SchemaFactory factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);

        try {
            // Compile the XSD into a schema object
            Schema compiled = factory.newSchema(new File("src/students.xsd"));

            // Create the validator for that schema and route problems to a SchemaValidator
            Validator checker = compiled.newValidator();
            checker.setErrorHandler(new SchemaValidator());

            StreamSource input = new StreamSource(new File("src/students.xml"));
            checker.validate(input);
        } catch (SAXException | IOException e) {
            e.printStackTrace();
        }
    }
}
输出结果:

触发错误:
error: file:/D:/eclipse-luna-jee/workspace/xsl_trans/src/students.xml 4:41: cvc-complex-type.3.2.2: Attribute 'attr_test' is not allowed to appear in element 'student'.
触发错误:
error: file:/D:/eclipse-luna-jee/workspace/xsl_trans/src/students.xml 10:40: cvc-complex-type.2.4.a: Invalid content was found starting with element 'elem_test1'. One of '{student}' is expected.

 

 

 

org.w3c.dom.Node

 

The Node interface is the primary datatype for the entire Document Object Model.

Node接口是DOM的主要数据类型,代表了文档树上的一个单个节点

It represents a single node in the document tree.

While all objects implementing the Node interface expose methods for dealing with children, not all objects implementing the Node interface may have children.

尽管实现Node接口的所有对象都暴露了处理子节点的方法,但并不是实现Node的所有对象都会有子节点。例如Text文本节点就没有子节点

For example, Text nodes may not have children, and adding children to such nodes results in a DOMException being raised.

The attributes nodeName, nodeValue and attributes are included as a mechanism to get at node information without casting down to the specific derived interface.

 

In cases where there is no obvious mapping of these attributes for a specific nodeType (e.g., nodeValue for an Element or attributes for a Comment), this returns null.

Note that the specialized interfaces may contain additional and more convenient mechanisms to get and set the relevant information.

The values of nodeName, nodeValue, and attributes vary according to the node type as follows:

根据nodeType的类型,nodeName, nodeValue, attributes取值相应变化:

| Interface | nodeName | nodeValue | attributes |
| --- | --- | --- | --- |
| Attr | same as Attr.name | same as Attr.value | null |
| CDATASection | "#cdata-section" | same as CharacterData.data, the content of the CDATA Section | null |
| Comment | "#comment" | same as CharacterData.data, the content of the comment | null |
| Document | "#document" | null | null |
| DocumentFragment | "#document-fragment" | null | null |
| DocumentType | same as DocumentType.name | null | null |
| Element | same as Element.tagName | null | NamedNodeMap |
| Entity | entity name | null | null |
| EntityReference | name of entity referenced | null | null |
| Notation | notation name | null | null |
| ProcessingInstruction | same as ProcessingInstruction.target | same as ProcessingInstruction.data | null |
| Text | "#text" | same as CharacterData.data, the content of the text node | null |

 

 

xml文档树(DOM树上的每个节点都是Node接口的实例)

  文档根节点 Document document = builder.parse(new InputSource(new FileInputStream("src/students.xml")));

  文档根元素节点 Element root = document.getDocumentElement();

posted @ 2015-01-20 13:37  asnjudy  阅读(433)  评论(0)  编辑  收藏  举报