NekoHTML and Dom4j

http://pro.ctlok.com/2010/07/java-read-html-dom4j-nekohtml.html

 

package com.ctlok.pro;
 
import java.io.IOException;
 
import org.cyberneko.html.parsers.DOMParser;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.DOMReader;
import org.xml.sax.SAXException;
 
/**
 * Small demo that downloads a stock quote page, parses the HTML with NekoHTML,
 * converts the resulting W3C DOM into a dom4j document and prints three values
 * selected with XPath: the quote name, the buy price and the sell price.
 */
public class Main {

    /** Page that is downloaded and parsed. */
    private static final String QUOTE_URL = "http://hk.finance.yahoo.com/q?s=0005.HK";

    // Element names should be upper case in the XPath expressions below.
    private static final String NAME_XPATH =
            "//DIV[@id='quote-bar-latest']/*/H2/node()";
    private static final String BUY_XPATH =
            "//DIV[@id='quote-bar-trade-info']/TABLE/TBODY/TR[1]/TD[2]";
    private static final String SELL_XPATH =
            "//DIV[@id='quote-bar-trade-info']/TABLE/TBODY/TR[2]/TD[2]";

    /**
     * Number of leading characters dropped from the buy/sell cell text.
     * NOTE(review): presumably a two-character prefix in front of the price;
     * verify against the actual page markup.
     */
    private static final int PRICE_PREFIX_LENGTH = 2;

    /** Printed in place of a value whose node could not be found. */
    private static final String MISSING = "N/A";

    /**
     * Fetches the quote page and prints the name, buy and sell values.
     * Parse and I/O failures are reported on standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            DOMParser parser = new DOMParser();
            parser.parse(QUOTE_URL);

            org.w3c.dom.Document document = parser.getDocument();
            DOMReader domReader = new DOMReader();
            Document doc = domReader.read(document);

            System.out.println(textOf(doc, NAME_XPATH));
            System.out.println("Buy: " + priceOf(doc, BUY_XPATH));
            System.out.println("Sell: " + priceOf(doc, SELL_XPATH));
        } catch (SAXException e) {
            System.err.println("Failed to parse " + QUOTE_URL + ": " + e);
        } catch (IOException e) {
            System.err.println("Failed to read " + QUOTE_URL + ": " + e);
        }
    }

    /**
     * Returns the text of the first node matching {@code xpath}, or
     * {@link #MISSING} when nothing matches or the node carries no text.
     * A missing node is also reported on standard error.
     */
    private static String textOf(Document doc, String xpath) {
        Node node = doc.selectSingleNode(xpath);
        if (node == null) {
            // selectSingleNode yields null when the page layout no longer matches.
            System.err.println("No node matched XPath: " + xpath);
            return MISSING;
        }
        String text = node.getText();
        return text == null ? MISSING : text;
    }

    /**
     * Returns the text selected by {@code xpath} with its leading
     * {@link #PRICE_PREFIX_LENGTH} characters removed, or {@link #MISSING}
     * when the node is absent. Text shorter than the prefix yields an empty
     * string instead of an out-of-range failure.
     */
    private static String priceOf(Document doc, String xpath) {
        String text = textOf(doc, xpath);
        // Reference comparison is intentional: only the sentinel returned by textOf is skipped.
        if (text == MISSING) {
            return MISSING;
        }
        if (text.length() <= PRICE_PREFIX_LENGTH) {
            return "";
        }
        return text.substring(PRICE_PREFIX_LENGTH);
    }
}

 

posted @ 2015-01-05 16:15  samu  阅读(209)  评论(0)  编辑  收藏  举报