NekoHTML and Dom4j
http://pro.ctlok.com/2010/07/java-read-html-dom4j-nekohtml.html
package com.ctlok.pro;

import java.io.IOException;

import org.cyberneko.html.parsers.DOMParser;
import org.dom4j.Document;
import org.dom4j.Node;
import org.dom4j.io.DOMReader;
import org.xml.sax.SAXException;

/**
 * Small demo that parses an HTML page with NekoHTML, converts the resulting
 * W3C DOM into a dom4j document and reads three values out of it with XPath.
 *
 * <p>The XPath expressions are tied to the markup of the page at
 * {@link #QUOTE_URL}. When an expected node is absent, a diagnostic is
 * written to standard error and nothing is printed to standard output.
 */
public class Main {

    /** Page that is fetched and parsed. */
    private static final String QUOTE_URL = "http://hk.finance.yahoo.com/q?s=0005.HK";

    // Element names in the XPath expressions must be upper case
    // (presumably because NekoHTML reports HTML element names in upper case
    // by default -- confirm against the parser configuration in use).
    private static final String NAME_XPATH =
            "//DIV[@id='quote-bar-latest']/*/H2/node()";
    private static final String BUY_XPATH =
            "//DIV[@id='quote-bar-trade-info']/TABLE/TBODY/TR[1]/TD[2]";
    private static final String SELL_XPATH =
            "//DIV[@id='quote-bar-trade-info']/TABLE/TBODY/TR[2]/TD[2]";

    /**
     * Number of leading characters dropped from the buy/sell cell text.
     * NOTE(review): the reason for skipping exactly two characters is not
     * visible in this file -- presumably leading filler in the cell; verify
     * against the live page.
     */
    private static final int PRICE_PREFIX_LENGTH = 2;

    /**
     * Fetches the page, extracts the name, buy and sell values and prints
     * them to standard output. Parse and I/O failures, as well as missing
     * nodes, are reported on standard error.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        try {
            DOMParser parser = new DOMParser();
            parser.parse(QUOTE_URL);
            org.w3c.dom.Document w3cDocument = parser.getDocument();

            // Bridge the W3C DOM produced by the parser into dom4j for XPath.
            Document doc = new DOMReader().read(w3cDocument);

            String name = textOf(doc, NAME_XPATH);
            String buy = textOf(doc, BUY_XPATH);
            String sell = textOf(doc, SELL_XPATH);
            if (name == null || buy == null || sell == null) {
                // textOf has already reported which expression failed.
                return;
            }

            System.out.println(name);
            System.out.println("Buy: " + dropPrefix(buy));
            System.out.println("Sell: " + dropPrefix(sell));
        } catch (SAXException e) {
            System.err.println(e.toString());
        } catch (IOException e) {
            System.err.println(e.toString());
        }
    }

    /**
     * Returns the text of the single node selected by {@code xpath}, or
     * {@code null} (after reporting on standard error) when no node matches
     * or the node has no text.
     */
    private static String textOf(Document doc, String xpath) {
        Node node = doc.selectSingleNode(xpath);
        String text = (node == null) ? null : node.getText();
        if (text == null) {
            System.err.println("No text found for XPath: " + xpath);
        }
        return text;
    }

    /**
     * Drops the first {@link #PRICE_PREFIX_LENGTH} characters of {@code text}.
     * Text shorter than the prefix is returned unchanged instead of raising
     * {@link StringIndexOutOfBoundsException}.
     */
    private static String dropPrefix(String text) {
        if (text.length() < PRICE_PREFIX_LENGTH) {
            return text;
        }
        return text.substring(PRICE_PREFIX_LENGTH);
    }
}