Html解析本地搜网站

业务类

 1 package code.lxy.test;
 2 
 3 import java.io.File;
 4 import java.io.FileNotFoundException;
 5 import java.io.FileOutputStream;
 6 import java.io.PrintWriter;
 7 
 8 import org.htmlparser.Node;
 9 import org.htmlparser.NodeFilter;
10 import org.htmlparser.Parser;
11 import org.htmlparser.tags.Div;
12 import org.htmlparser.tags.LinkTag;
13 import org.htmlparser.util.NodeList;
14 import org.htmlparser.util.ParserException;
15 
16 public class HtmlParserDemo {
17     public static void parserHtml(String htmlToParser)
18             throws FileNotFoundException {
19         PrintWriter writer = new PrintWriter(new FileOutputStream(new File(
20                 "d:/test.text")));
21         Parser parser = new Parser();
22         try {
23             parser.setURL(htmlToParser);
24             parser.setEncoding("UTF-8");
25             NodeFilter filter = new NodeFilter() {
26                 @Override
27                 public boolean accept(Node node) {
28                     // TODO Auto-generated method stub
29                     if (node instanceof Div) {
30                         Div divNode = (Div) node;
31                         // System.out.println(divNode.getAttribute("class"));
32                         if (divNode.getAttribute("class") != null) {
33                             if (divNode.getAttribute("class").endsWith("zuo01_bt")||divNode.getAttribute("class").endsWith("zuo01_con")) {
34                                 return true;
35                             }
36                         }
37                     }
38                     return false;
39                 }
40             };
41             NodeList nodelist = parser.extractAllNodesThatMatch(filter);
42             for (int i = 0; i < nodelist.size(); i++) {
43                 /*Div divNode=(Div) nodelist.elementAt(i);
44                 System.out.println(divNode.toPlainTextString());*/
45                 Div divnode=(Div) nodelist.elementAt(i);
46                 String test=divnode.getAttribute("class");
47                 if(divnode.getAttribute("class").equals("zuo01_bt"))
48                 {
49                     LinkTag linkTag=(LinkTag) divnode.childAt(1);
50                     System.out.println(linkTag.getAttribute("title"));
51                 }else{
52                     System.out.println(divnode.toPlainTextString());
53                 }
54             }
55             writer.close();
56         } catch (ParserException e) {
57             // TODO Auto-generated catch block
58             e.printStackTrace();
59         }
60     }
61 }

测试类

package code.lxy.main;

import java.io.FileNotFoundException;

import code.lxy.test.HtmlParserDemo;

/**
 * Command-line entry point that runs {@link HtmlParserDemo} against a fixed page.
 */
public class MainClass {

    /**
     * Passes the hard-coded listing URL to {@code HtmlParserDemo.parserHtml}.
     *
     * @param args command-line arguments; not read
     * @throws FileNotFoundException propagated from {@code HtmlParserDemo.parserHtml}
     */
    public static void main(String[] args) throws FileNotFoundException {
        final String pageUrl = "http://www.locoso.com/cate/0sts2";
        HtmlParserDemo.parserHtml(pageUrl);
    }

}

结果输出显示

posted @ 2013-04-14 16:18  dependmyse  阅读(198)  评论(0)  编辑  收藏  举报