使用 HtmlParser 解析本地搜(locoso.com)网站
业务类
1 package code.lxy.test; 2 3 import java.io.File; 4 import java.io.FileNotFoundException; 5 import java.io.FileOutputStream; 6 import java.io.PrintWriter; 7 8 import org.htmlparser.Node; 9 import org.htmlparser.NodeFilter; 10 import org.htmlparser.Parser; 11 import org.htmlparser.tags.Div; 12 import org.htmlparser.tags.LinkTag; 13 import org.htmlparser.util.NodeList; 14 import org.htmlparser.util.ParserException; 15 16 public class HtmlParserDemo { 17 public static void parserHtml(String htmlToParser) 18 throws FileNotFoundException { 19 PrintWriter writer = new PrintWriter(new FileOutputStream(new File( 20 "d:/test.text"))); 21 Parser parser = new Parser(); 22 try { 23 parser.setURL(htmlToParser); 24 parser.setEncoding("UTF-8"); 25 NodeFilter filter = new NodeFilter() { 26 @Override 27 public boolean accept(Node node) { 28 // TODO Auto-generated method stub 29 if (node instanceof Div) { 30 Div divNode = (Div) node; 31 // System.out.println(divNode.getAttribute("class")); 32 if (divNode.getAttribute("class") != null) { 33 if (divNode.getAttribute("class").endsWith("zuo01_bt")||divNode.getAttribute("class").endsWith("zuo01_con")) { 34 return true; 35 } 36 } 37 } 38 return false; 39 } 40 }; 41 NodeList nodelist = parser.extractAllNodesThatMatch(filter); 42 for (int i = 0; i < nodelist.size(); i++) { 43 /*Div divNode=(Div) nodelist.elementAt(i); 44 System.out.println(divNode.toPlainTextString());*/ 45 Div divnode=(Div) nodelist.elementAt(i); 46 String test=divnode.getAttribute("class"); 47 if(divnode.getAttribute("class").equals("zuo01_bt")) 48 { 49 LinkTag linkTag=(LinkTag) divnode.childAt(1); 50 System.out.println(linkTag.getAttribute("title")); 51 }else{ 52 System.out.println(divnode.toPlainTextString()); 53 } 54 } 55 writer.close(); 56 } catch (ParserException e) { 57 // TODO Auto-generated catch block 58 e.printStackTrace(); 59 } 60 } 61 }
测试类
package code.lxy.main; import java.io.FileNotFoundException; import code.lxy.test.HtmlParserDemo; public class MainClass { /** * @param args * @throws FileNotFoundException */ public static void main(String[] args) throws FileNotFoundException { // TODO Auto-generated method stub HtmlParserDemo.parserHtml("http://www.locoso.com/cate/0sts2"); } }
结果输出显示