查找网页中的链接元素（a 标签），并将符合条件的 href 输出到指定文件

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Fetches a web page with jsoup, collects the {@code href} of every anchor
 * whose link contains "baidu", echoes each one to standard output and saves
 * the whole list to a log file.
 */
public class HrefTest {

    /**
     * Writes the buffer's content to the given file, replacing any previous content.
     *
     * @param filePath path of the file to (over)write
     * @param sb       text to write
     * @throws IOException if the file cannot be opened or the write fails
     */
    public static void outHref(String filePath, StringBuffer sb) throws IOException {
        // A FileNotFoundException is an IOException and is propagated to the
        // caller (as the signature promises) instead of being swallowed here.
        PrintWriter writer = new PrintWriter(new FileOutputStream(new File(filePath)));
        try {
            writer.write(sb.toString());
            // PrintWriter never throws on write errors; it only records them,
            // so the error flag has to be checked explicitly.
            if (writer.checkError()) {
                throw new IOException("Failed to write to " + filePath);
            }
        } finally {
            // Always release the file handle, even when the write fails.
            writer.close();
        }
    }

    /**
     * Locates the matching anchors on the page and writes their links to the result file.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String filePath = "/Users/liuqi/Desktop/result.log";
        final String url = "http://www.baidu.com/";
        StringBuffer sb = new StringBuffer();
        try {
            Document doc = Jsoup.connect(url).get();
            Elements links = doc.getElementsByTag("a");
            for (Element link : links) {
                String href = link.attr("href");
                // Braces are required: without them only the first statement
                // was guarded, so non-matching links re-appended a stale value.
                if (href.contains("baidu")) {
                    sb.append(href).append("\n");
                    System.out.println(href);
                }
            }
            // Write once after all links are collected rather than rewriting
            // the whole file on every iteration; the final content is the same.
            // The file is (re)created even when no link matched.
            outHref(filePath, sb);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

运行前需要将 jsoup-1.10.1.jar 加入 classpath。

posted @ 2017-05-02 11:30  liuqiceshi  阅读(316)  评论(0编辑  收藏  举报