Jsoup Element网页信息采集

package zeze;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

/**
 * Small jsoup demo: downloads a weather.com.cn forecast page and prints the
 * fields of the forecast block selected by {@code [class=dn on][data-dn=7d1]}.
 */
public class TestJsoup {

    /** Unit suffix appended to each temperature reading. */
    private static final String DEGREE_SUFFIX = "°C";

    /**
     * Fetches and parses the page at the given URL.
     *
     * @param url address of the page to download
     * @return the parsed document, or {@code null} if the request failed with
     *         an {@link IOException}; callers must check for {@code null}
     */
    public  Document getDocument (String url){
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            // Name the URL so the stack trace below can be tied to a request.
            System.err.println("Failed to fetch " + url);
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Prints the text of {@code elements.get(index)} followed by {@code suffix},
     * or a warning on stderr when the selection has no element at that index.
     *
     * @param label    human-readable field name used in the warning
     * @param elements selection to read from
     * @param index    position of the wanted element within the selection
     * @param suffix   text appended after the element text (may be empty)
     */
    private static void printText(String label, Elements elements, int index, String suffix) {
        if (index >= 0 && index < elements.size()) {
            System.out.println(elements.get(index).text() + suffix);
        } else {
            // The page layout is outside our control; skip the field instead of crashing.
            System.err.println("Missing " + label + " in forecast block");
        }
    }

    /**
     * Entry point: downloads the forecast page and prints the heading, date,
     * weather description, two temperatures and wind text of the forecast block.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        TestJsoup t = new TestJsoup();
        Document doc = t.getDocument("http://www.weather.com.cn/html/weather/101280101.shtml");
        if (doc == null) {
            // getDocument already reported the failure.
            return;
        }

        // Locate the target HTML fragment.
        Elements forecast = doc.select("[class=dn on][data-dn=7d1]");
        if (forecast.isEmpty()) {
            System.err.println("Forecast block not found in page");
            return;
        }

        // Day heading (e.g. "today").
        printText("day heading", forecast.select("h1"), 0, "");
        // Day of the month.
        printText("date", forecast.select("h2"), 0, "");
        // Weather description (rain or not).
        printText("weather description", forecast.select("[class=wea]"), 0, "");

        // High temperature is the first <span>, low temperature the second.
        Elements temperatures = forecast.select("span");
        printText("high temperature", temperatures, 0, DEGREE_SUFFIX);
        printText("low temperature", temperatures, 1, DEGREE_SUFFIX);

        // Wind force is read from the third <i> element.
        printText("wind", forecast.select("i"), 2, "");
    }
}

 

package zeze;

import java.io.IOException;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;


/**
 * Small jsoup demo: downloads a weather.com.cn forecast page, prints the
 * fields of the forecast block selected by {@code [class=dn][data-dn=7d2]},
 * then lists every link found inside elements matching {@code [class=Lcontent]}.
 */
public class TestJsoup {

    /** Unit suffix appended to each temperature reading. */
    private static final String DEGREE_SUFFIX = "°C";

    /**
     * Fetches and parses the page at the given URL.
     *
     * @param url address of the page to download
     * @return the parsed document, or {@code null} if the request failed with
     *         an {@link IOException}; callers must check for {@code null}
     */
    public  Document getDocument (String url){
        try {
            return Jsoup.connect(url).get();
        } catch (IOException e) {
            // Name the URL so the stack trace below can be tied to a request.
            System.err.println("Failed to fetch " + url);
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Prints the text of {@code elements.get(index)} followed by {@code suffix},
     * or a warning on stderr when the selection has no element at that index.
     *
     * @param label    human-readable field name used in the warning
     * @param elements selection to read from
     * @param index    position of the wanted element within the selection
     * @param suffix   text appended after the element text (may be empty)
     */
    private static void printText(String label, Elements elements, int index, String suffix) {
        if (index >= 0 && index < elements.size()) {
            System.out.println(elements.get(index).text() + suffix);
        } else {
            // The page layout is outside our control; skip the field instead of crashing.
            System.err.println("Missing " + label + " in forecast block");
        }
    }

    /**
     * Prints heading, date, weather description, two temperatures and wind
     * text of the forecast block, or a warning if the block is absent.
     *
     * @param doc parsed forecast page
     */
    private static void printForecast(Document doc) {
        // Locate the target HTML fragment.
        Elements forecast = doc.select("[class=dn][data-dn=7d2]");
        if (forecast.isEmpty()) {
            System.err.println("Forecast block not found in page");
            return;
        }

        // Day heading.
        printText("day heading", forecast.select("h1"), 0, "");
        // Day of the month.
        printText("date", forecast.select("h2"), 0, "");
        // Weather description (rain or not).
        printText("weather description", forecast.select("[class=wea]"), 0, "");

        // High temperature is the first <span>, low temperature the second.
        Elements temperatures = forecast.select("span");
        printText("high temperature", temperatures, 0, DEGREE_SUFFIX);
        printText("low temperature", temperatures, 1, DEGREE_SUFFIX);

        // Wind force is read from the third <i> element.
        printText("wind", forecast.select("i"), 2, "");
    }

    /**
     * Prints the {@code href} and text of every {@code <a>} inside elements
     * matching {@code [class=Lcontent]}, one link per two output lines.
     *
     * @param doc parsed forecast page
     */
    private static void printLinks(Document doc) {
        for (Element container : doc.select("[class=Lcontent]")) {
            for (Element link : container.getElementsByTag("a")) {
                String linkHref = link.attr("href");
                String linkText = link.text();
                System.out.println(linkHref+"\n"+linkText);
            }
        }
    }

    /**
     * Entry point: downloads the forecast page, prints the forecast fields and
     * then the links of the content section.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        TestJsoup t = new TestJsoup();
        Document doc = t.getDocument("http://www.weather.com.cn/html/weather/101280101.shtml");
        if (doc == null) {
            // getDocument already reported the failure.
            return;
        }

        printForecast(doc);
        // Runs independently of the forecast so a layout change there does not hide the links.
        printLinks(doc);
    }
}

  http://www.cnblogs.com/xiaoMzjm/p/3899366.html?utm_source=tuicool&utm_medium=referral

posted @ 2015-11-01 18:55  陈泽泽  阅读(962)  评论(0)  编辑  收藏  举报