(四)Jsoup 获取 DOM 元素属性值
第一节: Jsoup 获取 DOM 元素属性值
Jsoup获取DOM元素属性值
比如我们要获取博客的href属性值:
这时候我们就要用 Jsoup 的 attr() 方法来获取属性的值:
下面给出示例代码:
1 package com.javaxk.jsoup; 2 3 import org.apache.http.HttpEntity; 4 import org.apache.http.client.methods.CloseableHttpResponse; 5 import org.apache.http.client.methods.HttpGet; 6 import org.apache.http.impl.client.CloseableHttpClient; 7 import org.apache.http.impl.client.HttpClients; 8 import org.apache.http.util.EntityUtils; 9 import org.jsoup.Jsoup; 10 import org.jsoup.nodes.Document; 11 import org.jsoup.nodes.Element; 12 import org.jsoup.select.Elements; 13 14 public class Demo4 { 15 16 public static void main(String[] args) throws Exception{ 17 CloseableHttpClient httpclient = HttpClients.createDefault(); // 创建httpclient实例 18 HttpGet httpget = new HttpGet("http://www.cnblogs.com/"); // 创建httpget实例 19 20 CloseableHttpResponse response = httpclient.execute(httpget); // 执行get请求 21 HttpEntity entity=response.getEntity(); // 获取返回实体 22 String content=EntityUtils.toString(entity, "utf-8"); 23 response.close(); // 关闭流和释放系统资源 24 25 Document doc=Jsoup.parse(content); // 解析网页 得到文档对象 26 27 Elements linkElements=doc.select("#post_list .post_item .post_item_body h3 a"); //通过选择器查找所有博客链接DOM 28 for(Element e:linkElements){ 29 System.out.println("博客标题:"+e.text()); 30 System.out.println("博客地址:"+e.attr("href")); 31 System.out.println("target:"+e.attr("target")); 32 } 33 34 Element linkElement=doc.select("#friend_link").first(); 35 System.out.println("纯文本:"+linkElement.text()); 36 System.out.println("html:"+linkElement.html()); 37 38 39 } 40 }