03_Jsoup
【01.获取一个页面所有的链接】
public static void main(String[] args) throws IOException { String url="http://www.cnblogs.com/HigginCui/"; Document doc=Jsoup.connect(url).get(); //下载并解析成html DOM结构 System.out.println(doc); //这里打印就是整个html页面 Elements links=doc.select("a[href]"); //使用select方法选择元素 System.out.println(links.size()); for(Element link:links){ System.err.println("<a href=\""+link.attr("abs:href")+"\"> ("+link.text()+") </a>"); } }
【运行结果】
【02.获取元素的文本、属性和outerHtml】
@Test public void test01(){ String html="<p> " + " <a href='http://example.com/'>" + " <b>霸气</b>" + " </a> " + " link." + "</p>"; Document doc=Jsoup.parse(html); Element ele=doc.select("a").first(); //查找第一个a元素 System.out.println("ele.text()==="+ele.text()); //Element.text()获取标签的文本值 System.out.println("ele.attr(\"href\")==="+ele.attr("href")); String linkOuter=ele.outerHtml(); System.out.println("ele.outerHtml()==="+linkOuter); }
【运行结果】
【03.根据id获取对应的Element】
@Test public void test02(){ String html="<p id=\"ppp\" value=\"i am best!\">哈哈哈 </p>"; Document doc=Jsoup.parse(html); Element ele=doc.select("#ppp").first(); //查找第一个a元素 System.out.println(ele.attr("value")); System.out.println(ele.text()); }
【运行结果】
【04.按下标读取表格每行的单元格】
@Test public void test03(){ String html="<div id=\"zxSale\">" + " <table class=\"sssss\">" + " <tbody>" + " <tr value=\"tttttrrrrr\">" + " <td>2017-02-22</td>" + " <td> 富国基金</td>" + " <td>嘉实基金</td>" + " </tr>" + " <tr value=\"tttttrrrrr\">" + " <td>2017-03-22</td>" + " <td>建信基金</td>" + " <td>易方达基金</td>" + " </tr>" + " </tbody>" + " </table>" + "</div>"; Document doc=Jsoup.parse(html); Elements trs=doc.select("#zxSale > table > tbody > tr"); for(Element tr:trs){ Elements tds=tr.getElementsByTag("td"); System.out.println(tds.get(0).ownText()); System.out.println(tds.get(1).ownText()); System.out.println(tds.get(2).ownText()); // System.out.println("这个是不存在的:"+tds.get(3).ownText()+"!"); //这里会报错 } }
【运行结果】
【05.遍历表格每行的所有单元格】
@Test public void test03_plus(){ String html="<div id=\"zxSale\">" + " <table class=\"sssss\">" + " <tbody>" + " <tr value=\"tttttrrrrr\">" + " <td>2017-02-22</td>" + " <td> 富国基金</td>" + " <td>嘉实基金</td>" + " </tr>" + " <tr value=\"tttttrrrrr\">" + " <td>2017-03-22</td>" + " <td>建信基金</td>" + " <td>易方达基金</td>" + " </tr>" + " </tbody>" + " </table>" + "</div>"; Document doc=Jsoup.parse(html); Elements trs=doc.select("#zxSale > table > tbody > tr"); for(Element tr:trs){ Elements tds=tr.getElementsByTag("td"); //根据标签获取元素 for(Element td : tds){ System.out.println(td.text()); } } }
【运行结果】
【06.获取select下拉框的所有option】
/**
 * Finds the {@code <select name="fundCode">} drop-down and prints the {@code value}
 * attribute and own text of each of its options.
 *
 * <p>Nothing is printed unless the drop-down holds more than one option.
 */
@Test
public void test04() {
    String markup = "<div id=\"zxSale\">"
            + " <div>"
            + " <select name=\"fundCode\">"
            + " <option value=\"\">全部</option>"
            + " <option value=\"000001\">华夏001</option>"
            + " <option value=\"000002\">华夏002</option>"
            + " <option value=\"000003\">华夏003</option>"
            + " <option value=\"000004\">华夏004</option>"
            + " <option value=\"000005\">华夏005</option>"
            + " <option value=\"000006\">华夏006</option>"
            + " </select>"
            + " </div>"
            + "</div>";

    Document parsed = Jsoup.parse(markup);

    // Attribute selector: the first <select> whose name is "fundCode".
    Element dropDown = parsed.select("select[name=fundCode]").get(0);
    Elements options = dropDown.getElementsByTag("option");

    // A drop-down with at most one option is skipped entirely.
    if (options.size() <= 1) {
        return;
    }

    for (int index = 0; index < options.size(); index++) {
        Element option = options.get(index);
        System.out.println("value===" + option.attr("value"));
        System.out.println("owntext===" + option.ownText());
    }
}
【运行结果】