Jsoup爬虫最终修订版
package com.zdf.jsoup; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.nodes.Node; import org.jsoup.select.Elements; import org.springframework.stereotype.Controller; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.ResponseBody; import com.alibaba.fastjson.JSON; import com.zdf.jsoup.domain.Ticket; @Controller public class JsoupTest { @RequestMapping("/jsoupTest") public @ResponseBody List<Ticket> jsoupTest() { String url = "http://dynamic.12306.cn/otn/board/boardMore"; Connection connect = Jsoup.connect(url); try { List<Ticket> arr = new ArrayList<Ticket>(); Document document = connect.get(); Element table = document.getElementById("table_board_more"); // Elements tr = table.getElementsByTag("tr"); Elements tr = table.getElementsByTag("tr");//获取tr for (Element element : tr) { Elements th = element.getElementsByTag("th"); // for (Element element2 : th) { // // System.out.println("hello"+element2.text()); // } Ticket ticket = new Ticket(); for (int i = 0; i < element.children().size(); i++) { ticket.setStartPlace(element.child(0).text());//出发地 ticket.setEndPlace(element.child(1).text());//目的地 } System.out.println(ticket); arr.add(ticket); } return arr; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return null; } // 跳转页面 @RequestMapping("/") public String toVm() { return "velocity/ticket"; } }
另外解释一下 html()、text() 和 val() 三者的区别:
html() 在需要读取或添加带有标签的内容时使用;text() 用来获取纯文本;val() 只有当元素带有 value 这个属性的时候才能使用,比如:<input type="text" value="XX" />,这个时候使用 val()。
而对于 <span>您好!</span> 这样只包含文本的元素,这个时候使用 text()。
还有 child 和 children 的区别:child(int index) 需要传入下标,返回该下标对应的单个子元素(Element);children() 返回该元素的所有子元素,返回值为 Elements。