Java 抓取快递信息
package zeze; import java.io.IOException; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class TestJsoup { public Document getDocument(String url) { try { return Jsoup.connect(url).get(); } catch (IOException e) { e.printStackTrace(); } return null; } public static void main(String[] args) throws IOException { TestJsoup t = new TestJsoup(); String url ="http://www.kiees.cn/yto.php?wen=880350384879600241&action=ajax";//获取地址 Connection conn = Jsoup.connect(url); // 设置查询参数 conn.data("wen", "880350384879600241"); Document doc = null; // 设置请求类型 doc = conn.timeout(100000).get(); //处理返回数据 Elements results = new Elements(); results = doc.select("[class = yto]"); int i=0; for (Element result : results) { Elements links = result.getElementsByTag("td"); for (Element link : links) { //必要的筛选 String val = link.text(); if(i%2==0) System.out.println("时间:" + val); else System.out.println("状态:" + val); i++; } } /*Elements zeze0 = new Elements(); Elements zeze1 = new Elements(); Elements zeze2 = new Elements(); String name; String price; String sales; int count=0; String key = "520675407117"; for (int i = 0; i < 3; i++) { Document doc = t.getDocument("https://item.taobao.com/item.htm?id=" + key + ""); String b = key.substring(0, 5); String c = key.substring(5, 12); int d = Integer.parseInt(c); key = b + (d + 1); // 获取目标HTML代码 zeze0 = doc.select("[class= tb-main-title]"); name = zeze0.attr("data-title");//商品名 zeze1 = doc.select("[class=tb-rmb-num]"); price = zeze1.text();//价格 zeze2 = doc.select("#J_SellCounter"); sales = zeze2.text();//销量 if (name != "") { count++; System.out.println("id:" + key); System.out.println("商品:" + name); System.out.println("价格:" + price); System.out.println("销量:" + sales); } } System.out.println("本次采集总数:"+count); System.out.println("end");*/ } }
1 package zeze; 2 3 import java.io.IOException; 4 5 import org.jsoup.Connection; 6 import org.jsoup.Jsoup; 7 import org.jsoup.nodes.Document; 8 import org.jsoup.nodes.Element; 9 import org.jsoup.select.Elements; 10 11 public class TestJsoup { 12 13 public Document getDocument(String url) { 14 try { 15 return Jsoup.connect(url).get(); 16 } catch (IOException e) { 17 e.printStackTrace(); 18 } 19 return null; 20 } 21 22 public static void main(String[] args) throws IOException { 23 TestJsoup t = new TestJsoup(); 24 String url = "http://www.zto.cn/GuestService/Bill";// 获取地址 25 Connection conn = Jsoup.connect(url); 26 27 String key = "368853112593"; 28 29 // 设置查询参数 30 for (int j = 0; j < 100; j++) { 31 String b = key.substring(0, 8); 32 String c = key.substring(8,12); 33 int d = Integer.parseInt(c); 34 key = b + (d + 1); 35 System.out.println("key:" + key); 36 conn.data("txtBill", key); 37 Document doc = null; 38 // 设置请求类型 39 doc = conn.post(); 40 // 处理返回数据 41 Elements results = new Elements(); 42 results = doc.select("[class = state]"); 43 int i = 0; 44 for (Element result : results) { 45 Elements links = result.select("[class = routeTips]"); 46 for (Element link : links) { 47 // 必要的筛选 48 String val = link.text(); 49 if (i % 2 == 0) 50 System.out.println("时间:" + val); 51 else 52 System.out.println("状态:" + val); 53 i++; 54 } 55 } 56 } 57 58 /* 59 * Elements zeze0 = new Elements(); Elements zeze1 = new Elements(); 60 * Elements zeze2 = new Elements(); String name; String price; String 61 * sales; int count=0; String key = "520675407117"; for (int i = 0; i < 62 * 3; i++) { Document doc = 63 * t.getDocument("https://item.taobao.com/item.htm?id=" + key + ""); 64 * String b = key.substring(0, 5); 65 * 66 * String c = key.substring(5, 12); 67 * 68 * int d = Integer.parseInt(c); key = b + (d + 1); // 获取目标HTML代码 zeze0 = 69 * doc.select("[class= tb-main-title]"); name = 70 * zeze0.attr("data-title");//商品名 71 * 72 * zeze1 = doc.select("[class=tb-rmb-num]"); price = 
zeze1.text();//价格 73 * 74 * zeze2 = doc.select("#J_SellCounter"); sales = zeze2.text();//销量 75 * 76 * 77 * if (name != "") { count++; System.out.println("id:" + key); 78 * System.out.println("商品:" + name); System.out.println("价格:" + price); 79 * System.out.println("销量:" + sales); } } 80 * System.out.println("本次采集总数:"+count); System.out.println("end"); 81 */ 82 83 } 84 }
1 package zeze; 2 3 import java.io.IOException; 4 5 import org.jsoup.Connection; 6 import org.jsoup.Jsoup; 7 import org.jsoup.nodes.Document; 8 import org.jsoup.nodes.Element; 9 import org.jsoup.select.Elements; 10 11 public class TestJsoup { 12 13 public Document getDocument(String url) { 14 try { 15 return Jsoup.connect(url).get(); 16 } catch (IOException e) { 17 e.printStackTrace(); 18 } 19 return null; 20 } 21 22 public static void main(String[] args) throws IOException { 23 TestJsoup t = new TestJsoup(); 24 String url = "http://www.kiees.cn/yto.php?wen=880350384879600241&action=ajax";// 获取地址 25 Connection conn = Jsoup.connect(url); 26 27 String key = "880350384879600241"; 28 29 // 设置查询参数 30 for (int j = 0; j < 100; j++) { 31 String b = key.substring(0, 11); 32 String c = key.substring(11,17); 33 int d = Integer.parseInt(c); 34 key = b + (d + 1); 35 System.out.println("key:" + key); 36 conn.data("wen", key); 37 Document doc = null; 38 // 设置请求类型 39 doc = conn.timeout(100000).get(); 40 // 处理返回数据 41 Elements results = new Elements(); 42 results = doc.select("[class = yto]"); 43 int i = 0; 44 for (Element result : results) { 45 Elements links = result.getElementsByTag("td"); 46 for (Element link : links) { 47 // 必要的筛选 48 String val = link.text(); 49 if (i % 2 == 0) 50 System.out.println("时间:" + val); 51 else 52 System.out.println("状态:" + val); 53 i++; 54 } 55 } 56 } 57 58 /* 59 * Elements zeze0 = new Elements(); Elements zeze1 = new Elements(); 60 * Elements zeze2 = new Elements(); String name; String price; String 61 * sales; int count=0; String key = "520675407117"; for (int i = 0; i < 62 * 3; i++) { Document doc = 63 * t.getDocument("https://item.taobao.com/item.htm?id=" + key + ""); 64 * String b = key.substring(0, 5); 65 * 66 * String c = key.substring(5, 12); 67 * 68 * int d = Integer.parseInt(c); key = b + (d + 1); // 获取目标HTML代码 zeze0 = 69 * doc.select("[class= tb-main-title]"); name = 70 * zeze0.attr("data-title");//商品名 71 * 72 * zeze1 = 
doc.select("[class=tb-rmb-num]"); price = zeze1.text();//价格 73 * 74 * zeze2 = doc.select("#J_SellCounter"); sales = zeze2.text();//销量 75 * 76 * 77 * if (name != "") { count++; System.out.println("id:" + key); 78 * System.out.println("商品:" + name); System.out.println("价格:" + price); 79 * System.out.println("销量:" + sales); } } 80 * System.out.println("本次采集总数:"+count); System.out.println("end"); 81 */ 82 83 } 84 }