java Jsoup 抓取页面数据
// Collects one ImageBean for every image descriptor found in the <script> elements of
// each configured page. `map` maps a page URL to a rule string that is split on
// `this.split` into fields read by index below:
//   [1] prefix prepended to the extracted image path
//   [3] regex applied to each script's content; capture group 1 is parsed as a JSON object
//   [4] JSON key whose value is the image path
//   [5] value stored as the bean's image type
List<ImageBean> imgList = new ArrayList<ImageBean>();
// NOTE(review): these locals stay at this scope because the code following this
// fragment is not visible and may still read them; imageName is not used in this fragment.
ImageBean image = null;
String imageTime = "";
String imageName = "";
String url = "";
for (Map.Entry<String, String> entry : map.entrySet()) {
    String pageUrl = entry.getKey();
    String[] datas = entry.getValue().split(this.split);
    if (datas.length < 6) {
        // A short rule would otherwise fail with ArrayIndexOutOfBoundsException on the
        // first match and abort every remaining page; skip just this entry instead.
        System.err.println("Skipping " + pageUrl + ": expected at least 6 fields but got "
                + datas.length);
        continue;
    }
    // The regex depends only on the rule, so compile it once per page rather than
    // once per <script> element.
    Pattern linkPattern = Pattern.compile(datas[3]);
    try {
        Document doc = Jsoup.connect(pageUrl).get();
        for (Element script : doc.select("script")) {
            Matcher m = linkPattern.matcher(script.html());
            while (m.find()) {
                JSONObject obj = JSONObject.parseObject(m.group(1));
                String imagePath = (obj == null) ? null : obj.getString(datas[4]);
                if (imagePath == null) {
                    // Without this guard the URL would end in the literal text "null".
                    continue;
                }
                url = datas[1] + imagePath;
                image = new ImageBean();
                image.setUrl(url);
                imageTime = getImageTime(url);
                image.setName(imageTime);
                // NOTE(review): datas[3] is the regex itself; storing it as the type looks
                // unintended (another field may have been meant) - confirm before changing.
                image.setType(datas[3]);
                image.setImageType(datas[5]);
                imgList.add(image);
            }
        }
    } catch (IOException e) {
        // Best effort: report which page failed and carry on with the remaining pages.
        System.err.println("Failed to fetch " + pageUrl);
        e.printStackTrace();
    }
}
<!-- Scrape rule for one page.
     key:   URL of the page that the Java loop fetches.
     value: fields joined by a separator (presumably "~"; the consumer splits on a
            configurable string, so confirm it matches), read by index:
            [1] prefix prepended to the extracted image path
            [3] regex applied to each script element's content; capture group 1 is parsed
                as a JSON object (the consumer also stores this string as the bean's type)
            [4] JSON key whose value is the image path
            [5] value stored as the bean's image type
            Fields [0] and [2] are not read by the visible consumer code. -->
<entry key="http://www.nmc.cn/publish/nwp/t639/ea/500hPa-hgt.html"> <value>高度场~http://image.nmc.cn~type~data.push\((\{*.*?\})\)~img_path~nmc_fore_t639_hgt</value> </entry>