jsoup-处理html中的script数据
/** * 价值在线数据-左边分类抓取 * http://www.valueonline.cn/laws/laws?typeid=96219074211635284 * @author hwaggLee */ public class UtilsHtmValueonLineType { public static void main(String[] args) { String url = "http://www.valueonline.cn/laws/laws?typeid=96219074211635284"; readHtml(url); } public static List<Object> readHtml(String url){ List<Object> list = new ArrayList<Object>(); // Document doc = null; try { doc = Jsoup.connect(url).get(); } catch (Exception e) { ///e.printStackTrace(); System.out.println(e.getMessage()+":--------------->"+url); } if( doc == null )return list; Elements elScripts = doc.getElementsByTag("script"); String[] elScriptList = elScripts.get(0).data().toString().split("var"); String strTypeList = elScriptList[2]; if( StringUtils.isNotBlank(strTypeList)){ /*strTypeList = strTypeList.substring(strTypeList.indexOf("["), strTypeList.lastIndexOf("]")+1); JSONArray array = JSONArray.fromObject(strTypeList); JSONArray arrayList = JSONArray.fromObject(array.get(0)); for (Object o : arrayList) { JSONObject object = JSONObject.fromObject(o); StringBuilder sb = new StringBuilder(); sb.append("insert into n3b_vl_plate_type values "); sb.append(" ( "); sb.append("'"+object.get("id")+"'"); sb.append(",'"+object.get("parentId")+"'"); sb.append(","+object.get("level")+""); sb.append(",'"+object.get("declareTypeName")+"'"); sb.append(",'"+object.get("declareTypeNo")+"'"); sb.append(",'"+object.get("validFlag")+"'"); sb.append(","+object.get("oftenFlag")+""); sb.append(",'"+object.get("showTypeName")+"'"); sb.append(" ); "); System.out.println(sb.toString()); }*/ } strTypeList = elScriptList[3]; System.out.println(strTypeList); if( StringUtils.isNotBlank(strTypeList) ){ strTypeList = strTypeList.substring(strTypeList.indexOf("["), strTypeList.lastIndexOf("]")+1); JSONArray arrayList = JSONArray.fromObject(strTypeList); for (Object o : arrayList) { JSONObject object = JSONObject.fromObject(o); StringBuilder sb = new StringBuilder(); sb.append("insert into 
n3b_vl_market_type values "); sb.append(" ( "); sb.append("'0"+object.get("code_value")+"'"); sb.append(",'"+object.get("code_name")+"'"); sb.append(",'"+object.get("code_no")+"'"); sb.append(",'"+object.get("code_value")+"'"); sb.append(",'"+object.get("valid_flag")+"'"); sb.append(",'"+object.get("version")+"'"); sb.append(",'"+object.get("code_type")+"'"); sb.append(" ); "); System.out.println(sb.toString()); } } return list; } }
知识只有共享才能传播,才能推陈出新,才能学到更多。这里写的每一篇文字/博客,基本都是从网上查询了一下资料然后记录下来,也有些是原汁原味搬了过来,也有时加了一些自己的想法。