jsoup-处理html中的script数据

 

 

 

/**
 * Scrapes the left-hand category data from the Valueonline laws page.
 * http://www.valueonline.cn/laws/laws?typeid=96219074211635284
 *
 * <p>The data is read from the first {@code <script>} element of the page. The script
 * text is split on the literal {@code "var"}, one of the resulting segments is expected
 * to contain a JSON array literal, and every entry of that array is turned into an SQL
 * {@code insert} statement.
 *
 * @author hwaggLee
 */
public class UtilsHtmValueonLineType {

    /**
     * Index, within the script text split on {@code "var"}, of the segment that holds
     * the market-type array. NOTE(review): this index is tied to the current layout of
     * the remote page's script and will silently point elsewhere if that layout changes.
     */
    private static final int MARKET_TYPE_SEGMENT = 3;

    /**
     * Runs the scraper against the fixed Valueonline URL; the generated statements
     * are written to standard output by {@link #readHtml(String)}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String url = "http://www.valueonline.cn/laws/laws?typeid=96219074211635284";
        readHtml(url);
    }

    /**
     * Downloads {@code url}, extracts the market-type array from the first script
     * element and builds one {@code insert into n3b_vl_market_type} statement per entry.
     *
     * <p>Each statement is printed to standard output and also added to the returned
     * list. Fetching is best-effort: if the page cannot be retrieved, the error message
     * is printed and an empty list is returned. An empty list is likewise returned when
     * the page has no script element, the script has too few {@code var} segments, or
     * the expected segment contains no {@code [...]} literal.
     *
     * @param url address of the page to scrape
     * @return the generated SQL statements (as {@code String}s), never {@code null}
     */
    public static List<Object> readHtml(String url){
        List<Object> list = new ArrayList<Object>();

        Document doc = null;
        try {
            doc = Jsoup.connect(url).get();
        } catch (Exception e) {
            // Deliberate best-effort: report the failure and fall through to the empty result.
            System.out.println(e.getMessage()+":--------------->"+url);
        }
        if (doc == null) {
            return list;
        }

        Elements elScripts = doc.getElementsByTag("script");
        if (elScripts.isEmpty()) {
            // No script element at all: nothing to parse.
            return list;
        }

        // Splitting on the bare text "var" is crude (it also splits inside any identifier
        // or string containing "var"), but the segment index depends on exactly this split.
        String[] elScriptList = elScripts.get(0).data().split("var");
        if (elScriptList.length <= MARKET_TYPE_SEGMENT) {
            return list;
        }

        // The original also carried a disabled (commented-out) export of segment 2 into
        // n3b_vl_plate_type, reading the first element of that segment's array and the
        // keys id, parentId, level, declareTypeName, declareTypeNo, validFlag, oftenFlag,
        // showTypeName. It was not executed and is not performed here.

        String strTypeList = elScriptList[MARKET_TYPE_SEGMENT];
        System.out.println(strTypeList);

        String json = extractJsonArray(strTypeList);
        if (json == null) {
            return list;
        }

        JSONArray arrayList = JSONArray.fromObject(json);
        for (Object o : arrayList) {
            JSONObject object = JSONObject.fromObject(o);
            String sql = buildMarketTypeInsert(object);
            System.out.println(sql);
            list.add(sql);
        }
        return list;
    }

    /**
     * Returns the text from the first {@code '['} to the last {@code ']'} (inclusive),
     * or {@code null} when {@code segment} is null or holds no such bracket pair.
     */
    private static String extractJsonArray(String segment) {
        if (segment == null) {
            return null;
        }
        int start = segment.indexOf("[");
        int end = segment.lastIndexOf("]");
        if (start < 0 || end < start) {
            return null;
        }
        return segment.substring(start, end + 1);
    }

    /** Builds the {@code insert into n3b_vl_market_type} statement for one array entry. */
    private static String buildMarketTypeInsert(JSONObject object) {
        StringBuilder sb = new StringBuilder();
        sb.append("insert into n3b_vl_market_type values ");
        sb.append(" ( ");
        // First column is the code value prefixed with a literal '0'.
        sb.append("'0").append(sqlText(object.get("code_value"))).append("'");
        sb.append(",'").append(sqlText(object.get("code_name"))).append("'");
        sb.append(",'").append(sqlText(object.get("code_no"))).append("'");
        sb.append(",'").append(sqlText(object.get("code_value"))).append("'");
        sb.append(",'").append(sqlText(object.get("valid_flag"))).append("'");
        sb.append(",'").append(sqlText(object.get("version"))).append("'");
        sb.append(",'").append(sqlText(object.get("code_type"))).append("'");
        sb.append(" ); ");
        return sb.toString();
    }

    /**
     * Renders {@code value} for use inside a single-quoted SQL literal by doubling any
     * embedded single quote. A missing value renders as the text {@code null}, as it
     * did with plain string concatenation.
     *
     * <p>NOTE(review): the values come from a remote page. Quote doubling is a minimal
     * safeguard for generated script text; if these statements are ever executed from
     * code, use a parameterized {@code PreparedStatement} instead.
     */
    private static String sqlText(Object value) {
        return String.valueOf(value).replace("'", "''");
    }

}

 

posted @ 2016-06-25 13:38  243573295  阅读(7264)  评论(0)  编辑  收藏  举报