数据清洗第一天
通过百度地图 API 接口,根据完成单位获取经纬度,再由经纬度获取所在地址;行政区划需要从该地址中切割得到,最后通过查询数据表获取行政区划对应的行政编码。
关键代码:
public static String getXZ_BM(String xianji){ //获取行政编码 String code = ""; if(!xianji.equals("")) { code = Dao.getBianma(xianji); } return code; } public static String getXZ(String infos){//获取行政区划 /*String infos = "北京市建筑工程研究院有限责任公司 北京建工四建工程建设有限公司";*/ String[] danwei = null; String info = ""; if(infos.contains("")) { danwei = infos.split(" "); info = danwei[0];//第一单位获取地域 }else { info = infos; } System.out.println("info:::"+info); String code = getDiyu1(info); System.out.println("code:::"+code); String xingzheng = ""; String xianji = ""; if(code.substring(0,2).equals("北京")||code.substring(0,2).equals("天津")) { String[] dizhi = code.split("区"); String[] shixian = dizhi[0].split("市"); xianji = shixian[1]+"区";//获取县级 xingzheng = dizhi[0]+"区";//获取市区 }else if(code.substring(0,2).equals("河北")) {//获取省市县 if(code.contains("区")) { String[] dizhi = code.split("区"); String[] shixian = dizhi[0].split("市"); xianji = shixian[1]+"区"; xingzheng = dizhi[0]+"区"; }else if(code.contains("县")) { String[] dizhi = code.split("县"); String[] shixian = dizhi[0].split("市"); xianji = shixian[1]+"县"; xingzheng = dizhi[0]+"县"; }else { String[] xz = code.split("市"); xingzheng = xz[0] + "市" +xz[1] + "市"; } }else { code = "识别错误"; xingzheng = "识别错误"; } return xingzheng; } public static String getXianji(String xingzheng){//获取县级 String xianji = ""; System.out.println("开始获取县级:"+ xingzheng); if(xingzheng.substring(xingzheng.length()-1, xingzheng.length()).equals("市")) { String[] shi = xingzheng.split("市"); xianji = shi[1] + "市"; System.out.println("开始县级:"+ xianji); }else if(!xingzheng.equals("识别错误")) { String[] shi = xingzheng.split("市"); xianji = shi[1]; System.out.println("开始县级:"+ xianji); } return xianji; }
初步完成了数据的行政区划(省-市-县 / 市-区)及对应行政编码的获取。
下一步进行关键字提取。