阿里DataWorks注册UDTF函数
1.背景
最近有个需求,需要解析 MongoDB 里面的 JSON 数据,采用的开发平台是 DataWorks,原始 JSON 内容如下:
{
  "id": 0, // 方案ID
  "premiseDetails": [
    {
      "premiseId": 0, // 楼盘ID
      "price": 0, // 价格
      "pointDetails": [
        {
          "code": "", // 点位编码
          "network": 0, // 联网状态
          "unitId": 0, // 单元ID
          "unitState": true, // 单元状态,梯内没有该字段
          "success": false, // 是否选中
          "time": "", // 变更时间
          "info": "" // 踢点原因
        }
      ]
    }
  ]
}
2.实施
(1)将数据同步到 MaxCompute(MC):我用 id 和 premise_details 两个字段来接收 MongoDB 的数据。
@Resolve("string->string,string,string,string,string,string,string,string") public class get_mongodb_json_udtf extends UDTF { @Override public void process(Object[] objects) throws UDFException { String input = (String) objects[0]; input = input.replaceAll("=", "\":\"") .replaceAll("Document", "") .replaceAll("\\{\\{", "{\"") .replaceAll("\\}\\}", "\"}") .replaceAll("]\"},", "]}#") .replaceAll("\\},\\{", "}#{") .replaceAll("\\},", "}@") .replaceAll(",", "\",\"") .replaceAll("#", ",") .replaceAll("@", ",") .replaceAll("pointDetails\":\"", "pointDetails\":") .replaceAll("}]\"}]", "}]}]") .replaceAll(" ", "") ; JsonParser parser = new JsonParser(); // 解析JSON数组字符串 JsonArray jsonArray = parser.parse(input).getAsJsonArray(); if(jsonArray!=null) { // 遍历JsonArray for (JsonElement element : jsonArray) { JsonObject obj = element.getAsJsonObject(); String premiseId = obj.get("premiseId").getAsString(); String price = ""; if (obj.has("price") && !obj.get("price").isJsonNull()) { price = obj.get("price").getAsString(); } JsonArray pointDetails = obj.get("pointDetails").getAsJsonArray(); for (JsonElement point : pointDetails) { JsonObject pointObj = point.getAsJsonObject(); String pointNum = ""; String unitId = ""; String network = ""; String success = ""; String time = ""; String info = ""; if (pointObj.has("code") && !pointObj.get("code").isJsonNull()) { pointNum = pointObj.get("code").getAsString(); } if (pointObj.has("unitId") && !pointObj.get("unitId").isJsonNull()) { unitId = pointObj.get("unitId").getAsString(); } if (pointObj.has("network") && !pointObj.get("network").isJsonNull()) { network = pointObj.get("network").getAsString(); } if (pointObj.has("success") && !pointObj.get("success").isJsonNull()) { success = pointObj.get("success").getAsString(); } if (pointObj.has("time") && !pointObj.get("time").isJsonNull()) { time = pointObj.get("time").getAsString(); } if (pointObj.has("info") && !pointObj.get("info").isJsonNull()) { info = pointObj.get("info").getAsString(); 
} forward(premiseId, price, pointNum, unitId, network, success, time, info); } } } } }
更多技术知识关注公众号《码农独白》