Write the data into an RDD, then convert it to a Spark SQL Dataset and insert it into a Hive table.
import java.util.ArrayList;
import java.util.List;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import cn.hutool.json.JSONArray;
import cn.hutool.json.JSONObject;
import cn.hutool.json.JSONUtil;

SparkSession spark = SparkSession.builder().master("yarn").appName("json2hive")
        .config("hive.exec.dynamic.partition", "true")
        .config("hive.exec.dynamic.partition.mode", "nonstrict")
        .enableHiveSupport() // needed so insertInto can resolve the Hive table
        .getOrCreate();
SparkContext sc = spark.sparkContext();
JavaSparkContext jsc = new JavaSparkContext(sc);
// Sample payload; the commented lines below show fetching the same JSON over HTTP with Hutool
String jsonStr = "{\"data\":[{\"name\":\"bym\",\"age\":18},{\"name\":\"tom\",\"age\":20}]}";
JSONObject message = JSONUtil.parseObj(jsonStr);
JSONArray data = message.getJSONArray("data");
// String s = HttpUtil.get("");
// JSONObject jsonObject = JSONUtil.parseObj(s);
// JSONArray data = jsonObject.getJSONArray("DATA");
// Collect each array element as a standalone JSON string
List<String> list = new ArrayList<>();
for (int i = 0; i < data.size(); i++) {
    list.add(data.get(i).toString());
}
// Distribute the JSON strings as an RDD and let Spark infer the schema
JavaRDD<String> rdd = jsc.parallelize(list);
Dataset<Row> ds = spark.read().json(rdd);
ds.show(20);

// insertInto matches columns by position, so the Dataset's column order must match the target table schema
ds.write().mode(SaveMode.Overwrite).insertInto("ic_gcy_mart.ods_ebgwms_delivery_note_h_25001_tmp");
spark.stop();
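
Note: the spark.read().json(JavaRDD<String>) overload has been deprecated since Spark 2.2. A minimal alternative sketch, assuming Spark 2.2+, builds a Dataset<String> with Encoders.STRING() instead, which also removes the need for a JavaSparkContext:

import org.apache.spark.sql.Encoders;

// Same per-record JSON strings as above, wrapped in a Dataset<String>
// instead of a JavaRDD<String>
Dataset<String> jsonDs = spark.createDataset(list, Encoders.STRING());
Dataset<Row> ds = spark.read().json(jsonDs);
ds.show(20);

The rest of the flow (insertInto, spark.stop()) is unchanged.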