给书呆子们多普及一下知识点,hive解析json不是必须udf
多看看官网手册:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-CreatingCustomUDFs
xxx_data_json | [{"xxx_area_type":"住宅区","xxx_source":"sz","xxx_area_code":"aaa","xxx_code":"bbb","xxx_id":"xx","xxx_zc":"ccc","xxx_name":"faasdfd"}] |
xxxtype_data_json |
[{"xxx_type_name":"普通小区","xxx_source":"sz","xxx_code":"aaa","xxx_par_type":"A","xxx_par_type_name":"住宅小区","xxx_id":"fsdafad","xxx_type":"3fdas4","xxx_zc":"gdsfew","xxx_name":"新里西斯莱公馆"} ,{"xxx_type_name":"学校","xxx_source":"sz","xxx_code":"aaa","xxx_par_type":"A","xxx_par_type_name":"学校","xxx_id":"fsdafad","xxx_type":"3fdas4","xxx_zc":"gdsfew","xxx_name":"hh小学"}] |
取出json数组的所有xxx_type_name的值,返回一个数组
select get_json_object(xxxtype_data_json, '$.\[0:-1].xxx_type_name') as xxx_type_name ,get_json_object(xxx_data_json, '$.\[0:-1].xxx_area_code') as xxx_area_code from dim.dim_yyy_mid where inc_day='20220506' limit 20
只取数组第一个
select get_json_object(xxxtype_data_json, '$.\[0].xxx_type_name') as xxx_type_name ,get_json_object(xxx_data_json, '$.\[0].xxx_area_code') as xxx_area_code from dim.dim_yyy_mid where inc_day='20220506' limit 20
select get_json_object(subcontainers,'$.containerNo') ,get_json_object(subcontainers,'$\[0].containerNo') --不带点写法 ,get_json_object(subcontainers,'$.\[0].containerNo') --带点写法 ,get_json_object(subcontainers,'$.\[1].containerNo') from xxx where inc_day='20210501' limit 22
方便验证:
引用于https://blog.csdn.net/weixin_44931681/article/details/127345564
-- 创建临时表 with t as ( select '[{"carrer":"大数据开发工程师","dream":["开个便利店","去外面逛一逛","看本好书"]},{"carrer":"退休农民","dream":["儿子听话","带孙子"]}]' as dfs ) select get_json_object(dfs,'$.\[0].carrer') as carrer0 , get_json_object(dfs,'$\[1].carrer') as carrer1 , get_json_object(dfs,'$\[0].dream[0]') as dream0 , get_json_object(dfs,'$\[1].dream[0]') as dream1 from t limit 222