给书呆子们多普及一下知识点,hive解析json不是必须udf

 多看看官网手册:https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-CreatingCustomUDFs

xxx_data_json [{"xxx_area_type":"住宅区","xxx_source":"sz","xxx_area_code":"aaa","xxx_code":"bbb","xxx_id":"xx","xxx_zc":"ccc","xxx_name":"faasdfd"}]
xxxtype_data_json

[{"xxx_type_name":"普通小区","xxx_source":"sz","xxx_code":"aaa","xxx_par_type":"A","xxx_par_type_name":"住宅小区","xxx_id":"fsdafad","xxx_type":"3fdas4","xxx_zc":"gdsfew","xxx_name":"新里西斯莱公馆"}

,{"xxx_type_name":"学校","xxx_source":"sz","xxx_code":"aaa","xxx_par_type":"A","xxx_par_type_name":"学校","xxx_id":"fsdafad","xxx_type":"3fdas4","xxx_zc":"gdsfew","xxx_name":"hh小学"}]

 

 

取出json数组的所有xxx_type_name的值,返回一个数组

select 
get_json_object(xxxtype_data_json, '$.\[0:-1].xxx_type_name') as xxx_type_name
,get_json_object(xxx_data_json, '$.\[0:-1].xxx_area_code') as xxx_area_code
from
 dim.dim_yyy_mid where inc_day='20220506' 
limit 20

 

 

 

只取数组第一个

select 
get_json_object(xxxtype_data_json, '$.\[0].xxx_type_name') as xxx_type_name
,get_json_object(xxx_data_json, '$.\[0].xxx_area_code') as xxx_area_code
from
 dim.dim_yyy_mid where inc_day='20220506' 
limit 20

 

 select 
 get_json_object(subcontainers,'$.containerNo') 
 ,get_json_object(subcontainers,'$\[0].containerNo')    --不带点写法
 ,get_json_object(subcontainers,'$.\[0].containerNo')   --带点写法
 ,get_json_object(subcontainers,'$.\[1].containerNo') 
 from xxx where inc_day='20210501' limit 22

 

方便验证:

引用于https://blog.csdn.net/weixin_44931681/article/details/127345564

-- 创建临时表
with t as (
        select  '[{"carrer":"大数据开发工程师","dream":["开个便利店","去外面逛一逛","看本好书"]},{"carrer":"退休农民","dream":["儿子听话","带孙子"]}]' as dfs

       ) 
select get_json_object(dfs,'$.\[0].carrer') as carrer0
      , get_json_object(dfs,'$\[1].carrer') as carrer1
      , get_json_object(dfs,'$\[0].dream[0]') as dream0
      , get_json_object(dfs,'$\[1].dream[0]') as dream1
  from t  limit 222

 

posted on 2022-05-07 12:59  架构艺术  阅读(201)  评论(0编辑  收藏  举报