hive_非常用
1.count(distinct xxx) 窗口函数
hive 2.x版本支持:
count(distinct cust_num) over(partition by xxx order by xxx) -- 分组内去重求和
hive1.x版本不支持:改版
size(collect_set(cust_num) over(partition by xxx order by xxx)) -- 分组内去重求和
2.
collect_set : set集合,没有重复元素
collect_list :list列表,可以有重复元素
select collect_list(value) from ( select 1 as id,1 as value from dual union all select 1 as id,3 as value from dual union all select 1 as id,2 as value from dual union all select 1 as id,2 as value from dual ) t group by id;
[1,3,2,2]
select collect_set(value) from ( select 1 as id,1 as value from dual union all select 1 as id,3 as value from dual union all select 1 as id,2 as value from dual union all select 1 as id,2 as value from dual ) t group by id; [1,3,2]
3.排序
sort_array
select sort_array(collect_set(value)) from ( select 1 as id,1 as value from dual union all select 1 as id,3 as value from dual union all select 1 as id,2 as value from dual union all select 1 as id,2 as value from dual ) t group by id; [1,2,3]
4.集合元素连接:
select concat_ws('-','1','2','3') 1-2-3
select concat_ws('-',collect_set(cast(value as string))) from ( select 1 as id,1 as value from dual union all select 1 as id,3 as value from dual union all select 1 as id,2 as value from dual union all select 1 as id,2 as value from dual ) t group by id; 1-3-2