hive:数据库“行转列”操作---使用collect_set/collect_list/collect_all & row_number()over(partition by 分组字段 [order by 排序字段])
方案一:请参考《数据库“行专列”操作---使用row_number()over(partition by 分组字段 [order by 排序字段])》,该方案是sqlserver,oracle,mysql,hive均适用的。
在hive中的方案分为以下两种方案:
创建测试表,并插入测试数据:
-- Hive test fixture for the row-to-column (pivot) examples with collect_set / collect_list.
create table tommyduan_test (
    gridid      string,
    height      int,
    cell        string,
    mrcount     int,
    weakmrcount int
);

-- Load every fixture row with one multi-row INSERT instead of eleven single-row
-- statements, so Hive runs one insert job rather than eleven.
-- Value order: gridid, height, cell, mrcount, weakmrcount.
insert into tommyduan_test values
    ('g1', 1, 'cell1',  12, 3),
    ('g1', 1, 'cell2',  22, 3),
    ('g1', 1, 'cell3',  23, 3),
    ('g1', 1, 'cell4',   1, 3),
    ('g1', 1, 'cell5',   3, 3),
    ('g1', 1, 'cell6',   4, 3),
    ('g1', 1, 'cell19', 21, 3),
    ('g2', 1, 'cell4',   1, 3),
    ('g2', 1, 'cell5',   3, 3),
    ('g2', 1, 'cell6',   4, 3),
    ('g2', 1, 'cell19', 21, 3);
方案二:使用collect_list/collect_all方案
注意:collect_list是一个list集合,允许重复的记录插入(collect_set才是set集合,会去除重复记录)
-- Step 1: keep the 3 rows with the largest mrcount per (gridid, height) and gather
-- each column into its own array with collect_list. Duplicates are kept, which is
-- why weakmrcountArray shows [3,3,3] and stays index-aligned with the other arrays.
-- NOTE(review): the recorded output below comes out in rn order, but Hive does not
-- document an ordering guarantee for collect_list -- confirm on your Hive version.
select
    gridid,
    height,
    collect_list(cell)        as cellArray,
    collect_list(mrcount)     as mrcountArray,
    collect_list(weakmrcount) as weakmrcountArray
from (
    select
        gridid,
        height,
        cell,
        mrcount,
        weakmrcount,
        -- cell is a tie-breaker so rows with equal mrcount rank deterministically
        row_number() over (partition by gridid, height order by mrcount desc, cell) as rn
    from tommyduan_test
    -- grouping by every column removes exact duplicate rows before ranking
    group by gridid, height, cell, mrcount, weakmrcount
) ranked
where rn < 4
group by gridid, height;

-- Recorded result:
-- +---------+---------+-----------------------------+---------------+-------------------+--+
-- | gridid  | height  |          cellarray          | mrcountarray  | weakmrcountarray  |
-- +---------+---------+-----------------------------+---------------+-------------------+--+
-- | g1      | 1       | ["cell3","cell2","cell19"]  | [23,22,21]    | [3,3,3]           |
-- | g2      | 1       | ["cell19","cell6","cell5"]  | [21,4,3]      | [3,3,3]           |
-- +---------+---------+-----------------------------+---------------+-------------------+--+

-- Step 2: spread the arrays into the fixed columns cell1 .. cell3. A slot with no
-- element gets the placeholder '-9999'. Numeric elements are cast to string so that
-- both CASE branches have the same type instead of relying on implicit conversion.
select
    gridid,
    height,
    case when size(cellArray) > 0 then cellArray[0]                          else '-9999' end as cell1,
    case when size(cellArray) > 0 then cast(mrcountArray[0] as string)       else '-9999' end as cell1_mrcount,
    case when size(cellArray) > 0 then cast(weakmrcountArray[0] as string)   else '-9999' end as cell1_weakmrcount,
    case when size(cellArray) > 1 then cellArray[1]                          else '-9999' end as cell2,
    case when size(cellArray) > 1 then cast(mrcountArray[1] as string)       else '-9999' end as cell2_mrcount,
    case when size(cellArray) > 1 then cast(weakmrcountArray[1] as string)   else '-9999' end as cell2_weakmrcount,
    case when size(cellArray) > 2 then cellArray[2]                          else '-9999' end as cell3,
    case when size(cellArray) > 2 then cast(mrcountArray[2] as string)       else '-9999' end as cell3_mrcount,
    case when size(cellArray) > 2 then cast(weakmrcountArray[2] as string)   else '-9999' end as cell3_weakmrcount
from (
    select
        gridid,
        height,
        collect_list(cell)        as cellArray,
        collect_list(mrcount)     as mrcountArray,
        collect_list(weakmrcount) as weakmrcountArray
    from (
        select
            gridid,
            height,
            cell,
            mrcount,
            weakmrcount,
            row_number() over (partition by gridid, height order by mrcount desc, cell) as rn
        from tommyduan_test
        group by gridid, height, cell, mrcount, weakmrcount
    ) ranked
    where rn < 4
    group by gridid, height
) top3;

-- Recorded result:
-- +---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
-- | gridid  | height  |  cell1  | cell1_mrcount  | cell1_weakmrcount  | cell2  | cell2_mrcount  | cell2_weakmrcount  |  cell3  | cell3_mrcount  | cell3_weakmrcount  |
-- +---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
-- | g1      | 1       | cell3   | 23             | 3                  | cell2  | 22             | 3                  | cell19  | 21             | 3                  |
-- | g2      | 1       | cell19  | 21             | 3                  | cell6  | 4              | 3                  | cell5   | 3              | 3                  |
-- +---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
方案三:使用collect_set方案
注意:collect_set是一个set集合,不允许重复的记录插入
-- Query 1: collect_set drops duplicates inside each group. weakmrcount collapses to
-- [3], so the per-column sets are no longer index-aligned with each other.
-- NOTE(review): the recorded element order relied on the ordered subquery, but Hive
-- does not document that collect_set preserves input order -- confirm on your version.
select
    gridid,
    height,
    collect_set(cell),
    collect_set(mrcount),
    collect_set(weakmrcount)
from (
    select gridid, height, cell, mrcount, weakmrcount
    from tommyduan_test
    order by gridid, height, mrcount desc
) sorted_rows
group by gridid, height;

-- Recorded result:
-- +---------+---------+-------------------------------------------------------------+----------------------+------+--+
-- | gridid  | height  |                             _c2                             |         _c3          | _c4  |
-- +---------+---------+-------------------------------------------------------------+----------------------+------+--+
-- | g1      | 1       | ["cell3","cell2","cell19","cell1","cell6","cell5","cell4"]  | [23,22,21,12,4,3,1]  | [3]  |
-- | g2      | 1       | ["cell19","cell6","cell5","cell4"]                          | [21,4,3,1]           | [3]  |
-- +---------+---------+-------------------------------------------------------------+----------------------+------+--+

-- Query 2: same as query 1 but limited to the 3 largest mrcount rows per group.
-- The weakmrcount set is still deduplicated to a single element.
select
    gridid,
    height,
    collect_set(cell)        as cellArray,
    collect_set(mrcount)     as mrcountArray,
    collect_set(weakmrcount) as weakmrcountArray
from (
    select
        gridid,
        height,
        cell,
        mrcount,
        weakmrcount,
        -- cell is a tie-breaker so rows with equal mrcount rank deterministically
        row_number() over (partition by gridid, height order by mrcount desc, cell) as rn
    from tommyduan_test
    group by gridid, height, cell, mrcount, weakmrcount
) ranked
where rn < 4
group by gridid, height;

-- Recorded result:
-- +---------+---------+-----------------------------+---------------+-------------------+--+
-- | gridid  | height  |          cellarray          | mrcountarray  | weakmrcountarray  |
-- +---------+---------+-----------------------------+---------------+-------------------+--+
-- | g1      | 1       | ["cell3","cell2","cell19"]  | [23,22,21]    | [3]               |
-- | g2      | 1       | ["cell19","cell6","cell5"]  | [21,4,3]      | [3]               |
-- +---------+---------+-----------------------------+---------------+-------------------+--+

-- Query 3: pack each row into one comma-joined string so the three values travel
-- together through collect_set and cannot be deduplicated independently.
select
    gridid,
    height,
    collect_set(concat_ws(',', cell, cast(mrcount as string), cast(weakmrcount as string))) as cellArray
from (
    select
        gridid,
        height,
        cell,
        mrcount,
        weakmrcount,
        row_number() over (partition by gridid, height order by mrcount desc, cell) as rn
    from tommyduan_test
    group by gridid, height, cell, mrcount, weakmrcount
) ranked
where rn < 4
group by gridid, height;

-- Recorded result:
-- +---------+---------+--------------------------------------------+--+
-- | gridid  | height  |                 cellarray                  |
-- +---------+---------+--------------------------------------------+--+
-- | g1      | 1       | ["cell3,23,3","cell2,22,3","cell19,21,3"]  |
-- | g2      | 1       | ["cell19,21,3","cell6,4,3","cell5,3,3"]    |
-- +---------+---------+--------------------------------------------+--+

-- Query 4: pivot the packed strings into the fixed columns cell1 .. cell3.
-- Each packed string is prefixed with rn and the set is passed through sort_array,
-- so element order follows the ranking instead of whatever order collect_set emits.
-- rn is limited to 1..3 here, so the string sort matches the numeric order.
-- After split, index 0 is rn, 1 is cell, 2 is mrcount, 3 is weakmrcount.
-- Assumes cell never contains '_' -- TODO confirm against real data.
select
    gridid,
    height,
    case when size(cellArray) > 0 then split(cellArray[0], '_')[1] else '-9999' end as cell1,
    case when size(cellArray) > 0 then split(cellArray[0], '_')[2] else '-9999' end as cell1_mrcount,
    case when size(cellArray) > 0 then split(cellArray[0], '_')[3] else '-9999' end as cell1_weakmrcount,
    case when size(cellArray) > 1 then split(cellArray[1], '_')[1] else '-9999' end as cell2,
    case when size(cellArray) > 1 then split(cellArray[1], '_')[2] else '-9999' end as cell2_mrcount,
    case when size(cellArray) > 1 then split(cellArray[1], '_')[3] else '-9999' end as cell2_weakmrcount,
    case when size(cellArray) > 2 then split(cellArray[2], '_')[1] else '-9999' end as cell3,
    case when size(cellArray) > 2 then split(cellArray[2], '_')[2] else '-9999' end as cell3_mrcount,
    case when size(cellArray) > 2 then split(cellArray[2], '_')[3] else '-9999' end as cell3_weakmrcount
from (
    select
        gridid,
        height,
        sort_array(
            collect_set(
                concat_ws('_', cast(rn as string), cell, cast(mrcount as string), cast(weakmrcount as string))
            )
        ) as cellArray
    from (
        select
            gridid,
            height,
            cell,
            mrcount,
            weakmrcount,
            row_number() over (partition by gridid, height order by mrcount desc, cell) as rn
        from tommyduan_test
        group by gridid, height, cell, mrcount, weakmrcount
    ) ranked
    where rn < 4
    group by gridid, height
) top3;

-- Recorded result:
-- +---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
-- | gridid  | height  |  cell1  | cell1_mrcount  | cell1_weakmrcount  | cell2  | cell2_mrcount  | cell2_weakmrcount  |  cell3  | cell3_mrcount  | cell3_weakmrcount  |
-- +---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
-- | g1      | 1       | cell3   | 23             | 3                  | cell2  | 22             | 3                  | cell19  | 21             | 3                  |
-- | g2      | 1       | cell19  | 21             | 3                  | cell6  | 4              | 3                  | cell5   | 3              | 3                  |
-- +---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
基础才是编程人员应该深入研究的问题,比如:
1)List/Set/Map内部组成原理|区别
2)mysql索引存储结构&如何调优/b-tree特点、计算复杂度及影响复杂度的因素。。。
3)JVM运行组成与原理及调优
4)Java类加载器运行原理
5)Java中GC过程原理|使用的回收算法原理
6)Redis中hash一致性实现及与hash其他区别
7)Java多线程、线程池开发、管理Lock与Synchronized区别
8)Spring IOC/AOP 原理;加载过程的。。。
【+加关注】。