hive:数据库“行专列”操作---使用collect_set/collect_list/collect_all & row_number()over(partition by 分组字段 [order by 排序字段])

方案一:请参考《数据库“行专列”操作---使用row_number()over(partition by 分组字段 [order by 排序字段])》,该方案是sqlserver,oracle,mysql,hive均适用的。

在hive中的方案分为以下两种方案:

创建测试表,并插入测试数据:

--hive 测试 行转列 collect_set collect_list
create table tommyduan_test(
gridid string,
height int,
cell string,
mrcount int,
weakmrcount int
);

insert into tommyduan_test values('g1',1,'cell1',12,3);
insert into tommyduan_test values('g1',1,'cell2',22,3);
insert into tommyduan_test values('g1',1,'cell3',23,3);
insert into tommyduan_test values('g1',1,'cell4',1,3);
insert into tommyduan_test values('g1',1,'cell5',3,3);
insert into tommyduan_test values('g1',1,'cell6',4,3);
insert into tommyduan_test values('g1',1,'cell19',21,3);

insert into tommyduan_test values('g2',1,'cell4',1,3);
insert into tommyduan_test values('g2',1,'cell5',3,3);
insert into tommyduan_test values('g2',1,'cell6',4,3);
insert into tommyduan_test values('g2',1,'cell19',21,3);

方案二:使用collect_set方案

注意:collect_set是一个set集合,不允许重复的记录插入

select gridid,height,collect_list(cell) cellArray,collect_list(mrcount) mrcountArray,collect_list(weakmrcount) weakmrcountArray
from (
select gridid,height,cell,mrcount,weakmrcount,row_number()over(partition by gridid,height order by mrcount desc) rn
from tommyduan_test
group by gridid,height,cell,mrcount,weakmrcount
) t10
where rn<4
group by gridid,height;
+---------+---------+-----------------------------+---------------+-------------------+--+
| gridid  | height  |          cellarray          | mrcountarray  | weakmrcountarray  |
+---------+---------+-----------------------------+---------------+-------------------+--+
| g1      | 1       | ["cell3","cell2","cell19"]  | [23,22,21]    | [3,3,3]           |
| g2      | 1       | ["cell19","cell6","cell5"]  | [21,4,3]      | [3,3,3]           |
+---------+---------+-----------------------------+---------------+-------------------+--+

select gridid,height,
(case when size(cellArray)>0 then cellArray[0] else '-9999' end) as cell1,
(case when size(cellArray)>0 then mrcountArray[0] else '-9999' end) as cell1_mrcount,
(case when size(cellArray)>0 then weakmrcountArray[0] else '-9999' end) as cell1_weakmrcount,
(case when size(cellArray)>1 then cellArray[1] else '-9999' end) as cell2,
(case when size(cellArray)>1 then mrcountArray[1] else '-9999' end) as cell2_mrcount,
(case when size(cellArray)>1 then weakmrcountArray[1] else '-9999' end) as cell2_weakmrcount,
(case when size(cellArray)>2 then cellArray[2] else '-9999' end) as cell3,
(case when size(cellArray)>2 then mrcountArray[2] else '-9999' end) as cell3_mrcount,
(case when size(cellArray)>2 then weakmrcountArray[2] else '-9999' end) as cell3_weakmrcount
from 
(
select gridid,height,collect_list(cell) cellArray,collect_list(mrcount) mrcountArray,collect_list(weakmrcount) weakmrcountArray
from (
select gridid,height,cell,mrcount,weakmrcount,row_number()over(partition by gridid,height order by mrcount desc) rn
from tommyduan_test
group by gridid,height,cell,mrcount,weakmrcount
) t10
where rn<4
group by gridid,height
) t12;
+---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
| gridid  | height  |  cell1  | cell1_mrcount  | cell1_weakmrcount  | cell2  | cell2_mrcount  | cell2_weakmrcount  |  cell3  | cell3_mrcount  | cell3_weakmrcount  |
+---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
| g1      | 1       | cell3   | 23             | 3                  | cell2  | 22             | 3                  | cell19  | 21             | 3                  |
| g2      | 1       | cell19  | 21             | 3                  | cell6  | 4              | 3                  | cell5   | 3              | 3                  |
+---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+

 

方案三:使用collect_list/collect_all方案

注意:collect_set是一个set集合,不允许重复的记录插入

select gridid,height,collect_set(cell),collect_set(mrcount),collect_set(weakmrcount)
from (select * from tommyduan_test order by gridid,height,mrcount desc) t10
group by gridid,height;
+---------+---------+-------------------------------------------------------------+----------------------+------+--+
| gridid  | height  |                             _c2                             |         _c3          | _c4  |
+---------+---------+-------------------------------------------------------------+----------------------+------+--+
| g1      | 1       | ["cell3","cell2","cell19","cell1","cell6","cell5","cell4"]  | [23,22,21,12,4,3,1]  | [3]  |
| g2      | 1       | ["cell19","cell6","cell5","cell4"]                          | [21,4,3,1]           | [3]  |
+---------+---------+-------------------------------------------------------------+----------------------+------+--+

select gridid,height,collect_set(cell) cellArray,collect_set(mrcount) mrcountArray,collect_set(weakmrcount) weakmrcountArray
from (
    select gridid,height,cell,mrcount,weakmrcount,row_number()over(partition by gridid,height order by mrcount desc) rn
    from tommyduan_test
    group by gridid,height,cell,mrcount,weakmrcount
) t10
where rn<4
group by gridid,height;
+---------+---------+-----------------------------+---------------+-------------------+--+
| gridid  | height  |          cellarray          | mrcountarray  | weakmrcountarray  |
+---------+---------+-----------------------------+---------------+-------------------+--+
| g1      | 1       | ["cell3","cell2","cell19"]  | [23,22,21]    | [3]               |
| g2      | 1       | ["cell19","cell6","cell5"]  | [21,4,3]      | [3]               |
+---------+---------+-----------------------------+---------------+-------------------+--+

select gridid,height,collect_set(concat_ws(',',cell,cast(mrcount as string), cast(weakmrcount as string))) as cellArray
from (
select gridid,height,cell,mrcount,weakmrcount,row_number()over(partition by gridid,height order by mrcount desc) rn
from tommyduan_test
group by gridid,height,cell,mrcount,weakmrcount
) t10
where rn<4
group by gridid,height
+---------+---------+--------------------------------------------+--+
| gridid  | height  |                 cellarray                  |
+---------+---------+--------------------------------------------+--+
| g1      | 1       | ["cell3,23,3","cell2,22,3","cell19,21,3"]  |
| g2      | 1       | ["cell19,21,3","cell6,4,3","cell5,3,3"]    |
+---------+---------+--------------------------------------------+--+


select gridid,height,
(case when size(cellArray)>0 then split(cellArray[0],'_')[0] else '-9999' end) as cell1,
(case when size(cellArray)>0 then split(cellArray[0],'_')[1] else '-9999' end) as cell1_mrcount,
(case when size(cellArray)>0 then split(cellArray[0],'_')[2] else '-9999' end) as cell1_weakmrcount,
(case when size(cellArray)>1 then split(cellArray[1],'_')[0] else '-9999' end) as cell2,
(case when size(cellArray)>1 then split(cellArray[1],'_')[1] else '-9999' end) as cell2_mrcount,
(case when size(cellArray)>1 then split(cellArray[1],'_')[2] else '-9999' end) as cell2_weakmrcount,
(case when size(cellArray)>2 then split(cellArray[2],'_')[0] else '-9999' end) as cell3,
(case when size(cellArray)>2 then split(cellArray[2],'_')[1] else '-9999' end) as cell3_mrcount,
(case when size(cellArray)>2 then split(cellArray[2],'_')[2] else '-9999' end) as cell3_weakmrcount
from 
(
select gridid,height,collect_set(concat_ws('_',cell,cast(mrcount as string), cast(weakmrcount as string))) as cellArray
from (
select gridid,height,cell,mrcount,weakmrcount,row_number()over(partition by gridid,height order by mrcount desc) rn
from tommyduan_test
group by gridid,height,cell,mrcount,weakmrcount
) t10
where rn<4
group by gridid,height
) t12;
+---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
| gridid  | height  |  cell1  | cell1_mrcount  | cell1_weakmrcount  | cell2  | cell2_mrcount  | cell2_weakmrcount  |  cell3  | cell3_mrcount  | cell3_weakmrcount  |
+---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+
| g1      | 1       | cell3   | 23             | 3                  | cell2  | 22             | 3                  | cell19  | 21             | 3                  |
| g2      | 1       | cell19  | 21             | 3                  | cell6  | 4              | 3                  | cell5   | 3              | 3                  |
+---------+---------+---------+----------------+--------------------+--------+----------------+--------------------+---------+----------------+--------------------+--+

 

posted @ 2018-01-13 20:55  cctext  阅读(8428)  评论(0编辑  收藏  举报