Hive实现将分组取top2的数据进行相减(SQL面试题)
1.问题描述
一张成绩表class有如下字段,班级ID,英语成绩,数学成绩,语文成绩
id english math chinese classid
1 78 89 95 2
2 68 86 79 2
3 59 98 78 1
4 68 96 86 3
5 58 87 68 3
6 97 87 68 3
7 97 87 76 1
8 67 98 97 2
9 78 76 96 1
10 99 99 98 3
求:每个班级英语成绩最高的前两名的分值差为多少?
2.建表并加载数据
--建表
create table class
(id int,
english double,
math double,
chinese double,
classid int)
row format delimited
fields terminated by ',';
--加载数据
load data local inpath '/opt/modules/data/class' into table class;
3.问题分析
第一步,首先分组取top2的数据;
select
english,
classid
from
(select
english,
classid,
row_number() over (partition by classid order by english desc) rank
from class) temp1
where rank < 3;
--结果输出
english classid
97 1
78 1
78 2
68 2
99 3
97 3
第二步,将每个班级分组数据的最大值减去最小值;
select
classid,
max(english) - min(english) as eng_difference
from
(select
english,
classid
from
(select
english,
classid,
row_number() over (partition by classid order by english desc) rank
from class) temp1
where rank < 3) temp2
group by classid;
--结果
classid eng_difference
1 19
2 10
3 2
如果求所有的分值差,如下:
select
classid,
max(english) - min(english) as eng_difference,
max(math) - min(math) as math_difference,
max(chinese) - min(chinese) as china_difference
from
(select
english,
math,
chinese,
classid
from
(select
english,
math,
chinese,
classid,
row_number() over (partition by classid order by english desc) rank
from class) temp1
where rank < 3) temp2
group by classid;
--结果输出
classid eng_difference math_difference china_difference
1 19 11 20
2 10 3 16
3 2 12 30