hive_面试题 【统计类问题】

需求1 : 计算除去部门最高工资, 和最低工资的平均工资 (字节跳动面试)
1. 数据准备
复制代码
-- DDL
create table btab (
`id` string comment '员工id',
`deptno` string comment '部门编号',
`salary` int comment '部门编号')
 comment '员工薪资信息表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;

-- DML
insert overwrite table btab
select 'a' as id,'001' as deptno,1 as salary union all
select 'b' as id,'001' as deptno,2 as salary union all
select 'c' as id,'001' as deptno,3 as salary union all
select 'c1' as id,'001' as deptno,4 as salary union all
select 'c2' as id,'001' as deptno,5 as salary union all
select 'c3' as id,'001' as deptno,6 as salary union all
select 'd' as id,'002' as deptno,4 as salary union all
select 'e' as id,'002' as deptno,5 as salary union all
select 'f' as id,'002' as deptno,6 as salary union all
select 'g' as id,'003' as deptno,7 as salary union all
select 'h' as id,'004' as deptno,8 as salary union all
select 'i' as id,'004' as deptno,9 as salary
;

-- 数据说明
-- B表记录了 员工id,部门编号,工资
复制代码
2. 执行Sql
复制代码
-- 思路1(推荐)
-- 通过 窗口函数剔除 部门最大值、最小值
select
    deptno
    ,avg(salary)
from (
     select id
          , deptno
          , salary
          , rank() over (partition by deptno order by salary asc)  as min_rank  -- 升序获取最小值
          , rank() over (partition by deptno order by salary desc) as max_rank -- 降序获取最大值
     from btab
) as t1
where min_rank > 1 and max_rank > 1
group by deptno;

deptno  _c1
001     3.5
002     5.0
Time taken: 37.849 seconds, Fetched: 2 row(s)


-- 思路2(推荐)
-- 通过 窗口函数剔除 部门最大值、最小值
select
    deptno
    ,avg(salary)
from (
    select
    id
    ,deptno
    ,salary
    ,max(salary) over(partition by deptno
         rows between unbounded preceding and unbounded following -- 当前行到终点
        ) as max_salary -- 按部门分组,获取 组内salary的最大值
    ,min(salary) over(partition by deptno
        rows between unbounded preceding and unbounded following -- 当前行到终点
        ) as min_salary -- 按部门分组,获取 组内salary的最小值
    ,if(
        salary = max(salary) over(partition by deptno rows between unbounded preceding and unbounded following)
        or
        salary = min(salary) over(partition by deptno rows between unbounded preceding and unbounded following)
        ,0,1
    ) as flag

    from btab
) as t1
where flag = 1
group by deptno
;

deptno  _c1
001     3.5
002     5.0
Time taken: 23.702 seconds, Fetched: 2 row(s)


-- 思路3(不推荐)
-- 通过 子查询剔除 部门最大值、最小值
select
t1.deptno
,avg(t1.salary)

from btab as t1
left outer join (
     select
     deptno
    ,min(salary) as min_salary
    ,max(salary) as max_salary
    from btab
    group by deptno
) as t2
on t1.deptno = t2.deptno
where if(
    t1.salary = t2.min_salary
    or t1.salary = t2.max_salary
    ,1,0
    ) = 0
group by t1.deptno
;

t1.deptno       _c1
001     3.5
002     5.0
Time taken: 49.193 seconds, Fetched: 2 row(s)
复制代码

 

posted @   学而不思则罔!  阅读(76)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· 字符编码:从基础到乱码解决
· SpringCloud带你走进微服务的世界
点击右上角即可分享
微信分享提示