hive_面试题 【统计类问题】
需求1 : 计算除去部门最高工资, 和最低工资的平均工资 (字节跳动面试)
1. 数据准备
-- DDL create table btab ( `id` string comment '员工id', `deptno` string comment '部门编号', `salary` int comment '部门编号') comment '员工薪资信息表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table btab select 'a' as id,'001' as deptno,1 as salary union all select 'b' as id,'001' as deptno,2 as salary union all select 'c' as id,'001' as deptno,3 as salary union all select 'c1' as id,'001' as deptno,4 as salary union all select 'c2' as id,'001' as deptno,5 as salary union all select 'c3' as id,'001' as deptno,6 as salary union all select 'd' as id,'002' as deptno,4 as salary union all select 'e' as id,'002' as deptno,5 as salary union all select 'f' as id,'002' as deptno,6 as salary union all select 'g' as id,'003' as deptno,7 as salary union all select 'h' as id,'004' as deptno,8 as salary union all select 'i' as id,'004' as deptno,9 as salary ; -- 数据说明 -- B表记录了 员工id,部门编号,工资
2. 执行Sql
-- 思路1(推荐) -- 通过 窗口函数剔除 部门最大值、最小值 select deptno ,avg(salary) from ( select id , deptno , salary , rank() over (partition by deptno order by salary asc) as min_rank -- 升序获取最小值 , rank() over (partition by deptno order by salary desc) as max_rank -- 降序获取最大值 from btab ) as t1 where min_rank > 1 and max_rank > 1 group by deptno; deptno _c1 001 3.5 002 5.0 Time taken: 37.849 seconds, Fetched: 2 row(s) -- 思路2(推荐) -- 通过 窗口函数剔除 部门最大值、最小值 select deptno ,avg(salary) from ( select id ,deptno ,salary ,max(salary) over(partition by deptno rows between unbounded preceding and unbounded following -- 当前行到终点 ) as max_salary -- 按部门分组,获取 组内salary的最大值 ,min(salary) over(partition by deptno rows between unbounded preceding and unbounded following -- 当前行到终点 ) as min_salary -- 按部门分组,获取 组内salary的最小值 ,if( salary = max(salary) over(partition by deptno rows between unbounded preceding and unbounded following) or salary = min(salary) over(partition by deptno rows between unbounded preceding and unbounded following) ,0,1 ) as flag from btab ) as t1 where flag = 1 group by deptno ; deptno _c1 001 3.5 002 5.0 Time taken: 23.702 seconds, Fetched: 2 row(s) -- 思路3(不推荐) -- 通过 子查询剔除 部门最大值、最小值 select t1.deptno ,avg(t1.salary) from btab as t1 left outer join ( select deptno ,min(salary) as min_salary ,max(salary) as max_salary from btab group by deptno ) as t2 on t1.deptno = t2.deptno where if( t1.salary = t2.min_salary or t1.salary = t2.max_salary ,1,0 ) = 0 group by t1.deptno ; t1.deptno _c1 001 3.5 002 5.0 Time taken: 49.193 seconds, Fetched: 2 row(s)
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· 字符编码:从基础到乱码解决
· SpringCloud带你走进微服务的世界