Hive 刷题——补全缺失日的月销售累计
问题描述
现有一张员工的销售记录表,表样式如下。现在需要统计每个员工在2024年2月份,截止到每天的月累计销售额。注意:存在有的员工在某几天是没有销售记录的。要补0
sale_date emp_id emp_name sale_amount '2024-02-02' ,'101' ,'老王' , 260 '2024-02-03' ,'101' ,'老王' , 200 '2024-02-05' ,'101' ,'老王' , 150 '2024-02-10' ,'101' ,'老王' , 130 '2024-02-13' ,'101' ,'老王' , 400 '2024-02-15' ,'101' ,'老王' , 420 '2024-02-27' ,'101' ,'老王' , 390
参考实现
with temp as (select '2024-02-02' as sale_date, '101' as emp_id, '老王' as emp_name, 260 as sale_amount union all select '2024-02-03' as sale_date, '101' as emp_id, '老王' as emp_name, 200 as sale_amount union all select '2024-02-05' as sale_date, '101' as emp_id, '老王' as emp_name, 150 as sale_amount union all select '2024-02-10' as sale_date, '101' as emp_id, '老王' as emp_name, 130 as sale_amount union all select '2024-02-13' as sale_date, '101' as emp_id, '老王' as emp_name, 400 as sale_amount union all select '2024-02-15' as sale_date, '101' as emp_id, '老王' as emp_name, 420 as sale_amount union all select '2024-02-27' as sale_date, '101' as emp_id, '老王' as emp_name, 390 as sale_amount) select dt , t1.emp_id , t1.emp_name , nvl(t2.sale_amount, 0) as sale_amount , sum(nvl(t2.sale_amount, 0)) over (partition by t1.emp_id order by t1.dt) as total_sale_amount from (select date_add(t.start_date, tab.pos) as dt , t.emp_id , t.emp_name from (select emp_id , emp_name , '2024-02-01' as start_date , '2024-02-28' as end_date from temp group by emp_id, emp_name) t lateral view posexplode(split(repeat(',', datediff(end_date, start_date)), ',')) tab as pos, val) t1 left join (select sale_date , emp_id , emp_name , sale_amount from temp) t2 on t1.dt = t2.sale_date and t1.emp_id = t2.emp_id and t1.emp_name = t2.emp_name
函数说明
SELECT repeat('hive', 3); --hivehivehive SELECT repeat(',', datediff('2024-02-10', '2024-02-04')); --,,,,,, SELECT split(repeat(',', datediff('2024-02-10', '2024-02-04')),','); --["","","","","","",""] select posexplode(split(repeat(',', datediff('2024-02-10', '2024-02-04')), ',')) as (pos, val); 0,"" 1,"" 2,"" 3,"" 4,"" 5,"" 6,""