Hive 刷题——补全缺失日的月销售累计

问题描述

现有一张员工的销售记录表,表样式如下。现在需要统计每个员工在2024年2月份,截止到每天的月累计销售额。注意:存在有的员工在某几天是没有销售记录的。要补0

 sale_date   emp_id  emp_name sale_amount
 '2024-02-02' ,'101' ,'老王' , 260 
 '2024-02-03' ,'101' ,'老王' , 200 
 '2024-02-05' ,'101' ,'老王' , 150 
 '2024-02-10' ,'101' ,'老王' , 130 
 '2024-02-13' ,'101' ,'老王' , 400 
 '2024-02-15' ,'101' ,'老王' , 420 
 '2024-02-27' ,'101' ,'老王' , 390  

参考实现

with temp as (select '2024-02-02' as sale_date, '101' as emp_id, '老王' as emp_name, 260 as sale_amount
              union all
              select '2024-02-03' as sale_date, '101' as emp_id, '老王' as emp_name, 200 as sale_amount
              union all
              select '2024-02-05' as sale_date, '101' as emp_id, '老王' as emp_name, 150 as sale_amount
              union all
              select '2024-02-10' as sale_date, '101' as emp_id, '老王' as emp_name, 130 as sale_amount
              union all
              select '2024-02-13' as sale_date, '101' as emp_id, '老王' as emp_name, 400 as sale_amount
              union all
              select '2024-02-15' as sale_date, '101' as emp_id, '老王' as emp_name, 420 as sale_amount
              union all
              select '2024-02-27' as sale_date, '101' as emp_id, '老王' as emp_name, 390 as sale_amount)

select dt
     , t1.emp_id
     , t1.emp_name
     , nvl(t2.sale_amount, 0)                                                   as sale_amount
     , sum(nvl(t2.sale_amount, 0)) over (partition by t1.emp_id order by t1.dt) as total_sale_amount
from (select date_add(t.start_date, tab.pos) as dt
           , t.emp_id
           , t.emp_name
      from (select emp_id
                 , emp_name
                 , '2024-02-01' as start_date
                 , '2024-02-28' as end_date
            from temp
            group by emp_id, emp_name) t
               lateral view posexplode(split(repeat(',', datediff(end_date, start_date)), ',')) tab as pos, val) t1
         left join
     (select sale_date
           , emp_id
           , emp_name
           , sale_amount
      from temp) t2
     on t1.dt = t2.sale_date and t1.emp_id = t2.emp_id and t1.emp_name = t2.emp_name

函数说明

SELECT repeat('hive', 3);
--hivehivehive

SELECT repeat(',', datediff('2024-02-10', '2024-02-04'));
--,,,,,,
SELECT split(repeat(',', datediff('2024-02-10', '2024-02-04')),',');
--["","","","","","",""]


select posexplode(split(repeat(',', datediff('2024-02-10', '2024-02-04')), ',')) as (pos, val);
0,""
1,""
2,""
3,""
4,""
5,""
6,""

 

posted @ 2024-03-25 08:42  晓枫的春天  阅读(96)  评论(0编辑  收藏  举报