Hive 刷题——连续签到领金币问题

需求描述

有如下数据

create table temp_user_log
(
    uid        int comment '用户id',
    artical_id int comment '视频id',
    in_time    string comment '进入时间',
    out_time   string comment '离开时间',
    sign_in    int comment '是否签到'
) stored as orc tblproperties ("orc.compress" = "snappy");

INSERT INTO temp_user_log(uid, artical_id, in_time, out_time, sign_in)
VALUES (101, 0, '2021-07-07 10:00:00', '2021-07-07 10:00:09', 1),
       (101, 0, '2021-07-08 10:00:00', '2021-07-08 10:00:09', 1),
       (101, 0, '2021-07-09 10:00:00', '2021-07-09 10:00:42', 1),
       (101, 0, '2021-07-10 10:00:00', '2021-07-10 10:00:09', 1),
       (101, 0, '2021-07-11 23:59:55', '2021-07-11 23:59:59', 1),
       (101, 0, '2021-07-12 10:00:28', '2021-07-12 10:00:50', 1),
       (101, 0, '2021-07-13 10:00:28', '2021-07-13 10:00:50', 1),
       (101, 0, '2021-07-14 11:00:28', '2021-07-14 11:00:50', 1),
       (101, 0, '2021-07-15 11:59:28', '2021-07-16 00:01:20', 1),
       (102, 0, '2021-10-01 10:00:28', '2021-10-01 10:00:50', 1),
       (102, 0, '2021-10-02 10:00:01', '2021-10-02 10:01:50', 1),
       (102, 0, '2021-10-03 11:00:55', '2021-10-03 11:00:59', 1),
       (102, 0, '2021-10-04 11:00:45', '2021-10-04 11:00:55', 0),
       (102, 0, '2021-10-05 11:00:53', '2021-10-05 11:00:59', 1),
       (102, 0, '2021-10-06 11:00:45', '2021-10-06 11:00:55', 1);

场景逻辑说明:

artical_id-文章ID代表用户浏览的文章的ID,特殊情况artical_id-文章ID为0表示用户在非文章内容页(比如App内的列表页、活动页等)。注意:只有artical_id为0时sign_in值才有效。从2021年7月7日0点开始,用户每天签到可以领1金币,并可以开始累积签到天数,连续签到的第3、7天分别可额外领2、6金币。每连续签到7天后重新累积签到天数(即重置签到天数:连续第8天签到时记为新的一轮签到的第一天,领1金币)问题:计算每个用户2021年7月以来每月获得的金币数(该活动到10月底结束,11月1日开始的签到不再获得金币)。结果按月份、ID升序排序。注:如果签到记录的in_time-进入时间和out_time-离开时间跨天了,也只记作in_time对应的日期签到了。

分析

领取金币规则:

(1)只要用户签到就能获取一枚金币
(2)连续签到第三天和第七天分别额外获取2枚和6金币
(3)连续签到7天后进行重置,按照规则(1)和(2)进行新一轮领币
分析:本题的维度是用户和月份,也就是最终结果是按照这两个来分组

  • 第一步:根据领金币的规则,我们容易想到按照签到的连续性先进行分组,将连续的进行标记分成一组
  • 第二步:在每一个连续的组里进行row_number()标记(rn),目的是获取连续的第三天和第七个天特殊位置。
  • 第三步:根据第二步连续的天数中是否有超过7天的,然后以此进行分割,再进行分组 

参考实现

select uid,
       month,
       series_flag,
       series_7_flg,
       case
           when cnt >= 1 and cnt < 3 then cnt
           when cnt >= 3 and cnt < 7 then cnt + 2
           else cnt + 2 + 6 end coin
from (select uid, month, series_flag, series_7_flg, count(*) cnt
      from (select uid,
                   in_time,
                   sign_in,
                   month,
                   series_flag,
                   rn,
                   sum(if(rn % 7 = 0, 1, 0))
                       over (partition by uid,month,series_flag order by in_time rows between current row and unbounded following) series_7_flg
            from (select uid,
                         in_time,
                         sign_in,
                         month,
                         series_flag,
                         row_number() over (partition by uid, series_flag,month) rn
                  from (select uid,
                               in_time,
                               sign_in,
                               month,
                               sum(if(diff < 1, 1, 0))
                                   over (partition by uid,month,series_sign_in_flag order by in_time) series_flag
                        from (select uid,
                                     in_time,
                                     sign_in,
                                     month,
                                     datediff(to_date(in_time), to_date(lag_in_time))   diff,
                                     sum(if(sign_in != lag_sign_in, 1, 0))
                                         over (partition by uid,month order by in_time) series_sign_in_flag
                              from (select uid,
                                           in_time,
                                           sign_in,
                                           substr(in_time, 1, 7)                                              month,
                                           lag(in_time, 1, in_time)
                                               over (partition by uid,substr(in_time, 1, 7) order by in_time) lag_in_time,
                                           lag(sign_in, 1, sign_in)
                                               over (partition by uid,substr(in_time, 1, 7) order by in_time) lag_sign_in
                                    from temp_user_log) t) t) i) i) i
      group by uid, month, series_flag, series_7_flg) i;

select uid,
       month,
       sum(case
               when cnt >= 1 and cnt < 3 then cnt
               when cnt >= 3 and cnt < 7 then cnt + 2
               else cnt + 2 + 6 end) coin
from (select uid, month, series_flag, series_7_flg, count(*) cnt
      from (select uid,
                   in_time,
                   sign_in,
                   month,
                   series_flag,
                   rn,
                   sum(if(rn % 7 = 0, 1, 0))
                       over (partition by uid,month,series_flag order by in_time rows between current row and unbounded following) series_7_flg
            from (select uid,
                         in_time,
                         sign_in,
                         month,
                         series_flag,
                         row_number() over (partition by uid, series_flag,month) rn
                  from (select uid,
                               in_time,
                               sign_in,
                               month,
                               sum(if(diff < 1, 1, 0))
                                   over (partition by uid,month,series_sign_in_flag order by in_time) series_flag
                        from (select uid,
                                     in_time,
                                     sign_in,
                                     month,
                                     datediff(to_date(in_time), to_date(lag_in_time))   diff,
                                     sum(if(sign_in != lag_sign_in, 1, 0))
                                         over (partition by uid,month order by in_time) series_sign_in_flag
                              from (select uid,
                                           in_time,
                                           sign_in,
                                           substr(in_time, 1, 7)                                              month,
                                           lag(in_time, 1, in_time)
                                               over (partition by uid,substr(in_time, 1, 7) order by in_time) lag_in_time,
                                           lag(sign_in, 1, sign_in)
                                               over (partition by uid,substr(in_time, 1, 7) order by in_time) lag_sign_in
                                    from temp_user_log
                                    where artical_id = 0
                                      and substr(in_time, 1, 7) >= '2021-07'
                                      and substr(in_time, 1, 7) <= '2021-10') t) t
                        where sign_in = 1) i) i) i
      group by uid, month, series_flag, series_7_flg) i
group by uid, month;

 

posted @ 2023-02-05 08:54  晓枫的春天  阅读(96)  评论(0编辑  收藏  举报