hive 求各直播间同时在线最大人数

需求描述

需求有以下数据

create table temp_online_data
(
    room_id    varchar(2) comment '直播间ID',
    anchor_id  varchar(5) comment '播主ID',
    start_time varchar(20) comment '上播时间',
    end_time   varchar(20) comment '下播时间'
) row format delimited fields terminated by ','
    stored as orc
    tblproperties ("orc.compress" = "snappy");
insert into temp_online_data(room_id, anchor_id, start_time, end_time)
values ('0', '1004', '2021-06-14 13:15:12', '2021-06-14 20:12:12'),
       ('0', '1002', '2021-06-14 15:12:12', '2021-06-14 16:12:12'),
       ('0', '1005', '2021-06-14 15:18:12', '2021-06-14 20:12:12'),
       ('0', '1001', '2021-06-14 20:12:12', '2021-06-14 23:12:12'),
       ('0', '1006', '2021-06-14 21:12:12', '2021-06-14 23:15:12'),
       ('0', '1007', '2021-06-14 22:12:12', '2021-06-14 23:10:12'),
       ('1', '1001', '2021-06-14 12:12:12', '2021-06-14 18:12:12'),
       ('1', '1003', '2021-06-14 13:12:12', '2021-06-14 16:12:12'),
       ('1', '1004', '2021-06-14 13:15:12', '2021-06-14 20:12:12'),
       ('1', '1002', '2021-06-14 15:12:12', '2021-06-14 16:12:12');

现在要求出每个直播间同时在线的播主人数;

需求实现

1、将数据分类,在开始数据后添加 +1 表示有主播上线,在结束时间添加 -1,表示主播下线

select room_id, anchor_id, start_time action_time, 1 user_num
from temp_online_data
union all
select room_id, anchor_id, end_time action_time, -1 user_num
from temp_online_data;
0    1004    2021-06-14 13:15:12    1
0    1002    2021-06-14 15:12:12    1
0    1005    2021-06-14 15:18:12    1
0    1001    2021-06-14 20:12:12    1
0    1006    2021-06-14 21:12:12    1
0    1007    2021-06-14 22:12:12    1
1    1001    2021-06-14 12:12:12    1
1    1003    2021-06-14 13:12:12    1
1    1004    2021-06-14 13:15:12    1
1    1002    2021-06-14 15:12:12    1
0    1004    2021-06-14 20:12:12    -1
0    1002    2021-06-14 16:12:12    -1
0    1005    2021-06-14 20:12:12    -1
0    1001    2021-06-14 23:12:12    -1
0    1006    2021-06-14 23:15:12    -1
0    1007    2021-06-14 23:10:12    -1
1    1001    2021-06-14 18:12:12    -1
1    1003    2021-06-14 16:12:12    -1
1    1004    2021-06-14 20:12:12    -1
1    1002    2021-06-14 16:12:12    -1

2、按照时间 计算每个直播间累加人数

select room_id, anchor_id, action_time, sum(user_num) over (partition by room_id order by action_time) user_nums
from (select room_id, anchor_id, start_time action_time, 1 user_num
      from temp_online_data
      union all
      select room_id, anchor_id, end_time action_time, -1 user_num
      from temp_online_data) y;
1    1001    2021-06-14 12:12:12    1
1    1003    2021-06-14 13:12:12    2
1    1004    2021-06-14 13:15:12    3
1    1002    2021-06-14 15:12:12    4
1    1003    2021-06-14 16:12:12    2
1    1002    2021-06-14 16:12:12    2
1    1001    2021-06-14 18:12:12    1
1    1004    2021-06-14 20:12:12    0
0    1004    2021-06-14 13:15:12    1
0    1002    2021-06-14 15:12:12    2
0    1005    2021-06-14 15:18:12    3
0    1002    2021-06-14 16:12:12    2
0    1001    2021-06-14 20:12:12    1
0    1004    2021-06-14 20:12:12    1
0    1005    2021-06-14 20:12:12    1
0    1006    2021-06-14 21:12:12    2
0    1007    2021-06-14 22:12:12    3
0    1007    2021-06-14 23:10:12    2
0    1001    2021-06-14 23:12:12    1
0    1006    2021-06-14 23:15:12    0

3、分组求最大值

select room_id, max(user_nums) user_nums_max
from (select room_id, anchor_id, action_time, sum(user_num) over (partition by room_id order by action_time) user_nums
      from (select room_id, anchor_id, start_time action_time, 1 user_num
            from temp_online_data
            union all
            select room_id, anchor_id, end_time action_time, -1 user_num
            from temp_online_data) y) y
group by room_id;
1    4
0    3
posted @ 2022-11-19 11:33  晓枫的春天  阅读(73)  评论(0编辑  收藏  举报