hive 求各直播间同时在线最大人数
需求描述
需求有以下数据
create table temp_online_data ( room_id varchar(2) comment '直播间ID', anchor_id varchar(5) comment '播主ID', start_time varchar(20) comment '上播时间', end_time varchar(20) comment '下播时间' ) row format delimited fields terminated by ',' stored as orc tblproperties ("orc.compress" = "snappy"); insert into temp_online_data(room_id, anchor_id, start_time, end_time) values ('0', '1004', '2021-06-14 13:15:12', '2021-06-14 20:12:12'), ('0', '1002', '2021-06-14 15:12:12', '2021-06-14 16:12:12'), ('0', '1005', '2021-06-14 15:18:12', '2021-06-14 20:12:12'), ('0', '1001', '2021-06-14 20:12:12', '2021-06-14 23:12:12'), ('0', '1006', '2021-06-14 21:12:12', '2021-06-14 23:15:12'), ('0', '1007', '2021-06-14 22:12:12', '2021-06-14 23:10:12'), ('1', '1001', '2021-06-14 12:12:12', '2021-06-14 18:12:12'), ('1', '1003', '2021-06-14 13:12:12', '2021-06-14 16:12:12'), ('1', '1004', '2021-06-14 13:15:12', '2021-06-14 20:12:12'), ('1', '1002', '2021-06-14 15:12:12', '2021-06-14 16:12:12');
现在要求出每个直播间同时在线的播主人数;
需求实现
1、将数据分类,在开始数据后添加 +1 表示有主播上线,在结束时间添加 -1,表示主播下线
select room_id, anchor_id, start_time action_time, 1 user_num from temp_online_data union all select room_id, anchor_id, end_time action_time, -1 user_num from temp_online_data; 0 1004 2021-06-14 13:15:12 1 0 1002 2021-06-14 15:12:12 1 0 1005 2021-06-14 15:18:12 1 0 1001 2021-06-14 20:12:12 1 0 1006 2021-06-14 21:12:12 1 0 1007 2021-06-14 22:12:12 1 1 1001 2021-06-14 12:12:12 1 1 1003 2021-06-14 13:12:12 1 1 1004 2021-06-14 13:15:12 1 1 1002 2021-06-14 15:12:12 1 0 1004 2021-06-14 20:12:12 -1 0 1002 2021-06-14 16:12:12 -1 0 1005 2021-06-14 20:12:12 -1 0 1001 2021-06-14 23:12:12 -1 0 1006 2021-06-14 23:15:12 -1 0 1007 2021-06-14 23:10:12 -1 1 1001 2021-06-14 18:12:12 -1 1 1003 2021-06-14 16:12:12 -1 1 1004 2021-06-14 20:12:12 -1 1 1002 2021-06-14 16:12:12 -1
2、按照时间 计算每个直播间累加人数
select room_id, anchor_id, action_time, sum(user_num) over (partition by room_id order by action_time) user_nums from (select room_id, anchor_id, start_time action_time, 1 user_num from temp_online_data union all select room_id, anchor_id, end_time action_time, -1 user_num from temp_online_data) y; 1 1001 2021-06-14 12:12:12 1 1 1003 2021-06-14 13:12:12 2 1 1004 2021-06-14 13:15:12 3 1 1002 2021-06-14 15:12:12 4 1 1003 2021-06-14 16:12:12 2 1 1002 2021-06-14 16:12:12 2 1 1001 2021-06-14 18:12:12 1 1 1004 2021-06-14 20:12:12 0 0 1004 2021-06-14 13:15:12 1 0 1002 2021-06-14 15:12:12 2 0 1005 2021-06-14 15:18:12 3 0 1002 2021-06-14 16:12:12 2 0 1001 2021-06-14 20:12:12 1 0 1004 2021-06-14 20:12:12 1 0 1005 2021-06-14 20:12:12 1 0 1006 2021-06-14 21:12:12 2 0 1007 2021-06-14 22:12:12 3 0 1007 2021-06-14 23:10:12 2 0 1001 2021-06-14 23:12:12 1 0 1006 2021-06-14 23:15:12 0
3、分组求最大值
select room_id, max(user_nums) user_nums_max from (select room_id, anchor_id, action_time, sum(user_num) over (partition by room_id order by action_time) user_nums from (select room_id, anchor_id, start_time action_time, 1 user_num from temp_online_data union all select room_id, anchor_id, end_time action_time, -1 user_num from temp_online_data) y) y group by room_id; 1 4 0 3