Hive SQL必刷练习题:同时在线人数问题(*****)
https://blog.csdn.net/Mikkkee/article/details/136776193
--Drop DROP TABLE IF EXISTS test_live_events; --DDL CREATE TABLE IF NOT EXISTS test_live_events ( user_id INT COMMENT '用户id', live_id INT COMMENT '直播id', in_datetime STRING COMMENT '进入直播间时间', out_datetime STRING COMMENT '离开直播间时间' ) COMMENT '直播间访问记录' STORED AS ORC TBLPROPERTIES ( "orc.compress" = "SNAPPY", "discover.partitions" = "false" ); --Insert items INSERT OVERWRITE TABLE test_live_events VALUES (1010, 1, '2023-08-12 19:00:00', '2'), (100, 1, '2023-08-12 19:30:00', '2023-08-12 19:53:00'), (100, 2, '2023-08-12 21:01:00', '2023-08-12 22:00:00'), (101, 1, '2023-08-12 19:05:00', '2023-08-12 20:55:00'), (101, 2, '2023-08-12 21:05:00', '2023-08-12 21:58:00'), (102, 1, '2023-08-12 21:05:00', '2023-08-12 22:05:00'), (102, 2, '2023-08-12 19:00:00', '2023-08-12 20:59:00'), (102, 3, '2023-08-12 21:05:00', '2023-08-12 22:05:00'), (104, 1, '2023-08-12 19:00:00', '2023-08-12 20:59:00'), (104, 2, '2023-08-12 21:57:00', '2023-08-12 22:56:00'), (105, 2, '2023-08-12 19:10:00', '2023-08-12 19:18:00'), (106, 3, '2023-08-12 19:01:00', '2023-08-12 21:10:00'); WITH tmp_live_status AS ( create table tmp as SELECT user_id, live_id, in_datetime AS event_time, 1 AS status FROM test_live_events UNION ALL SELECT user_id, live_id, out_datetime AS event_time, -1 AS status FROM test_live_events ) --创建临时表 ,登入的时间 后面加个自动 为1 ,登出的时间 数据 加个自动 固定值为-1 两个数据结果集union 起来
SELECT live_id, MAX(online_cnt) AS online_cnt FROM ( SELECT user_id, live_id, event_time, status, SUM(status) OVER (PARTITION BY live_id ORDER BY event_time) AS online_cnt
-- 基于 迁入\出 时间进行排序, 基于直播间id进行分组. sum 迁入迁出的度量值 . 最大的那个就是某个时间点的 直播间
最多人数 FROM tmp ) a GROUP BY live_id ;