1

Hive SQL必刷练习题:同时在线人数问题(*****)

https://blog.csdn.net/Mikkkee/article/details/136776193

 

 

--Drop
DROP TABLE IF EXISTS test_live_events;
 
 
--DDL
CREATE TABLE IF NOT EXISTS test_live_events
(
    user_id      INT COMMENT '用户id',
    live_id      INT COMMENT '直播id',
    in_datetime  STRING COMMENT '进入直播间时间',
    out_datetime STRING COMMENT '离开直播间时间'
) COMMENT '直播间访问记录' STORED AS ORC TBLPROPERTIES ( "orc.compress" = "SNAPPY", "discover.partitions" = "false" );
 
 
--Insert items
INSERT OVERWRITE TABLE test_live_events VALUES 
(1010, 1, '2023-08-12 19:00:00', '2'),
(100, 1, '2023-08-12 19:30:00', '2023-08-12 19:53:00'),
(100, 2, '2023-08-12 21:01:00', '2023-08-12 22:00:00'),
(101, 1, '2023-08-12 19:05:00', '2023-08-12 20:55:00'),
(101, 2, '2023-08-12 21:05:00', '2023-08-12 21:58:00'),
(102, 1, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(102, 2, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(102, 3, '2023-08-12 21:05:00', '2023-08-12 22:05:00'),
(104, 1, '2023-08-12 19:00:00', '2023-08-12 20:59:00'),
(104, 2, '2023-08-12 21:57:00', '2023-08-12 22:56:00'),
(105, 2, '2023-08-12 19:10:00', '2023-08-12 19:18:00'),
(106, 3, '2023-08-12 19:01:00', '2023-08-12 21:10:00');









WITH tmp_live_status AS (

create table  tmp as 
SELECT 
  user_id,
  live_id,
  in_datetime AS event_time,
  1 AS status
FROM
  test_live_events
UNION ALL
SELECT 
  user_id,
  live_id,
  out_datetime AS event_time,
  -1 AS status
FROM
  test_live_events
)
--创建临时表 ,登入的时间 后面加个自动 为1  ,登出的时间 数据 加个自动 固定值为-1  两个数据结果集union 起来 
SELECT live_id, MAX(online_cnt) AS online_cnt FROM ( SELECT user_id, live_id, event_time, status, SUM(status) OVER (PARTITION BY live_id ORDER BY event_time) AS online_cnt
-- 基于 迁入\出 时间进行排序, 基于直播间id进行分组. sum 迁入迁出的度量值 . 最大的那个就是某个时间点的 直播间
最多人数 FROM tmp ) a GROUP BY live_id ;

 

posted @ 2024-08-04 18:51  萌哥-爱学习  阅读(6)  评论(0编辑  收藏  举报