SQL查询用户连续登录天数
一、连续3天登录用户
1.1建表
create table user_login(
user_id int,
login_time datetime,
)
1.2数据插入
insert into user_login values (1,'2022-06-01 11:00:00.000');
insert into user_login values (1,'2022-06-01 12:00:00.000');
insert into user_login values (1,'2022-06-01 12:00:00.000');
insert into user_login values (1,'2022-06-02 11:00:00.000');
insert into user_login values (1,'2022-06-03 11:00:00.000');
insert into user_login values (2,'2022-06-01 11:00:00.000');
insert into user_login values (2,'2022-06-02 11:00:00.000');
insert into user_login values (2,'2022-06-04 11:00:00.000');
insert into user_login values (3,'2022-06-01 11:00:00.000');
insert into user_login values (3,'2022-06-02 11:00:00.000');
insert into user_login values (3,'2022-06-04 11:00:00.000');
insert into user_login values (3,'2022-06-05 11:00:00.000');
insert into user_login values (3,'2022-06-06 11:00:00.000');
insert into user_login values (3,'2022-06-07 11:00:00.000');
insert into user_login values (3,'2022-06-08 11:00:00.000');
1.3SQL查询
SQL主要逻辑:
(1)通过开窗函数对日期进行每个用户的登录日期排序
Select user_id
,convert(varchar(100),login_time,23) as login_date
,row_number() over(partition by user_id order by convert(varchar(100),login_time,23)) as rn
From user_login;
(2)将登录日期和排序序号相减得到相同连续日期
Select user_id
,login_date
,datediff(day,login_date,rn) as interval_days
From (Select user_id
,convert(varchar(100),login_time,23) as login_date
,row_number() over(partition by user_id order by convert(varchar(100),login_time,23)) as rn
From user_login
);
(3)按照得到的相同连续日期分组,count大于3的为连续登录超过3天的。
select B.user_id
from(Select user_id
,login_date
,datediff(day,login_date,rn) as interval_days
From (Select user_id
,convert(varchar(100),login_time,23) as login_date
,row_number() over(partition by user_id order by convert(varchar(100),login_time,23)) as rn
From user_login
)A
)B
group by B.user_id
,B.inteval_days
having count(1) >= 3;
二、连续3天登录,中间间隔一/N天用户也算
建表及数据插入参考1.1,1.2。
2.1获取上次登录日期
用上一次登陆日期,与此次的登陆日期对比,可以判断是否符合小于等于2天的条件。
此处使用lag窗口函数,开窗查询前一行数据,注意每个用户的第一条登陆信息上一天为null,这里给1970-01-01,防止空指针异常。
select user_id
,login_date
,lag(login_date,1,'1970-01-01') over(partition by user_id order by login_date) as last_login_date
from test_login; t1
查询结果:
user_id login_date last_login_date
1001 2021/11/5 1970/1/1
1001 2021/11/6 2021/11/5
1001 2021/11/8 2021/11/6
1001 2021/11/10 2021/11/8
1001 2021/11/15 2021/11/10
1001 2021/11/16 2021/11/15
1001 2021/11/18 2021/11/16
1001 2021/11/19 2021/11/18
1002 2021/11/5 1970/1/1
1002 2021/11/7 2021/11/5
1002 2021/11/9 2021/11/7
1002 2021/11/11 2021/11/9
1002 2021/11/13 2021/11/11
1002 2021/11/16 2021/11/13
2.2获取相邻登录日期差值
将当前行数据的登录日期减去上一行数据的登录日期。
select user_id
,login_date
,datediff(login_date,last_login_date) as date_diff
from t1; t2
查询结果:
user_id login_date date_diff
1001 2021-11-05 18936
1001 2021-11-06 1
1001 2021-11-08 2
1001 2021-11-10 2
1001 2021-11-15 5
1001 2021-11-16 1
1001 2021-11-18 2
1001 2021-11-19 1
1002 2021-11-05 18936
1002 2021-11-07 2
1002 2021-11-09 2
1002 2021-11-11 2
1002 2021-11-13 2
1002 2021-11-16 3
2.3划分连续登录组
相邻两次登录日期差值小于等于2的数据为一个连续登录组,每遇到相邻两次登录日期差值大于2的数据,分组编号+1,更换连续登录组。
select user_id
,login_date
,sum(if(date_diff>2,1,0)) over(partition by user_id order by login_date rows between unboundedpreceding and current row) as group_id
from t2; t3
查询结果:
user_id login_date group_id
1001 2021-11-05 1
1001 2021-11-06 1
1001 2021-11-08 1
1001 2021-11-10 1
1001 2021-11-15 2
1001 2021-11-16 2
1001 2021-11-18 2
1001 2021-11-19 2
1002 2021-11-05 1
1002 2021-11-07 1
1002 2021-11-09 1
1002 2021-11-11 1
1002 2021-11-13 1
1002 2021-11-16 2
2.4计算连续登录天数
每个连续登录日期组中,最大日期-最小日期差值+1 即为连续登录天数。
select user_id
,group_id
,(datediff(max(login_date),min(login_date))+1) as continuous_login_days
from t3
group by user_id
,group_id; t4
查询结果:
user_id group_id continuous_login_days
1001 1 6
1001 2 5
1002 1 9
1002 2 1
2.5计算每用户最大登录天数
按用户取最大连续登录天数。
select user_id
,max(continuous_login_days) as max_continuous_login_days
from t4
group by user_id;
查询结果:
user_id max_continuous_login_days
1001 6
1002 9
2.6最终代码测试
select
user_id,
max(continuous_login_days) max_continuous_login_days
from
(
select
user_id,
group_id,
(datediff(max(login_date), min(login_date)) + 1) continuous_login_days
from
(
select
user_id,
login_date,
sum(if(date_diff > 2, 1, 0))
over (partition by user_id order by login_date rows between unbounded preceding and current row) group_id
from
(
select
user_id,
login_date,
datediff(login_date, last_login_date) date_diff
from
(select
user_id,
login_date,
lag(login_date, 1, '1970-01-01')
over (partition by user_id order by login_date) last_login_date
from
test_login) t1) t2) t3
group by user_id, group_id) t4
group by user_id;
三、查询用户最大连续登录天数
3.1插入数据
样例数据如下 login_log:
user_id login_time
1 2022-11-28
1 2022-12-01
1 2022-12-02
1 2022-12-03
2 2022-12-01
2 2022-12-04
3.2开窗函数,对登录时间进行分组排序
select user_id
, login_time
,row_number() over(partition by user_id order by login_time) as num
from login_log;
查询结果:
user_id login_time num
1 2022-11-28 1
1 2022-12-01 2
1 2022-12-02 3
1 2022-12-03 4
2 2022-12-01 1
2 2022-12-04 2
3.3利用等差数列的特性,将登录日期和排序序号相减得到相同连续日期
select t.user_id
,t.login_time
,date_sub(login_time, INTERVAL t.num DAY) as date_rslt
from (select user_id
,login_time
,row_number() over(partition by user_id order by login_time) num
from login_log
) t;
注:INTERVAL 关键字可以用于计算时间间隔, date_sub(login_time, INTERVAL t.num DAY)表示登录时间减去 num 天;若 DAY 改成 HOUR 表示减去 num 小时。
查询结果:
user_id login_time date_rslt
1 2022-11-28 2022-11-27
1 2022-12-01 2022-11-29
1 2022-12-02 2022-11-29
1 2022-12-03 2022-11-29
2 2022-12-01 2022-11-30
2 2022-12-04 2022-12-02
3.4分组获取用户连续登录天数
select a.user_id
,a.date_rslt
,count(1) as cnt
from (select t.user_id
,t.login_time
,date_sub(login_time, INTERVAL t.num DAY) as date_rslt
from (select user_id
,login_time
,row_number() over(partition by user_id order by login_time) as num
from login_log
) t
) a
group by a.user_id
,a.date_rslt;
查询结果:
1 2022-11-27 1
1 2022-11-29 3
1 2022-11-30 1
2 2022-12-02 1
3.5计算每个用户最大登录天数
select user_id
,max(cnt) as max
from (select a.user_id
,a.date_rslt
,count(1) as cnt
from (select t.user_id
,t.login_time
,date_sub(login_time, INTERVAL t.num DAY) as date_rslt
from (select user_id
,login_time
,row_number() over(partition by user_id order by login_time) as num
from login_log
) t
) a
group by a.user_id
,a.date_rslt
)M
group by user_id;
查询结果
1 3
2 1