王昱棋

导航

hive 编写连续N天登录的总人数

1、数据文件:

1 2019-07-11 1
1 2019-07-12 1
1 2019-07-13 1
1 2019-07-14 1
1 2019-07-15 1
1 2019-07-16 1
1 2019-07-17 1
1 2019-07-18 1
2 2019-07-11 1
2 2019-07-12 1
2 2019-07-13 0
2 2019-07-14 1
2 2019-07-15 1
2 2019-07-16 0
2 2019-07-17 1
2 2019-07-18 0
3 2019-07-11 1
3 2019-07-12 1
3 2019-07-13 1
3 2019-07-14 1
3 2019-07-15 1
3 2019-07-16 1
3 2019-07-17 1
3 2019-07-18 1

2、建表

create table if not exists login(
uid string,
dt string,
login_status int
)
row format delimited fields terminated by ' '
lines terminated by '\n'
stored as textfile
;
load data local inpath '/root/hivedata/login.txt' into table login;

3、sql操作

select
count(1)
from
(
select
uid,
dt
from
(
select
t1.uid uid,
date_sub(t1.dt,t1.r) dt
from
(select
uid,
dt,
row_number() over(distribute by uid sort by dt) r
from login
where login_status=1) t1
) t2 
group by uid,dt having count(1)>7
)
t3;

SELECT uid,date_sub(dt,rank) as login_group,min(dt) as start_login_time
,max(dt) as end_login_time,count(1) as continuous_days
FROM (
SELECT uid,dt,row_number() OVER(PARTITION BY uid order by dt) as rank FROM login where login_status = 1
) a group by uid,date_sub(dt,rank);

 

posted on 2020-08-23 21:49  王昱棋  阅读(461)  评论(0编辑  收藏  举报