hive 编写连续N天登录的总人数
1、数据文件:
1 2019-07-11 1 1 2019-07-12 1 1 2019-07-13 1 1 2019-07-14 1 1 2019-07-15 1 1 2019-07-16 1 1 2019-07-17 1 1 2019-07-18 1 2 2019-07-11 1 2 2019-07-12 1 2 2019-07-13 0 2 2019-07-14 1 2 2019-07-15 1 2 2019-07-16 0 2 2019-07-17 1 2 2019-07-18 0 3 2019-07-11 1 3 2019-07-12 1 3 2019-07-13 1 3 2019-07-14 1 3 2019-07-15 1 3 2019-07-16 1 3 2019-07-17 1 3 2019-07-18 1
2、建表
create table if not exists login( uid string, dt string, login_status int ) row format delimited fields terminated by ' ' lines terminated by '\n' stored as textfile ;
load data local inpath '/root/hivedata/login.txt' into table login;
3、sql操作
select count(1) from ( select uid, dt from ( select t1.uid uid, date_sub(t1.dt,t1.r) dt from (select uid, dt, row_number() over(distribute by uid sort by dt) r from login where login_status=1) t1 ) t2 group by uid,dt having count(1)>7 ) t3;
SELECT uid,date_sub(dt,rank) as login_group,min(dt) as start_login_time ,max(dt) as end_login_time,count(1) as continuous_days FROM ( SELECT uid,dt,row_number() OVER(PARTITION BY uid order by dt) as rank FROM login where login_status = 1 ) a group by uid,date_sub(dt,rank);