用户留存率
计算用户留存率首先要知道什么是留存用户。所谓留存用户,就是当日新增用户在之后的时间继续活跃的用户;
次日留存率:
1月1日,新增100人,1月2日,这些人中有30人活跃--留存率30%;
两日留存率:
1月1日,新增100人,1月3日,这些人中有20人活跃--留存率20%;
依此类推!
案例:hive建表并插入数据
drop table if exists ads_user_retention_day_rate; create external table ads_user_retention_day_rate ( `stat_date` string comment '统计日期', `create_date` string comment '设备新增日期', `retention_day` int comment '截止当前日期留存天数', `retention_count` bigint comment '留存数量', `new_mid_count` bigint comment '设备新增数量', `retention_ratio` decimal(16,2) comment '留存率' )comment '留存率' row format delimited fields terminated by '\t' location '/datawarehouse/app/ads_user_retention_day_rate';
计算一日、两日、三日留存率:
insert into table ads_user_retention_day_rate select '2021-05-20', date_add('2021-05-20',-1), 1, sum(if (login_date_first=date_add('2021-05-20',-1) and login_date_last= '2021-05-20',1,0)), sum(if (login_date_first=date_add('2021-05-20',-1),1,0)), sum(if (login_date_first=date_add('2021-05-20',-1) and login_date_last= '2021-05-20',1,0))/ sum(if (login_date_first=date_add('2021-05-20',-1),1,0))*100 from dwt_uv_topic
union all
select
'2021-05-20',
date_add('2021-05-20',-2),
2,
sum(if (login_date_first=data_add('2021-05-20',-2) and login_date_last='2021-03-21',1,0)),
sum(if (login_date_first=data_add('2021-05-20',-2),1,0) ),
sum(if (login_date_first=data_add('2021-05-20',-2) and login_date_last='2021-03-21',1,0))/sum(if (login_date_first=data_add('2021-05-20',-2),1,0))*100
from dwt_uv_topic
union all
select
'2021-05-20',
date_add('2021-05-20',-3),
3,
sum(if (login_date_first=data_add('2021-05-20',-3) and login_date_last='2021-03-21',1,0)),
sum(if (login_date_first=data_add('2021-05-20',-3),1,0) ),
sum(if (login_date_first=data_add('2021-05-20',-3) and login_date_last='2021-03-21',1,0))/sum(if (login_date_first=data_add('2021-05-20',-3),1,0))*100
from dwt_uv_topic