|NO.Z.00026|——————————|BigDataEnd|——|Hadoop&PB级数仓.V10|——|PB数仓.v10|会员活跃度分析|留存会员&DWS层计算&加载&ADS层计算&加载&近三天留存会员计算|

一、留存会员
### --- 留存会员

~~~     留存会员与留存率:某段时间的新增会员,经过一段时间后,仍继续使用应用认为是
~~~     留存会员;这部分会员占当时新增会员的比例为留存率。
~~~     需求:1日、2日、3日的会员留存数和会员留存率
~~~     10W新会员:dws_member_add_day(dt=08-01)明细
~~~     3W:特点 在1号是新会员,在2日启动了(2日的启动日志)
~~~     dws_member_start_day
30 31 1 2  
    10W新会员 3W 1日留存数
  20W   5W 2日留存数
30W     4W 日留存数
二、留存会员计算:创建DWS层表
### --- 创建DWS层表

~~~     # 语法
~~~     会员留存明细
use dws;
drop table if exists dws.dws_member_retention_day;
create table dws.dws_member_retention_day
(
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string,
`add_date` string comment '会员新增时间',
`retention_date` int comment '留存天数'
)COMMENT '每日会员留存明细'
PARTITIONED BY (`dt` string)
stored as parquet;
~~~     # 操作实例

hive (default)> use dws;
hive (dws)> drop table if exists dws.dws_member_retention_day;
hive (dws)> create table dws.dws_member_retention_day
          > (
          > `device_id` string,
          > `uid` string,
          > `app_v` string,
          > `os_type` string,
          > `language` string,
          > `channel` string,
          > `area` string,
          > `brand` string,
          > `add_date` string comment '会员新增时间',
          > `retention_date` int comment '留存天数'
          > )COMMENT '每日会员留存明细'
          > PARTITIONED BY (`dt` string)
          > stored as parquet;
三、留存会员计算:加载DWS层数据
### --- 加载DWS层数据

[root@hadoop02 ~]# vim /data/yanqidw/script/member_active/dws1_load_member_retention_day.sh
#!/bin/bash

source /etc/profile

if [ -n "$1" ] ;
then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
insert overwrite table dws.dws_member_retention_day partition(dt='$do_date')
(
select t2.device_id,
    t2.uid,
    t2.app_v,
    t2.os_type,
    t2.language,
    t2.channel,
    t2.area,
    t2.brand,
    t2.dt add_date,
    1
from dws.dws_member_start_day t1 join dws.dws_member_add_day t2 on t1.device_id=t2.device_id
where t2.dt=date_add('$do_date', -1) and t1.dt='$do_date'

union all
select t2.device_id,
    t2.uid,
    t2.app_v,
    t2.os_type,
    t2.language,
    t2.channel,
    t2.area,
    t2.brand,
    t2.dt add_date,
    2
from dws.dws_member_start_day t1 join dws.dws_member_add_day t2 on t1.device_id=t2.device_id
where t2.dt=date_add('$do_date', -2) and t1.dt='$do_date'

union all

select t2.device_id,
    t2.uid,
    t2.app_v,
    t2.os_type,
    t2.language,
    t2.channel,
    t2.area,
    t2.brand,
    t2.dt add_date,
    3
from dws.dws_member_start_day t1 join dws.dws_member_add_day t2 on t1.device_id=t2.device_id
where t2.dt=date_add('$do_date', -3) and t1.dt='$do_date'
);
"

hive -e "$sql"
~~~     # 授予可执行权限并加载数据

[root@hadoop02 ~]# chmod +x /data/yanqidw/script/member_active/dws1_load_member_retention_day.sh
[root@hadoop02 ~]# /data/yanqidw/script/member_active/dws1_load_member_retention_day.sh
### --- 报错处理
~~~     return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask一般是内部错误
~~~     找日志(hive.log【简略】 / MR的日志【详细】)hive.log ===>
~~~     缺省情况下 /tmp/root/hive.log (hive-site.conf)MR的日志 ===> 启动historyserver、
~~~     日志聚合 + SQL运行在集群模式

FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask
~~~     # 改写SQL

[root@hadoop02 ~]# cat /data/yanqidw/script/member_active/dws_load_member_retention_day.sh
#!/bin/bash

source /etc/profile

if [ -n "$1" ] ;then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
drop table if exists tmp.tmp_member_retention;
create table tmp.tmp_member_retention as
(
select t2.device_id,
    t2.uid,
    t2.app_v,
    t2.os_type,
    t2.language,
    t2.channel,
    t2.area,
    t2.brand,
    t2.dt add_date,
    1
from dws.dws_member_start_day t1 join dws.dws_member_add_day t2 on t1.device_id=t2.device_id
where t2.dt=date_add('$do_date', -1) and t1.dt='$do_date' union all
select t2.device_id,
    t2.uid,
    t2.app_v,
    t2.os_type,
    t2.language,
    t2.channel,
    t2.area,
    t2.brand,
    t2.dt add_date,
    2
from dws.dws_member_start_day t1 join dws.dws_member_add_day t2 on t1.device_id=t2.device_id
where t2.dt=date_add('$do_date', -2) and t1.dt='$do_date' union all
select t2.device_id,
    t2.uid,
    t2.app_v,
    t2.os_type,
    t2.language,
    t2.channel,
    t2.area,
    t2.brand,
    t2.dt add_date,
    3
from dws.dws_member_start_day t1 join dws.dws_member_add_day t2 on t1.device_id=t2.device_id
where t2.dt=date_add('$do_date', -3) and t1.dt='$do_date'
);
insert overwrite table dws.dws_member_retention_day
partition(dt='$do_date')
select * from tmp.tmp_member_retention;
"
hive -e "$sql"
~~~     # 授予可执行权限并加载数据

[root@hadoop02 ~]# chmod +x /data/yanqidw/script/member_active/dws_load_member_retention_day.sh
[root@hadoop02 ~]# /data/yanqidw/script/member_active/dws_load_member_retention_day.sh
hive (dws)> show partitions dws_member_retention_day;
partition
dt=2021-09-29

hive (dws)> select * from dws_member_retention_day;
四、创建ADS层表
### --- 创建ADS层表
~~~     # 语法
~~~     会员留存数

use ads;
drop table if exists ads.ads_member_retention_count;

create table ads.ads_member_retention_count
(
`add_date` string comment '新增日期',
`retention_day` int comment '截止当前日期留存天数',
`retention_count` bigint comment '留存数'
) COMMENT '会员留存数'
partitioned by(dt string)
row format delimited fields terminated by ',';

~~~     会员留存率
drop table if exists ads.ads_member_retention_rate;
create table ads.ads_member_retention_rate
(
`add_date` string comment '新增日期',
`retention_day` int comment '截止当前日期留存天数',
`retention_count` bigint comment '留存数',
`new_mid_count` bigint comment '当日会员新增数',
`retention_ratio` decimal(10,2) comment '留存率'
) COMMENT '会员留存率'
partitioned by(dt string)
row format delimited fields terminated by ',';
~~~     # 操作实例

hive (dws)> use ads;
hive (ads)> drop table if exists ads.ads_member_retention_count;
hive (ads)> create table ads.ads_member_retention_count
          > (
          > `add_date` string comment '新增日期',
          > `retention_day` int comment '截止当前日期留存天数',
          > `retention_count` bigint comment '留存数'
          > ) COMMENT '会员留存数'
          > partitioned by(dt string)
          > row format delimited fields terminated by ',';

hive (ads)> drop table if exists ads.ads_member_retention_rate;
hive (ads)> create table ads.ads_member_retention_rate
          > (
          > `add_date` string comment '新增日期',
          > `retention_day` int comment '截止当前日期留存天数',
          > `retention_count` bigint comment '留存数',
          > `new_mid_count` bigint comment '当日会员新增数',
          > `retention_ratio` decimal(10,2) comment '留存率'
          > ) COMMENT '会员留存率'
          > partitioned by(dt string)
          > row format delimited fields terminated by ',';
五、加载ADS层数据
### --- 加载ADS层数据
~~~     备注:最后一条SQL的连接条件应为:t1.add_date=t2.dt。在10.4 节中有详细说明。

[root@hadoop02 ~]# vim /data/yanqidw/script/member_active/ads_load_member_retention.sh
#!/bin/bash

source /etc/profile

if [ -n "$1" ] ;then
    do_date=$1
else
    do_date=`date -d "-1 day" +%F`
fi

sql="
insert overwrite table ads.ads_member_retention_count partition (dt='$do_date')
select add_date, retention_date,
count(*) retention_count from dws.dws_member_retention_day
where dt='$do_date' group by add_date, retention_date;
insert overwrite table ads.ads_member_retention_rate partition (dt='$do_date')

select t1.add_date,
        t1.retention_day,
        t1.retention_count,
        t2.cnt,
        t1.retention_count/t2.cnt*100
from ads.ads_member_retention_count t1 join ads.ads_new_member_cnt t2 on t1.dt=t2.dt
where t1.dt='$do_date';
"

hive -e "$sql"
~~~     # 授予可执行权限并加载数据

[root@hadoop02 ~]# chmod +x /data/yanqidw/script/member_active/ads_load_member_retention.sh
[root@hadoop02 ~]# /data/yanqidw/script/member_active/ads_load_member_retention.sh
hive (ads)> show partitions ads_member_retention_count;
partition
dt=2021-09-29
hive (ads)> show partitions ads_member_retention_rate;
partition
dt=2021-09-29

hive (ads)> select * from  ads_member_retention_count;
hive (ads)> select * from ads_member_retention_rate;
六、小结:会员活跃度--活跃会员数、新增会员、留存会员
### --- 脚本调用次序:

[root@hadoop02 ~]# cd /data/yanqidw/script/member_active/
~~~     # 加载ODS / DWD 层采集
./ods_load_startlog.sh
./dwd_load_startlog.sh
~~~     # 活跃会员
./dws_load_member_start.sh
./ads_load_member_active.sh
~~~     # 新增会员
./dws_load_member_add_day.sh
./ads_load_member_add.sh
~~~     # 会员留存
./dws_load_member_retention_day.sh
./ads_load_member_retention.sh

 
 
 
 
 
 
 
 
 

Walter Savage Landor:strove with none,for none was worth my strife.Nature I loved and, next to Nature, Art:I warm'd both hands before the fire of life.It sinks, and I am ready to depart
                                                                                                                                                   ——W.S.Landor

 

posted on   yanqi_vip  阅读(47)  评论(0编辑  收藏  举报

相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示