|NO.Z.00022|——————————|BigDataEnd|——|Hadoop&PB级数仓.V06|——|PB数仓.v06|会员活跃度分析|活跃会员&DWS建表&DWS加载数据|

一、活跃会员
### --- 活跃会员

~~~     活跃会员:打开应用的会员即为活跃会员;
~~~     新增会员:第一次使用应用的用户,定义为新增会员;
~~~     留存会员:某段时间的新增会员,经过一段时间后,仍继续使用应用认为是留存会员;
~~~     活跃会员指标需求:每日、每周、每月的活跃会员数
~~~     DWD:会员的每日启动信息明细(会员都是活跃会员;某个会员可能会出现多次)
~~~     DWS:每日活跃会员信息(关键)、每周活跃会员信息、每月活跃会员信息
~~~     每日活跃会员信息 ===> 每周活跃会员信息
~~~     每日活跃会员信息 ===> 每月活跃会员信息
~~~     ADS:每日、每周、每月活跃会员数(输出)
~~~     # ADS表结构:
~~~     备注:周、月为自然周、自然月

daycnt weekcnt monthcnt dt
### --- 处理过程:

~~~     建表(每日、每周、每月活跃会员信息)
~~~     每日启动明细 ===> 每日活跃会员
~~~     每日活跃会员 => 每周活跃会员;每日活跃会员 => 每月活跃会员
~~~     汇总生成ADS层的数据
二、活跃会员计算:创建DWS层表
### --- 创建DWS层表
~~~     # 语法

use dws;
drop table if exists dws.dws_member_start_day;
create table dws.dws_member_start_day
(
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string
) COMMENT '会员日启动汇总'
partitioned by(dt string)
stored as parquet;
~~~     # 语法

drop table if exists dws.dws_member_start_week;
create table dws.dws_member_start_week
(
`device_id` string,
`uid` string,
`app_v` string,
`os_type` string,
`language` string,
    `channel` string,
`area` string,
`brand` string,
`week` string
) COMMENT '会员周启动汇总'
PARTITIONED BY (`dt` string)
stored as parquet;
~~~     # 语法

drop table if exists dws.dws_member_start_month;
create table dws.dws_member_start_month(
`device_id` string,
`uid` string,
`app_v` string,
    `os_type` string,
`language` string,
`channel` string,
`area` string,
`brand` string,
`month` string
) COMMENT '会员月启动汇总'
PARTITIONED BY (`dt` string)
stored as parquet;
~~~     # 操作示例

hive (dwd)> use dws;
OK
hive (dws)> drop table if exists dws.dws_member_start_day;
OK
hive (dws)> create table dws.dws_member_start_day
          > (
          > `device_id` string,
          > `uid` string,
          > `app_v` string,
          > `os_type` string,
          > `language` string,
          > `channel` string,
          > `area` string,
          > `brand` string
          > ) COMMENT '会员日启动汇总'
          > partitioned by(dt string)
          > stored as parquet;
OK
~~~     # 操作实例

hive (dws)> drop table if exists dws.dws_member_start_week;
OK
hive (dws)> create table dws.dws_member_start_week
          > (
          > `device_id` string,
          > `uid` string,
          > `app_v` string,
          > `os_type` string,
          > `language` string,
          >     `channel` string,
          > `area` string,
          > `brand` string,
          > `week` string
          > ) COMMENT '会员周启动汇总'
          > PARTITIONED BY (`dt` string)
          > stored as parquet;
OK
~~~     # 操作实例

hive (dws)> drop table if exists dws.dws_member_start_month;
OK
hive (dws)> create table dws.dws_member_start_month(
          > `device_id` string,
          > `uid` string,
          > `app_v` string,
          >     `os_type` string,
          > `language` string,
          > `channel` string,
          > `area` string,
          > `brand` string,
          > `month` string
          > ) COMMENT '会员月启动汇总'
          > PARTITIONED BY (`dt` string)
          > stored as parquet;
OK
三、活跃会员计算:加载数据
### --- 创建DWS层级加载数据脚本
~~~     注意shell的引号:# ODS => DWD => DWS(每日、每周、每月活跃会员的汇总表)

[root@hadoop02 ~]# vim /data/yanqidw/script/member_active/dws_load_member_start.sh
#!/bin/bash

source /etc/profile

# 可以输入日期;如果未输入日期取昨天的时间
if [ -n "$1" ]
then
   do_date=$1
else
   do_date=`date -d "-1 day" +%F`
fi

# 定义要执行的SQL
# 汇总得到每日活跃会员信息;每日数据汇总得到每周、每月数据
sql="
-- 汇总得到每日活跃会员
insert overwrite table dws.dws_member_start_day
partition(dt='$do_date')
select device_id,
concat_ws('|', collect_set(uid)),
concat_ws('|', collect_set(app_v)),
concat_ws('|', collect_set(os_type)),
concat_ws('|', collect_set(language)),
concat_ws('|', collect_set(channel)),
concat_ws('|', collect_set(area)),
concat_ws('|', collect_set(brand))
from dwd.dwd_start_log
where dt='$do_date'
group by device_id;
-- 汇总得到每周活跃会员
insert overwrite table dws.dws_member_start_week
partition(dt='$do_date')
select device_id,
concat_ws('|', collect_set(uid)),
concat_ws('|', collect_set(app_v)),
concat_ws('|', collect_set(os_type)),
concat_ws('|', collect_set(language)),
concat_ws('|', collect_set(channel)),
concat_ws('|', collect_set(area)),
concat_ws('|', collect_set(brand)),
date_add(next_day('$do_date', 'mo'), -7)
from dws.dws_member_start_day
where dt >= date_add(next_day('$do_date', 'mo'), -7)
and dt <= '$do_date'
group by device_id;
-- 汇总得到每月活跃会员
insert overwrite table dws.dws_member_start_month
partition(dt='$do_date')
select device_id,
concat_ws('|', collect_set(uid)),
concat_ws('|', collect_set(app_v)),
concat_ws('|', collect_set(os_type)),
concat_ws('|', collect_set(language)),
concat_ws('|', collect_set(channel)),
concat_ws('|', collect_set(area)),
concat_ws('|', collect_set(brand)),
date_format('$do_date', 'yyyy-MM')
from dws.dws_member_start_day
where dt >= date_format('$do_date', 'yyyy-MM-01')
and dt <= '$do_date'
group by device_id;
"
hive -e "$sql"
### --- 为脚本授予可执行权限并验证是否加载数据
~~~     # 授予可执行权限

[root@hadoop02 ~]# chmod +x /data/yanqidw/script/member_active/dws_load_member_start.sh
~~~     # 执行该脚本,加载数据

[root@hadoop02 ~]# /data/yanqidw/script/member_active/dws_load_member_start.sh
~~~     # 查看每日的数据
hive (dws)> show partitions dws_member_start_day;
partition
dt=2021-09-28
hive (dws)> select count(*) from dws.dws_member_start_day;
9999
                         
~~~     # 查看每周的数据
hive (dws)> show partitions dws_member_start_week;
partition
dt=2021-09-28
hive (dws)> select count(*) from dws.dws_member_start_week;
9999
                         
~~~     # 查看每月的数据                       
hive (dws)> show partitions dws_member_start_month;
partition
dt=2021-09-28
hive (dws)> select count(*) from dws.dws_member_start_month;
9999

 
 
 
 
 
 
 
 
 

Walter Savage Landor:strove with none,for none was worth my strife.Nature I loved and, next to Nature, Art:I warm'd both hands before the fire of life.It sinks, and I am ready to depart
                                                                                                                                                   ——W.S.Landor

 

 

posted on   yanqi_vip  阅读(46)  评论(0编辑  收藏  举报

相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示