一、数仓搭建 - ADS 层
1.1 设备主题
1.1.1 活跃设备数(日、周、月)
需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句
drop table if exists ads_uv_count; create external table ads_uv_count( `dt` string COMMENT '统计日期', `day_count` bigint COMMENT '当日用户数量', `wk_count` bigint COMMENT '当周用户数量', `mn_count` bigint COMMENT '当月用户数量', `is_weekend` string COMMENT 'Y,N 是否是周末,用于得到本周最终结果', `is_monthend` string COMMENT 'Y,N 是否是月末,用于得到本月最终结果' ) COMMENT '活跃设备数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_uv_count/';
2)导入数据
insert into table ads_uv_count select '2020-03-10' dt, daycount.ct, wkcount.ct, mncount.ct, if(date_add(next_day('2020-03-10','MO'),-1)='2020-03-10','Y','N') , if(last_day('2020-03-10')='2020-03-10','Y','N') from ( select '2020-03-10' dt, count(*) ct from dwt_uv_topic where login_date_last='2020-03-10' )daycount join ( select '2020-03-10' dt, count (*) ct from dwt_uv_topic where login_date_last>=date_add(next_day('2020-03-10','MO'),-7) and login_date_last<= date_add(next_day('2020-03-10','MO'),-1) ) wkcount on daycount.dt=wkcount.dt join ( select '2020-03-10' dt, count (*) ct from dwt_uv_topic where date_format(login_date_last,'yyyy-MM')=date_format('2020-03-10','yyyy-MM') )mncount on daycount.dt=mncount.dt;
3)查询导入结果
1.1.2 每日新增设备
1)建表语句
drop table if exists ads_new_mid_count; create external table ads_new_mid_count ( `create_date` string comment '创建时间' , `new_mid_count` BIGINT comment '新增设备数量' ) COMMENT '每日新增设备信息数量' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_new_mid_count/';
2)导入数据
insert into table ads_new_mid_count select login_date_first, count(*) from dwt_uv_topic where login_date_first='2020-03-10' group by login_date_first;
3)查询导入数据
select * from ads_new_mid_count;
1.1.3 沉默用户数
需求定义:
沉默用户:只在安装当天启动过,且启动时间是在 7 天前
1)建表语句
drop table if exists ads_silent_count; create external table ads_silent_count( `dt` string COMMENT '统计日期', `silent_count` bigint COMMENT '沉默设备数' ) row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_silent_count';
2)导入 2020-03-20 数据
insert into table ads_silent_count select '2020-03-15', count(*) from dwt_uv_topic where login_date_first=login_date_last and login_date_last<=date_add('2020-03-15',-7);
3)查询导入数据
select * from ads_silent_count;
1.1.4 本周回流用户数
需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句
drop table if exists ads_back_count; create external table ads_back_count( `dt` string COMMENT '统计日期', `wk_dt` string COMMENT '统计日期所在周', `wastage_count` bigint COMMENT '回流设备数' ) row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_back_count';
2)导入数据:
insert into table ads_back_count select '2020-03-15', count(*) from ( select mid_id from dwt_uv_topic where login_date_last>=date_add(next_day('2020-03-15','MO'),-7) and login_date_last<= date_add(next_day('2020-03-15','MO'),-1) and login_date_first<date_add(next_day('2020-03-15','MO'),-7) )current_wk left join ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-03-15','MO'),-7*2) and dt<= date_add(next_day('2020-03-15','MO'),-7-1) group by mid_id )last_wk on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null;
3)查询结果
select * from ads_back_count;
1.1.5 流失用户数
需求定义:
流失用户:最近 7 天未活跃的设备
1)建表语句
drop table if exists ads_wastage_count; create external table ads_wastage_count( `dt` string COMMENT '统计日期', `wastage_count` bigint COMMENT '流失设备数' ) row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_wastage_count';
2)导入 2020-03-20 数据
insert into table ads_wastage_count select '2020-03-20', count(*) from ( select mid_id from dwt_uv_topic where login_date_last<=date_add('2020-03-20',-7) group by mid_id )t1;
3)查询结果
select * from ads_wastage_count;
1.1.6 留存率
1)建表语句
drop table if exists ads_user_retention_day_rate; create external table ads_user_retention_day_rate ( `stat_date` string comment '统计日期', `create_date` string comment '设备新增日期', `retention_day` int comment '截止当前日期留存天数', `retention_count` bigint comment '留存数量', `new_mid_count` bigint comment '设备新增数量', `retention_ratio` decimal(10,2) comment '留存率' ) COMMENT '每日用户留存情况' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_retention_day_rate/';
2)导入数据
insert into table ads_user_retention_day_rate select '2020-03-10',--统计日期 date_add('2020-03-10',-1),--新增日期 1,--留存天数 sum(if(login_date_first=date_add('2020-03-10',-1) and login_date_last='2020-03-10',1,0)),--2020-03-09 的 1 日留存数 sum(if(login_date_first=date_add('2020-03-10',-1),1,0)),--2020-03-09 新增 sum(if(login_date_first=date_add('2020-03-10',-1) and login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',- 1),1,0))*100 from dwt_uv_topic union all select '2020-03-10',--统计日期 date_add('2020-03-10',-2),--新增日期 2,--留存天数 sum(if(login_date_first=date_add('2020-03-10',-2) and login_date_last='2020-03-10',1,0)),--2020-03-08 的 2 日留存数 sum(if(login_date_first=date_add('2020-03-10',-2),1,0)),--2020-03-08 新增 sum(if(login_date_first=date_add('2020-03-10',-2) and login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',- 2),1,0))*100 from dwt_uv_topic union all select '2020-03-10',--统计日期 date_add('2020-03-10',-3),--新增日期 3,--留存天数 sum(if(login_date_first=date_add('2020-03-10',-3) and login_date_last='2020-03-10',1,0)),--2020-03-07 的 3 日留存数 sum(if(login_date_first=date_add('2020-03-10',-3),1,0)),--2020-03-07 新增 sum(if(login_date_first=date_add('2020-03-10',-3) and login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',- 3),1,0))*100 from dwt_uv_topic;
3)查询导入数据
select * from ads_user_retention_day_rate;
1.1.7 最近连续三周活跃用户数
1)建表语句
drop table if exists ads_continuity_wk_count; create external table ads_continuity_wk_count( `dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日 期', `wk_dt` string COMMENT '持续时间', `continuity_count` bigint COMMENT '活跃次数' ) row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_continuity_wk_count';
2)导入 2020-03-20 所在周的数据
insert into table ads_continuity_wk_count select '2020-03-15', concat(date_add(next_day('2020-03-15','MO'),-7*3),'_',date_add(next_day(' 2020-03-15','MO'),-1)), count(*) from ( select mid_id from ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-03-10','monday'),-7) and dt<=date_add(next_day('2020-03-10','monday'),-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-03-10','monday'),-7*2) and dt<=date_add(next_day('2020-03-10','monday'),-7-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('2020-03-10','monday'),-7*3) and dt<=date_add(next_day('2020-03-10','monday'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2
3)查询
select * from ads_continuity_wk_count;
1.1.8 最近七天内连续三天活跃用户数
1)建表语句
drop table if exists ads_continuity_uv_count; create external table ads_continuity_uv_count( `dt` string COMMENT '统计日期', `wk_dt` string COMMENT '最近 7 天日期', `continuity_count` bigint ) COMMENT '连续活跃设备数' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_continuity_uv_count';
2)写出导入数据的 SQL 语句
insert into table ads_continuity_uv_count select '2020-03-12', concat(date_add('2020-03-12',-6),'_','2020-03-12'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from dws_uv_detail_daycount where dt>=date_add('2020-03-12',-6) and dt<='2020-03-12' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id )t4;
7.2 会员主题
7.2.1 会员主题信息
1)建表
drop table if exists ads_user_topic; create external table ads_user_topic( `dt` string COMMENT '统计日期', `day_users` string COMMENT '活跃会员数', `day_new_users` string COMMENT '新增会员数', `day_new_payment_users` string COMMENT '新增消费会员数', `payment_users` string COMMENT '总付费会员数', `users` string COMMENT '总会员数', `day_users2users` decimal(10,2) COMMENT '会员活跃率', `payment_users2users` decimal(10,2) COMMENT '会员付费率', `day_new_users2users` decimal(10,2) COMMENT '会员新鲜度' ) COMMENT '会员主题信息表' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_topic';
2)导入数据
insert into table ads_user_topic select '2020-03-10', sum(if(login_date_last='2020-03-10',1,0)), sum(if(login_date_first='2020-03-10',1,0)), sum(if(payment_date_first='2020-03-10',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last='2020-03-10',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first='2020-03-10',1,0))/sum(if(login_date_last='2020-03-10',1,0)) from dwt_user_topic
3)查询数据
hive (gmall)> select * from ads_user_topic;
4)vim ads_user_topic.sh
添加如下内容:
#!/bin/bash APP=gmall hive=/opt/modules/hive/bin/hive # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql=" with tmp_day_users as ( select '$do_date' dt, count(*) day_users from ${APP}.dwt_user_topic where login_date_last='$do_date' ), tmp_day_new_users as ( select '$do_date' dt, count(*) day_new_users from ${APP}.dwt_user_topic where login_date_last='$do_date' and login_date_first='$do_date' ), tmp_day_new_payment_users as ( select '$do_date' dt, count(*) day_new_payment_users from ${APP}.dwt_user_topic where payment_date_first='$do_date' ), tmp_payment_users as ( select '$do_date' dt, count(*) payment_users from ${APP}.dwt_user_topic where payment_date_first is not null ), tmp_users as ( select '$do_date' dt, count(*) users from ${APP}.dwt_user_topic tmp_users ) insert into table ${APP}.ads_user_topic select '$do_date' dt, day_users, day_new_users, day_new_payment_users, payment_users, users, day_users/users, payment_users/users, day_new_users/users from tmp_day_users join tmp_day_new_users on tmp_day_users.dt=tmp_day_new_users.dt join tmp_day_new_payment_users on tmp_day_users.dt=tmp_day_new_payment_users.dt join tmp_payment_users on tmp_day_users.dt=tmp_payment_users.dt join tmp_users on tmp_day_users.dt=tmp_users.dt; " $hive -e "$sql"
5)增加脚本执行权限
chmod 770 ads_user_topic.sh
6)执行脚本导入数据
ads_user_topic.sh 2020-03-11
7)查看导入数据
select * from ads_user_topic;
7.2.2 漏斗分析
统计“浏览->购物车->下单->支付”的转化率
思路:统计各个行为的人数,然后计算比值
1)建表语句
drop table if exists ads_user_action_convert_day; create external table ads_user_action_convert_day( `dt` string COMMENT '统计日期', `total_visitor_m_count` bigint COMMENT '总访问人数', `cart_u_count` bigint COMMENT '加入购物车的人数', `visitor2cart_convert_ratio` decimal(10,2) COMMENT '访问到加入购物车转化率', `order_u_count` bigint COMMENT '下单人数', `cart2order_convert_ratio` decimal(10,2) COMMENT '加入购物车到下单转化率', `payment_u_count` bigint COMMENT '支付人数', `order2payment_convert_ratio` decimal(10,2) COMMENT '下单到支付的转化率' ) COMMENT '用户行为漏斗分析' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_user_action_convert_day/';
2)数据装载
insert into table ads_user_action_convert_day select '2020-03-10', uv.day_count, ua.cart_count, cast(ua.cart_count/uv.day_count as decimal(10,2)) visitor2cart_convert_ratio, ua.order_count, cast(ua.order_count/ua.cart_count as decimal(10,2)) visitor2order_convert_ratio, ua.payment_count, cast(ua.payment_count/ua.order_count as decimal(10,2)) order2payment_convert_ratio from ( select dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from dws_user_action_daycount where dt='2020-03-10' group by dt )ua join ads_uv_count uv on uv.dt=ua.dt;
3)查询加载数据
select * from ads_user_action_convert_day;
7.3 商品主题
7.3.1 商品个数信息
1)建表语句
drop table if exists ads_product_info; create external table ads_product_info( `dt` string COMMENT '统计日期', `sku_num` string COMMENT 'sku 个数', `spu_num` string COMMENT 'spu 个数' ) COMMENT '商品个数信息' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_info';
2)导入数据
insert into table ads_product_info select '2020-03-10' dt, sku_num, spu_num from ( select '2020-03-10' dt, count(*) sku_num from dwt_sku_topic ) tmp_sku_num join ( select '2020-03-10' dt, count(*) spu_num from ( select spu_id from dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt;
3)查询结果数据
select * from ads_product_info;
7.3.2 商品销量排名
1)建表语句
drop table if exists ads_product_sale_topN; create external table ads_product_sale_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品 ID', `payment_amount` bigint COMMENT '销量' ) COMMENT '商品个数信息' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_sale_topN';
2)导入数据
insert into table ads_product_sale_topN select '2020-03-10' dt, sku_id, payment_amount from dws_sku_action_daycount where dt='2020-03-10' order by payment_amount desc limit 10;
3)查询结果数据
select * from ads_product_sale_topN;
7.3.3 商品收藏排名
1)建表语句
drop table if exists ads_product_favor_topN; create external table ads_product_favor_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品 ID', `favor_count` bigint COMMENT '收藏量' ) COMMENT '商品收藏 TopN' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_favor_topN';
2)导入数据
insert into table ads_product_favor_topN select '2020-03-10' dt, sku_id, favor_count from dws_sku_action_daycount where dt='2020-03-10' order by favor_count desc limit 10;
3)查询数据
select * from ads_product_favor_topN;
7.3.4 商品加入购物车排名
1)建表语句
drop table if exists ads_product_cart_topN; create external table ads_product_cart_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品 ID', `cart_num` bigint COMMENT '加入购物车数量' ) COMMENT '商品加入购物车 TopN' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_cart_topN';
2)导入数据
insert into table ads_product_cart_topN select '2020-03-10' dt, sku_id, cart_num from dws_sku_action_daycount where dt='2020-03-10' order by cart_num desc limit 10;
3)查询数据
select * from ads_product_cart_topN;
7.3.5 商品退款率排名(最近 30 天)
1)建表语句
drop table if exists ads_product_refund_topN; create external table ads_product_refund_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品 ID', `refund_ratio` decimal(10,2) COMMENT '退款率' ) COMMENT '商品退款率 TopN' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_product_refund_topN';
2)导入数据
insert into table ads_product_refund_topN select '2020-03-10', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from dwt_sku_topic order by refund_ratio desc limit 10;
3)查询数据
select * from ads_product_refund_topN;
7.3.6 商品差评率
1)建表语句
drop table if exists ads_appraise_bad_topN; create external table ads_appraise_bad_topN( `dt` string COMMENT '统计日期', `sku_id` string COMMENT '商品 ID', `appraise_bad_ratio` decimal(10,2) COMMENT '差评率' ) COMMENT '商品差评率 TopN' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_appraise_bad_topN';
2)导入数据
insert into table ads_appraise_bad_topN select '2020-03-10' dt, sku_id, appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_coun t+appraise_default_count) appraise_bad_ratio from dws_sku_action_daycount where dt='2020-03-10' order by appraise_bad_ratio desc limit 10;
3)查询数据
select * from ads_appraise_bad_topN;
7.4 营销主题(用户+商品+购买行为)
7.4.1 下单数目统计
需求分析:统计每日下单数,下单金额及下单用户数
1)建表语句
drop table if exists ads_order_daycount; create external table ads_order_daycount( dt string comment '统计日期', order_count bigint comment '单日下单笔数', order_amount decimal(10,2) comment '单日下单金额', order_users bigint comment '单日下单用户数' ) comment '每日订单总计表' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_order_daycount';
2)导入数据
insert into table ads_order_daycount select '2020-03-10', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from dws_user_action_daycount where dt='2020-03-10';
3)查询数据
select * from ads_order_daycount;
7.4.2 支付信息统计
每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自 DWD)
1)建表
drop table if exists ads_payment_daycount; create external table ads_payment_daycount( dt string comment '统计日期', payment_count bigint comment '单日支付笔数', payment_amount decimal(10,2) comment '单日支付金额', payment_user_count bigint comment '单日支付人数', payment_sku_count bigint comment '单日支付商品数', payment_avg_time double comment '下单到支付的平均时长,取分钟数' ) comment '每日订单总计表' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_payment_daycount';
2)导入数据
insert into table ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select '2020-03-15' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from dws_user_action_daycount where dt='2020-03-15' )tmp_payment join ( select '2020-03-15' dt, sum(if(payment_count>0,1,0)) payment_sku_count from dws_sku_action_daycount where dt='2020-03-15' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select '2020-03-15' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from dwd_fact_order_info where dt='2020-03-15' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt
3)查询数据
select * from ads_payment_daycount;
7.4.3 复购率
1)建表语句
drop table ads_sale_tm_category1_stat_mn; create external table ads_sale_tm_category1_stat_mn ( tm_id string comment '品牌 id', category1_id string comment '1 级品类 id ', category1_name string comment '1 级品类名称 ', buycount bigint comment '购买人数', buy_twice_last bigint comment '两次以上购买人数', buy_twice_last_ratio decimal(10,2) comment '单次复购率', buy_3times_last bigint comment '三次以上购买人数', buy_3times_last_ratio decimal(10,2) comment '多次复购率', stat_mn string comment '统计月份', stat_date string comment '统计日期' ) COMMENT '复购率统计' row format delimited fields terminated by '\t' location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';
2)数据导入
insert into table ads_sale_tm_category1_stat_mn select mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name, sum(if(mn.order_count>=1,1,0)) buycount, sum(if(mn.order_count>=2,1,0)) buyTwiceLast, sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0)) buyTwiceLastRatio, sum(if(mn.order_count>=3,1,0)) buy3timeLast , sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0)) buy3timeLastRatio , date_format('2020-03-10' ,'yyyy-MM') stat_mn, '2020-03-10' stat_date from ( select user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name, sum(order_count) order_count from dws_sale_detail_daycount sd where date_format(dt,'yyyy-MM')=date_format('2020-03-10' ,'yyyy-MM') group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name ) mn group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
7.5 ADS 层导入脚本
1)vim dwt_to_ads.sh
在脚本中填写如下内容
#!/bin/bash hive=/opt/modules/hive/bin/hive # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天 if [ -n "$1" ] ;then do_date=$1 else do_date=`date -d "-1 day" +%F` fi sql="use gmall; insert into table ads_uv_count select '$do_date', sum(if(login_date_last='$do_date',1,0)), sum(if(login_date_last>=date_add(next_day('$do_date','monday'),-7) and login_date_last<=date_add(next_day('$do_date','monday'),-1) ,1,0)), sum(if(date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-M M'),1,0)), if('$do_date'=date_add(next_day('$do_date','monday'),-1),'Y','N'), if('$do_date'=last_day('$do_date'),'Y','N') from dwt_uv_topic; insert into table ads_new_mid_count select '$do_date', count(*) from dwt_uv_topic where login_date_first='$do_date'; insert into table ads_silent_count select '$do_date', count(*) from dwt_uv_topic where login_date_first=login_date_last and login_date_last<=date_add('$do_date',-7); insert into table ads_back_count select '$do_date', concat(date_add(next_day('2020-03-10','MO'),-7),'_',date_add(next_day('2020- 03-10','MO'),-1)), count(*) from ( select mid_id from dwt_uv_topic where login_date_last>=date_add(next_day('$do_date','MO'),-7) and login_date_last<= date_add(next_day('$do_date','MO'),-1) and login_date_first<date_add(next_day('$do_date','MO'),-7) )current_wk left join ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','MO'),-7*2) and dt<= date_add(next_day('$do_date','MO'),-7-1) group by mid_id )last_wk on current_wk.mid_id=last_wk.mid_id where last_wk.mid_id is null; insert into table ads_wastage_count select '$do_date', count(*) from dwt_uv_topic where login_date_last<=date_add('$do_date',-7); insert into table ads_user_retention_day_rate select '$do_date', date_add('$do_date',-3), 3, sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0)), sum(if(login_date_first=date_add('$do_date',-3),1,0)), sum(if(login_date_first=date_add('$do_date',-3) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date', -3),1,0))*100 from dwt_uv_topic union all select '$do_date', date_add('$do_date',-2), 2, sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0)), sum(if(login_date_first=date_add('$do_date',-2),1,0)), sum(if(login_date_first=date_add('$do_date',-2) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date', -2),1,0))*100 from dwt_uv_topic union all select '$do_date', date_add('$do_date',-1), 1, sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0)), sum(if(login_date_first=date_add('$do_date',-1),1,0)), sum(if(login_date_first=date_add('$do_date',-1) and login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date', -1),1,0))*100 from dwt_uv_topic; insert into table ads_continuity_wk_count select '$do_date', concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_d ate','MO'),-1)), count(*) from ( select mid_id from ( select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7) and dt<=date_add(next_day('$do_date','monday'),-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7*2) and dt<=date_add(next_day('$do_date','monday'),-7-1) group by mid_id union all select mid_id from dws_uv_detail_daycount where dt>=date_add(next_day('$do_date','monday'),-7*3) and dt<=date_add(next_day('$do_date','monday'),-7*2-1) group by mid_id )t1 group by mid_id having count(*)=3 )t2; insert into table ads_continuity_uv_count select '$do_date', concat(date_add('$do_date',-6),'_','$do_date'), count(*) from ( select mid_id from ( select mid_id from ( select mid_id, date_sub(dt,rank) date_dif from ( select mid_id, dt, rank() over(partition by mid_id order by dt) rank from dws_uv_detail_daycount where dt>=date_add('$do_date',-6) and dt<='$do_date' )t1 )t2 group by mid_id,date_dif having count(*)>=3 )t3 group by mid_id )t4; insert into table ads_user_topic select '$do_date', sum(if(login_date_last='$do_date',1,0)), sum(if(login_date_first='$do_date',1,0)), sum(if(payment_date_first='$do_date',1,0)), sum(if(payment_count>0,1,0)), count(*), sum(if(login_date_last='$do_date',1,0))/count(*), sum(if(payment_count>0,1,0))/count(*), sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1, 0)) from dwt_user_topic; insert into table ads_user_action_convert_day select '$do_date', uv.day_count, ua.cart_count, ua.cart_count/uv.day_count*100 visitor2cart_convert_ratio, ua.order_count, ua.order_count/ua.cart_count*100 visitor2order_convert_ratio, ua.payment_count, ua.payment_count/ua.order_count*100 order2payment_convert_ratio from ( select '$do_date' dt, sum(if(cart_count>0,1,0)) cart_count, sum(if(order_count>0,1,0)) order_count, sum(if(payment_count>0,1,0)) payment_count from dws_user_action_daycount where dt='$do_date' )ua join ads_uv_count uv on uv.dt=ua.dt; insert into table ads_product_info select '$do_date' dt, sku_num, spu_num from ( select '$do_date' dt, count(*) sku_num from dwt_sku_topic ) tmp_sku_num join ( select '$do_date' dt, count(*) spu_num from ( select spu_id from dwt_sku_topic group by spu_id ) tmp_spu_id ) tmp_spu_num on tmp_sku_num.dt=tmp_spu_num.dt; insert into table ads_product_sale_topN select '$do_date', sku_id, payment_amount from dws_sku_action_daycount where dt='$do_date' order by payment_amount desc limit 10; insert into table ads_product_favor_topN select '$do_date', sku_id, favor_count from dws_sku_action_daycount where dt='$do_date' order by favor_count limit 10; insert into table ads_product_cart_topN select '$do_date' dt, sku_id, cart_num from dws_sku_action_daycount where dt='$do_date' order by cart_num limit 10; insert into table ads_product_refund_topN select '$do_date', sku_id, refund_last_30d_count/payment_last_30d_count*100 refund_ratio from dwt_sku_topic order by refund_ratio desc limit 10; insert into table ads_appraise_bad_topN select '$do_date' dt, sku_id, appraise_bad_count/(appraise_bad_count+appraise_good_count+appraise_mid_coun t+appraise_default_count)*100 appraise_bad_ratio from dws_sku_action_daycount where dt='$do_date' order by appraise_bad_ratio desc limit 10; insert into table ads_order_daycount select '$do_date', sum(order_count), sum(order_amount), sum(if(order_count>0,1,0)) from dws_user_action_daycount where dt='$do_date'; insert into table ads_payment_daycount select tmp_payment.dt, tmp_payment.payment_count, tmp_payment.payment_amount, tmp_payment.payment_user_count, tmp_skucount.payment_sku_count, tmp_time.payment_avg_time from ( select '$do_date' dt, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(payment_count>0,1,0)) payment_user_count from dws_user_action_daycount where dt='$do_date' )tmp_payment join ( select '$do_date' dt, sum(if(payment_count>0,1,0)) payment_sku_count from dws_sku_action_daycount where dt='$do_date' )tmp_skucount on tmp_payment.dt=tmp_skucount.dt join ( select '$do_date' dt, sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60 payment_avg_time from dwd_fact_order_info where dt='$do_date' and payment_time is not null )tmp_time on tmp_payment.dt=tmp_time.dt; insert into table ads_sale_tm_category1_stat_mn select mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name, sum(if(mn.order_count>=1,1,0)) buycount, sum(if(mn.order_count>=2,1,0)) buyTwiceLast, sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0)) buyTwiceLastRatio, sum(if(mn.order_count>=3,1,0)) buy3timeLast , sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0)) buy3timeLastRatio , date_format('$do_date' ,'yyyy-MM') stat_mn, '$do_date' stat_date from ( select user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name, sum(order_count) order_count from dws_sale_detail_daycount sd where date_format(dt,'yyyy-MM')=date_format('$do_date' ,'yyyy-MM') group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name ) mn group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name; " $hive -e "$sql"
2)增加脚本执行权限
chmod 770 dwt_to_ads.sh
3)执行脚本导入数据
dwt_to_ads.sh 2020-03-10
本文来自博客园,作者:大码王,转载请注明原文链接:https://www.cnblogs.com/huanghanyu/
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 阿里最新开源QwQ-32B,效果媲美deepseek-r1满血版,部署成本又又又降低了!
· 开源Multi-agent AI智能体框架aevatar.ai,欢迎大家贡献代码
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· AI技术革命,工作效率10个最佳AI工具