华为云如何建表并创建作业定时调度抽取数据
首先新建DLI SQL脚本
DLI SQL是在hive、spark基础上封装的脚本工具
创建库表并导入数据
create table if not exists akc_dw.rpt_mkt_xf_promo_merchant_data_1h_share_test_case2
(
data_time_key string comment '调度执行计划的时间 yyyy-mm-dd hh:mi:ss',
promo_activity_no string comment '营销活动编号',
team_id string comment '分校校长id',
merchant_id string comment '商家id',
share_cnt bigint comment '转发次数'
) comment '销返看板数据之每个社群群主转发次数-指定商品维度'
partitioned by (mt string)--定义分区类型
stored as orc
;
insert overwrite table akc_dw.rpt_mkt_xf_promo_merchant_data_1h_share_test_case2 partition (mt='${cyctime}')
--计算已报名的分校校长人数,和分校校长转发次数
SELECT
to_date1('${cyctime}','yyyymmddhhmiss') as data_time_key--调度执行计划的时间,这个参数是提前在作业管理页面的配置管理中定义好的,当前时间就是计划调度时间
,sum(case when t4.team_id is not null then t1.share_cnt end )as share_cnt
, t1.promo_activity_no
,t4.team_id--需要获取流量转发人的校长id
,t1.merchant_id
FROM --获取流量关联的营销活动
(select
t1.union_id
,t1.buyer_id
,t1.distributor_id
,coalesce(t1.user_id,t3.user_id) as user_id--每一条转发记录对应的小bid,直接从t3店铺信息表取
,t1.user_code
,t1.activity_id
,t1.activity_name
,t1.product_id
,t1.product_name
,t1.brand_id
,t1.brand_name
,t1.share_cnt
,t2.promo_activity_no
,t2.promo_activity_type
,t2.promo_rank_type--1是销返2是满返
,t2.target_group
,t2.start_time
,t2.end_time
,t2.merchant_id
,t1.dt
,t1.shop_id
from ( --获取流量数据
select
t1.union_id
,t1.buyer_id
,t1.distributor_id
,t1.user_id
,t1.user_code
,t1.activity_id
,t2.live_name as activity_name
,t1.product_id
,t3.product_name
,t3.brand_id
,t3.brand_name
,t1.share_cnt
,t1.dt
,t1.shop_id
from
(--获取某idol近30天的流量数据
select
t1.union_id
,t1.buyer_id
,t1.distributor_id
,t1.user_id
,t1.user_code
,if(event_type rlike 'click|share|add_to_cart' , click_activity_id,activity_id ) as activity_id
,if(event_type rlike 'click|share|add_to_cart' , click_product_id,product_id ) as product_id
,t1.dt
,t1.shop_id
,count(1) share_cnt--每一天每一个商品/活动用户的转发次数
from
akc_dw.edw_trfc_traffic_detail_all_plt_di_15m t1--流量表
where
t1.dt between to_char(dateadd(to_date1('${cyctime}','yyyymmddhhmiss'),-20,'dd'),'yyyymmdd') and '${gmtdate}'
and t1.share_type = '商品'
and t1.event_type = 'share'
AND t1.etl_time<=to_date1('${cyctime}','yyyymmddhhmiss')--小于等于调度时间
group by
t1.union_id
,t1.buyer_id
,t1.distributor_id
,t1.user_id
,t1.user_code
,if( event_type rlike 'click|share|add_to_cart' , click_activity_id,activity_id )
,if( event_type rlike 'click|share|add_to_cart' , click_product_id,product_id )
,t1.dt
,t1.shop_id
) t1
join
(
select live_id,live_name
from akc_dw.frt_dim_live_info --活动表获得流量表活动名称
) t2
on t1.activity_id = t2.live_id
left join
akdc.dim_product_info t3 --商品信息表获取流量表商品品牌信息
on t1.product_id = t3.product_id
)t1
inner join
(
SELECT
t1.product_skc_id as product_id
,t1.activity_id as activity_id
,t2.promo_activity_config_id as promo_activity_no --营销活动no
,t2.name as promo_activity_name --营销活动名称
,t2.type as promo_activity_type --营销活动类型 1排名 2满返
,t2.rank_type as promo_rank_type
,t2.start_time --营销活动开始时间
,t2.end_time --营销活动结束时间
,t2.after_sale_deadline --营销活动售后截止时间
,t2.target_group --营销活动参与的主体 1社群 2单店店主 3分校
,t2.target_gmv --营销活动的目标值
,t1.merchant_id
,t1.merchant_rate
,t1.platform_rate
FROM
akc_dw.frt_ods_treasurebox_akc_treasurebox_rule_activity_product_df t1--参与销返的活动的指定商品
inner join
akc_dw.frt_ods_treasurebox_akc_treasurebox_config_rule_df t2
on t1.promo_activity_config_id = t2.promo_activity_config_id
and t2.status = 1
and t2.is_deleted = 0
and t2.dt = max_pt('akc_dw.frt_ods_treasurebox_akc_treasurebox_config_rule_df')
where
t1.dt = max_pt('akc_dw.frt_ods_treasurebox_akc_treasurebox_rule_activity_product_df')
and t1.status = 1--启用状态
and t1.is_deleted = 0 --未删除
)t2
on t1.product_id = t2.product_id
and t1.activity_id = t2.activity_id
left join
akc_dw.dim_shop_info t3--为了获取流量数据是店铺维度下所有人的转发
on t1.shop_id = t3.shop_id
where
coalesce(t1.user_id,t3.user_id) is not null--如果是爱豆自己转发则取爱豆自己的user_id,否则取爱豆店主下的其他人转发
and t1.dt between to_char(t2.start_time,'yyyymmdd') and to_char(t2.end_time,'yyyymmdd')
and t2.target_group = 1--营销主体为分校
and t2.promo_activity_type = 1 --限定是排名的营销活动
)t1
left join
(--获得社群战队的数据
select
user_id
,depart_id as team_id
,depart_name as team_name
from
akcdw.user_code_fish_group_channel
where dt = max_pt('akcdw.user_code_fish_group_channel')
and channel = '战队'
) t4
on t1.user_id = t4.user_id
group by
t1.promo_activity_no
,t4.team_id
,t1.merchant_id
提交保存脚本,脚本名字和表名要一致
作业管理创建作业目录
新建作业
然后进入新建作业界面
点击添加的dli sql编辑作业信息
...中搜索脚本名称即表名即可
配置自动调度
并发数选100,意思是截止上次失败后多少次内还会继续执行