描述的是订单各个阶段的状态

用户 地区 时间 商品 优惠券 活动 度量值
订单 一次

订单的生命周期

下单时间=》支付时间=》取消时间=》完成时间=》退款时间=》退款完成时间

订单事实表的创建

--订单事实表  增量表, 当日只存储create_time为当日的数据, 所以create_time和分区字段dt是一致的。累积型快照事实表都有一个create_time作为分区字段。

drop table if exists dwd_fact_order_info;
create external table dwd_fact_order_info (
`id` string COMMENT '订单编号',            --order_info
`order_status` string COMMENT '订单状态',  --order_info 当前订单状态
`user_id` string COMMENT '用户 id',        --order_info
`out_trade_no` string COMMENT '支付流水号', --order_info
`create_time` string COMMENT '创建时间(未支付状态)',  --order_status
`payment_time` string COMMENT '支付时间(已支付状态)', --order_status
`cancel_time` string COMMENT '取消时间(已取消状态)',  --order_status
`finish_time` string COMMENT '完成时间(已完成状态)',  --order_status
`refund_time` string COMMENT '退款时间(退款中状态)',  --order_status
`refund_finish_time` string COMMENT '退款完成时间(退款完成状态)', --order_status
`province_id` string COMMENT '省份 ID',   --order_info
`activity_id` string COMMENT '活动 ID',   --activity 关联活动表,ods层的订单活动关联表,关键键是order_info中的create_time
`original_total_amount` decimal(16,2) COMMENT '原价金额',   --order_info
`benefit_reduce_amount` decimal(16,2) COMMENT '优惠金额',   --order_info
`feight_fee` decimal(16,2) COMMENT '运费',                  --order_info
`final_total_amount` decimal(16,2) COMMENT '订单金额'       --order_info
) COMMENT '订单事实表'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dwd/dwd_fact_order_info/'
tblproperties ("parquet.compression"="lzo");
--订单表  只有创建时间、订单状态、操作时间。相当于订单的最新状态,没有订单过去的状态。增量表(将创建时间或者操作时间为当日的订单信息导入)
drop table if exists ods_order_info;
create external table ods_order_info (
`id` string COMMENT '订单号',
`final_total_amount` decimal(16,2) COMMENT '订单金额',
`order_status` string COMMENT '订单状态',
`user_id` string COMMENT '用户 id',
`out_trade_no` string COMMENT '支付流水号',
`create_time` string COMMENT '创建时间',
`operate_time` string COMMENT '操作时间',
`province_id` string COMMENT '省份 ID',
`benefit_reduce_amount` decimal(16,2) COMMENT '优惠金额',
`original_total_amount` decimal(16,2) COMMENT '原价金额',
`feight_fee` decimal(16,2) COMMENT '运费'
) COMMENT '订单表'
PARTITIONED BY (`dt` string) -- 按照时间创建分区

--订单状态表(增量表,将operate_time为当日的数据导入,operate_time可能是订单生命周期中的任何操作)  订单状态表分区表, 包含了订单的各个状态和各个状态的操作时间
drop table if exists ods_order_status_log;
create external table ods_order_status_log (
`id` string COMMENT '编号',
`order_id` string COMMENT '订单 ID',
`order_status` string COMMENT '订单状态',
`operate_time` string COMMENT '修改时间'
) COMMENT '订单状态表'
PARTITIONED BY (`dt` string)


--活动订单关联表,增量表(将创建时间为当前日期的数据导入到ods层)。
drop table if exists ods_activity_order;
create external table ods_activity_order(
`id` string COMMENT '编号',
`activity_id` string COMMENT '活动ID',
`order_id` string COMMENT '订单id',
`create_time` string COMMENT '领取时间'
) COMMENT '活动订单关联表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_activity_order/';


数据示例:


最终的sql
insert overwrite table dwd_fact_order_info
partition(dt)
select 
nvl(new.order_id, old.order_id) as order_id,
nvl(new.order_status, old.order_status) as order_status,
nvl(new.user_id, old.user_id) as user_id,
nvl(new.out_trade_no, old.out_trade_no) as out_trade_no,
nvl(new.out_trade_no, old.out_trade_no) as out_trade_no,
nvl(new.create_time,old.create_time) as create_time,
nvl(new.payment_time,old.payment_time) as payment_time,
nvl(new.cancel_time, old.cancel_time) as cancel_time,
nvl(new.finish_time, old.finish_time) as finish_time,
nvl(new.refund_time, old.refund_time) as refund_time,
nvl(new.refund_finish_time, old.refund_finish_time) as refund_finish_time,
nvl(new.province_id, old.province_id) as province_id,
nvl(new.activity_id, old.activity_id) as activity_id,
nvl(new.original_total_amount,old.original_total_amount) as original_total_amount,
nvl(new.benefit_reduce_amount, old.benefit_reduce_amount) as benefit_reduce_amount,
nvl(new.feight_fee, old.feight_fee) as feight_fee,
nvl(new.final_total_amount, old.final_total_amount) as final_total_amount,
nvl(new.create_time, old.create_time) as dt
from
(select
  * 
from dwd_fact_order_info where dt in (
select create_time from ods_order_info where dt='20201206' 
)) old 

full outer join
( 
select 
order_info.id,
order_info.order_status,
order_info.user_id,
order_info.out_trade_no,
order_info.province_id,
order_info.original_total_amount,
order_info.benefit_reduce_amount,
order_info.feight_fee,
order_info.final_total_amount
order_status.tmp_map['1001'] as create_time,
order_status.tmp_map['1002'] as payment_time,
order_status.tmp_map['1003'] as cancel_time,
order_status.tmp_map['1004'] as finish_time,
order_status.tmp_map['1005'] as refund_time,
order_status.tmp_map['1006'] as refund_finish_time,
order_activity.activity_id as activity_id
from 
(
select * from 
ods_order_info where dt='20201206'
) order_info
join 
(
select order_id, 
str_to_map(concat_ws(',', collect_set(concat(order_status, '=', operate_time))),',','=') as tmp_map, 
from ods_order_status_log  where dt='20201206'
group by order_id) order_status
on order_info.id = order_status.order_id
left join
(select * from ods_activity_order where dt='20201006') order_activity  --只需要取当天创建的订单的活动id,因为之前创建的活动id已经处理好了
on order_info.id = order_activity.order_id  
) new
posted on 2020-12-06 13:20  jeasonchen001  阅读(946)  评论(0编辑  收藏  举报