描述的是订单各个阶段的状态
用户 | 地区 | 时间 | 商品 | 优惠券 | 活动 | 度量值 | ||
---|---|---|---|---|---|---|---|---|
订单 | √ | √ | √ | √ | 一次 |
订单的生命周期
下单时间=》支付时间=》取消时间=》完成时间=》退款时间=》退款完成时间
订单事实表的创建
--订单事实表 增量表, 当日只存储create_time为当日的数据, 所以create_time和分区字段dt是一致的。累积型快照事实表都有一个create_time作为分区字段。
drop table if exists dwd_fact_order_info;
create external table dwd_fact_order_info (
`id` string COMMENT '订单编号', --order_info
`order_status` string COMMENT '订单状态', --order_info 当前订单状态
`user_id` string COMMENT '用户 id', --order_info
`out_trade_no` string COMMENT '支付流水号', --order_info
`create_time` string COMMENT '创建时间(未支付状态)', --order_status
`payment_time` string COMMENT '支付时间(已支付状态)', --order_status
`cancel_time` string COMMENT '取消时间(已取消状态)', --order_status
`finish_time` string COMMENT '完成时间(已完成状态)', --order_status
`refund_time` string COMMENT '退款时间(退款中状态)', --order_status
`refund_finish_time` string COMMENT '退款完成时间(退款完成状态)', --order_status
`province_id` string COMMENT '省份 ID', --order_info
`activity_id` string COMMENT '活动 ID', --activity 关联活动表,ods层的订单活动关联表,关键键是order_info中的create_time
`original_total_amount` decimal(16,2) COMMENT '原价金额', --order_info
`benefit_reduce_amount` decimal(16,2) COMMENT '优惠金额', --order_info
`feight_fee` decimal(16,2) COMMENT '运费', --order_info
`final_total_amount` decimal(16,2) COMMENT '订单金额' --order_info
) COMMENT '订单事实表'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dwd/dwd_fact_order_info/'
tblproperties ("parquet.compression"="lzo");
--订单表 只有创建时间、订单状态、操作时间。相当于订单的最新状态,没有订单过去的状态。增量表(将创建时间或者操作时间为当日的订单信息导入)
drop table if exists ods_order_info;
create external table ods_order_info (
`id` string COMMENT '订单号',
`final_total_amount` decimal(16,2) COMMENT '订单金额',
`order_status` string COMMENT '订单状态',
`user_id` string COMMENT '用户 id',
`out_trade_no` string COMMENT '支付流水号',
`create_time` string COMMENT '创建时间',
`operate_time` string COMMENT '操作时间',
`province_id` string COMMENT '省份 ID',
`benefit_reduce_amount` decimal(16,2) COMMENT '优惠金额',
`original_total_amount` decimal(16,2) COMMENT '原价金额',
`feight_fee` decimal(16,2) COMMENT '运费'
) COMMENT '订单表'
PARTITIONED BY (`dt` string) -- 按照时间创建分区
--订单状态表(增量表,将operate_time为当日的数据导入,operate_time可能是订单生命周期中的任何操作) 订单状态表分区表, 包含了订单的各个状态和各个状态的操作时间
drop table if exists ods_order_status_log;
create external table ods_order_status_log (
`id` string COMMENT '编号',
`order_id` string COMMENT '订单 ID',
`order_status` string COMMENT '订单状态',
`operate_time` string COMMENT '修改时间'
) COMMENT '订单状态表'
PARTITIONED BY (`dt` string)
--活动订单关联表,增量表(将创建时间为当前日期的数据导入到ods层)。
drop table if exists ods_activity_order;
create external table ods_activity_order(
`id` string COMMENT '编号',
`activity_id` string COMMENT '活动ID',
`order_id` string COMMENT '订单id',
`create_time` string COMMENT '领取时间'
) COMMENT '活动订单关联表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_activity_order/';
数据示例:
最终的sql
insert overwrite table dwd_fact_order_info
partition(dt)
select
nvl(new.order_id, old.order_id) as order_id,
nvl(new.order_status, old.order_status) as order_status,
nvl(new.user_id, old.user_id) as user_id,
nvl(new.out_trade_no, old.out_trade_no) as out_trade_no,
nvl(new.out_trade_no, old.out_trade_no) as out_trade_no,
nvl(new.create_time,old.create_time) as create_time,
nvl(new.payment_time,old.payment_time) as payment_time,
nvl(new.cancel_time, old.cancel_time) as cancel_time,
nvl(new.finish_time, old.finish_time) as finish_time,
nvl(new.refund_time, old.refund_time) as refund_time,
nvl(new.refund_finish_time, old.refund_finish_time) as refund_finish_time,
nvl(new.province_id, old.province_id) as province_id,
nvl(new.activity_id, old.activity_id) as activity_id,
nvl(new.original_total_amount,old.original_total_amount) as original_total_amount,
nvl(new.benefit_reduce_amount, old.benefit_reduce_amount) as benefit_reduce_amount,
nvl(new.feight_fee, old.feight_fee) as feight_fee,
nvl(new.final_total_amount, old.final_total_amount) as final_total_amount,
nvl(new.create_time, old.create_time) as dt
from
(select
*
from dwd_fact_order_info where dt in (
select create_time from ods_order_info where dt='20201206'
)) old
full outer join
(
select
order_info.id,
order_info.order_status,
order_info.user_id,
order_info.out_trade_no,
order_info.province_id,
order_info.original_total_amount,
order_info.benefit_reduce_amount,
order_info.feight_fee,
order_info.final_total_amount
order_status.tmp_map['1001'] as create_time,
order_status.tmp_map['1002'] as payment_time,
order_status.tmp_map['1003'] as cancel_time,
order_status.tmp_map['1004'] as finish_time,
order_status.tmp_map['1005'] as refund_time,
order_status.tmp_map['1006'] as refund_finish_time,
order_activity.activity_id as activity_id
from
(
select * from
ods_order_info where dt='20201206'
) order_info
join
(
select order_id,
str_to_map(concat_ws(',', collect_set(concat(order_status, '=', operate_time))),',','=') as tmp_map,
from ods_order_status_log where dt='20201206'
group by order_id) order_status
on order_info.id = order_status.order_id
left join
(select * from ods_activity_order where dt='20201006') order_activity --只需要取当天创建的订单的活动id,因为之前创建的活动id已经处理好了
on order_info.id = order_activity.order_id
) new