经典场景: 订单明细表中分摊金额的问题
ods层数据:
--ods层的订单详情表 增量表
drop table if exists ods_order_detail;
create external table ods_order_detail(
`id` string COMMENT '编号',
`order_id` string COMMENT '订单号',
`user_id` string COMMENT '用户 id',
`sku_id` string COMMENT '商品 id',
`sku_name` string COMMENT '商品名称',
`order_price` decimal(16,2) COMMENT '商品价格',
`sku_num` bigint COMMENT '商品数量',
`create_time` string COMMENT '创建时间',
`source_type` string COMMENT '来源类型',
`source_id` string COMMENT '来源编号'
) COMMENT '订单详情表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_order_detail/';
--ods层的订单表 增量表
drop table if exists ods_order_info;
create external table ods_order_info (
`id` string COMMENT '订单号',
`final_total_amount` decimal(16,2) COMMENT '订单金额',
`order_status` string COMMENT '订单状态',
`user_id` string COMMENT '用户 id',
`out_trade_no` string COMMENT '支付流水号',
`create_time` string COMMENT '创建时间',
`operate_time` string COMMENT '操作时间',
`province_id` string COMMENT '省份 ID',
`benefit_reduce_amount` decimal(16,2) COMMENT '优惠金额',
`original_total_amount` decimal(16,2) COMMENT '原价金额',
`feight_fee` decimal(16,2) COMMENT '运费'
) COMMENT '订单表'
PARTITIONED BY (`dt` string) -- 按照时间创建分区
row format delimited fields terminated by '\t' -- 指定分割符为\t
STORED AS -- 指定存储方式, 读数据采用 LzoTextInputFormat; 输出数据采用 TextOutputFormat
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_order_info/' -- 指定数据在hdfs上的存储位置
;
场景: 有一笔订单包含多个商品
洗面奶 *2 =50 、洗衣液 *3=30 、酸奶 * 1=20,订单详情中有商品的原始单价
运费:10 原始总金额:100 优惠金额:20 最终支付金额90, 求这笔订单中每种商品分摊运费多少、分摊支付金额多少、分摊优惠金额多少。
按照一般的逻辑:每种商品分摊的运费是3.33, dwd层的数据汇总得到的运费是9.99,会产生误差。为了消除这种误差,将误差金额加到最贵的商品上。
--dwd层建表语句
create external table dwd_fact_order_detail(
`id` string COMMENT '订单编号',
`order_id` string COMMENT '订单号',
`user_id` string COMMENT '用户id',
`sku_id` string COMMENT '商品id',
`sku_name` string COMMENT '商品名称',
`order_price` decimal(16,2) COMMENT '商品价格',
`sku_num` bigint COMMENT '商品数量',
`create_time` string COMMENT '创建时间',
`province_id` string COMMENT '省份id',
`source_type` string COMMENT '来源类型',
`source_id` string COMMENT '来源编号',
`original_amount_d` decimal(20,2) COMMENT '原始价格分摊',
`final_amount_d` decimal(20,2) COMMENT '购买价格分摊',
`feight_fee_d` decimal(20,2) COMMENT '运费分摊',
`benefit_reduce_amount_d` decimal(20,2) COMMENT '优惠分摊'
)COMMENT '订单明细事实表'
partitioned by (`dt` string)
stored as parquet
location '/warehouse/gmall/dwd/dwd_fact_order_detail'
tblproperties('parquet.compression'='lzo')
不考虑误差的分摊金额
with order_detail_tmp as(
select
id, --编号
order_id, --订单id
user_id,
sku_id,
sku_name,
order_price,
sku_num,
create_time,
source_type,
source_id
from ods_order_detail where dt='20201031'),
order_info_tmp as (
select
id, --订单id
final_total_amount,
province_id,
benefit_reduce_amount,
original_total_amount,
feight_fee
from ods_order_info where dt='20201031')
insert overwrite table dwd_fact_order_detail partition(dt='20201031')
select
order_detail.id as id,
order_detail.order_id as order_id,
order_detail.user_id as user_id,
order_detail.sku_id as sku_id,
order_detail.sku_name as sku_name,
order_detail.order_price as order_price,
order_detail.sku_num as sku_num,
order_detail.create_time as create_time,
order_info.province_id as province_id,
order_detail.source_type as source_type,
order_detail.source_id as source_id,
--order_info.final_total_amount,
--order_info.benefit_reduce_amount,
--order_info.original_total_amount,
--order_info.feight_fee,
round(order_detail.order_price*order_detail.sku_num,2) as original_amount_d,
round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.final_total_amount,2) as final_amount_d,
round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.feight_fee,2) as feight_fee_d,
round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.benefit_reduce_amount,2) as benefit_reduce_amount_d
from order_detail_tmp order_detail left join order_info_tmp order_info on order_detail.order_id = order_info.id
考虑误差,将误差加到原始价格最高的商品上
with order_detail_tmp as(
select
id, --编号
order_id, --订单id
user_id,
sku_id,
sku_name,
order_price,
sku_num,
create_time,
source_type,
source_id
from ods_order_detail where dt='20201031'),
order_info_tmp as (
select
id, --订单id
final_total_amount,
province_id,
benefit_reduce_amount,
original_total_amount,
feight_fee
from ods_order_info where dt='20201031')
insert overwrite table dwd_fact_order_detail partition(dt='20201031')
select
id,
order_id,
sku_id,
sku_name,
order_price,
sku_num,
create_time,
province_id,
source_type,
source_id,
if(rank=1, original_amount_d+original_total_amount-sum_original_amount_d, original_amount_d) as original_amount_d,
if(rank=1, final_amount_d+final_total_amount-sum_final_amount_d, final_amount_d) as final_amount_d,
if(rank=1, feight_fee_d+feight_fee-sum_feight_fee_d, feight_fee_d) as feight_fee_d,
if(rank=1, benefit_reduce_amount_d+benefit_reduce_amount-sum_benefit_reduce_amount_d, benefit_reduce_amount_d) as benefit_reduce_amount_d,
from (
select
order_detail.id as id,
order_detail.order_id as order_id,
order_detail.user_id as user_id,
order_detail.sku_id as sku_id,
order_detail.sku_name as sku_name,
order_detail.order_price as order_price,
order_detail.sku_num as sku_num,
order_detail.create_time as create_time,
order_info.province_id as province_id,
order_detail.source_type as source_type,
order_detail.source_id as source_id,
order_info.final_total_amount,
order_info.benefit_reduce_amount,
order_info.original_total_amount,
order_info.feight_fee,
round(order_detail.order_price*order_detail.sku_num,2) as original_amount_d,
round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.final_total_amount,2) as final_amount_d,
round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.feight_fee,2) as feight_fee_d,
round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.benefit_reduce_amount,2) as benefit_reduce_amount_d,
sum(round(order_detail.order_price*order_detail.sku_num,2)) over(partition by order_id) as sum_original_amount_d,
sum(round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.final_total_amount,2)) over(partition by order_id) as sum_final_amount_d,
sum(round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.feight_fee,2)) over(partition by order_id) as sum_feight_fee_d,
sum(round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.benefit_reduce_amount,2)) over(partition by order_id) as sum_benefit_reduce_amount_d,
row_number() over(partition by order_id order by round(order_detail.order_price*order_detail.sku_num,2)) as rank
from order_detail_tmp order_detail left join order_info_tmp order_info on order_detail.order_id = order_info.id) tmp