经典场景: 订单明细表中分摊金额的问题

ods层数据:

--ods层的订单详情表 增量表
drop table if exists ods_order_detail;
create external table ods_order_detail(
`id` string COMMENT '编号',
`order_id` string COMMENT '订单号',
`user_id` string COMMENT '用户 id',
`sku_id` string COMMENT '商品 id',
`sku_name` string COMMENT '商品名称',
`order_price` decimal(16,2) COMMENT '商品价格',
`sku_num` bigint COMMENT '商品数量',
`create_time` string COMMENT '创建时间',
`source_type` string COMMENT '来源类型',
`source_id` string COMMENT '来源编号'
) COMMENT '订单详情表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_order_detail/';

--ods层的订单表 增量表
drop table if exists ods_order_info;
create external table ods_order_info (
`id` string COMMENT '订单号',
`final_total_amount` decimal(16,2) COMMENT '订单金额',
`order_status` string COMMENT '订单状态',
`user_id` string COMMENT '用户 id',
`out_trade_no` string COMMENT '支付流水号',
`create_time` string COMMENT '创建时间',
`operate_time` string COMMENT '操作时间',
`province_id` string COMMENT '省份 ID',
`benefit_reduce_amount` decimal(16,2) COMMENT '优惠金额',
`original_total_amount` decimal(16,2) COMMENT '原价金额',
`feight_fee` decimal(16,2) COMMENT '运费'
) COMMENT '订单表'
PARTITIONED BY (`dt` string) -- 按照时间创建分区
row format delimited fields terminated by '\t' -- 指定分割符为\t
STORED AS -- 指定存储方式, 读数据采用 LzoTextInputFormat; 输出数据采用 TextOutputFormat
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_order_info/' -- 指定数据在hdfs上的存储位置
;

场景: 有一笔订单包含多个商品

洗面奶 *2 =50 、洗衣液 *3=30 、酸奶 * 1=20,订单详情中有商品的原始单价

运费:10 原始总金额:100 优惠金额:20 最终支付金额90, 求这笔订单中每种商品分摊运费多少、分摊支付金额多少、分摊优惠金额多少。

按照一般的逻辑:每种商品分摊的运费是3.33, dwd层的数据汇总得到的运费是9.99,会产生误差。为了消除这种误差,将误差金额加到最贵的商品上。

--dwd层建表语句
create external table dwd_fact_order_detail(
    `id` string COMMENT '订单编号',
    `order_id` string COMMENT '订单号',
    `user_id` string COMMENT '用户id',
    `sku_id` string COMMENT '商品id',
    `sku_name` string COMMENT '商品名称',
    `order_price` decimal(16,2) COMMENT '商品价格',
    `sku_num` bigint COMMENT '商品数量',
    `create_time` string COMMENT '创建时间',
    `province_id` string COMMENT '省份id',
    `source_type` string COMMENT '来源类型',
    `source_id` string COMMENT '来源编号',
    `original_amount_d` decimal(20,2) COMMENT '原始价格分摊',
    `final_amount_d` decimal(20,2) COMMENT '购买价格分摊',
    `feight_fee_d` decimal(20,2) COMMENT '运费分摊',
    `benefit_reduce_amount_d` decimal(20,2) COMMENT '优惠分摊'
)COMMENT '订单明细事实表'
partitioned by (`dt` string)
stored as parquet
location '/warehouse/gmall/dwd/dwd_fact_order_detail'
tblproperties('parquet.compression'='lzo')

不考虑误差的分摊金额

with order_detail_tmp as(
select 
      id, --编号
      order_id, --订单id
      user_id,
      sku_id,
      sku_name,
      order_price,
      sku_num,
      create_time,
      source_type,
      source_id
from ods_order_detail where dt='20201031'),
order_info_tmp as (
select 
      id, --订单id
      final_total_amount,
      province_id,
      benefit_reduce_amount,
      original_total_amount,
      feight_fee
from ods_order_info where dt='20201031')
insert overwrite table dwd_fact_order_detail partition(dt='20201031')
select 
      order_detail.id as id,
      order_detail.order_id as order_id,
      order_detail.user_id as user_id,
      order_detail.sku_id as sku_id,
      order_detail.sku_name as sku_name,
      order_detail.order_price as order_price,
      order_detail.sku_num as sku_num,
      order_detail.create_time as create_time,
      order_info.province_id as province_id,
      order_detail.source_type as source_type,
      order_detail.source_id as source_id,
      --order_info.final_total_amount,
      --order_info.benefit_reduce_amount,
      --order_info.original_total_amount,
      --order_info.feight_fee,
      round(order_detail.order_price*order_detail.sku_num,2) as original_amount_d,
      round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.final_total_amount,2) as final_amount_d,
      round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.feight_fee,2) as feight_fee_d,
      round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.benefit_reduce_amount,2) as benefit_reduce_amount_d
from order_detail_tmp order_detail left join order_info_tmp order_info on order_detail.order_id = order_info.id

考虑误差,将误差加到原始价格最高的商品上

with order_detail_tmp as(
select 
      id, --编号
      order_id, --订单id
      user_id,
      sku_id,
      sku_name,
      order_price,
      sku_num,
      create_time,
      source_type,
      source_id
from ods_order_detail where dt='20201031'),
order_info_tmp as (
select 
      id, --订单id
      final_total_amount,
      province_id,
      benefit_reduce_amount,
      original_total_amount,
      feight_fee
from ods_order_info where dt='20201031')
insert overwrite table dwd_fact_order_detail partition(dt='20201031')
select 
      id,
	  order_id,
	  sku_id,
	  sku_name,
	  order_price,
	  sku_num,
	  create_time,
	  province_id,
	  source_type,
	  source_id,
	  if(rank=1, original_amount_d+original_total_amount-sum_original_amount_d, original_amount_d) as original_amount_d,
	  if(rank=1, final_amount_d+final_total_amount-sum_final_amount_d, final_amount_d) as final_amount_d,
	  if(rank=1, feight_fee_d+feight_fee-sum_feight_fee_d, feight_fee_d) as feight_fee_d,
	  if(rank=1, benefit_reduce_amount_d+benefit_reduce_amount-sum_benefit_reduce_amount_d, benefit_reduce_amount_d) as benefit_reduce_amount_d,
from (
select 
      order_detail.id as id,
      order_detail.order_id as order_id,
      order_detail.user_id as user_id,
      order_detail.sku_id as sku_id,
      order_detail.sku_name as sku_name,
      order_detail.order_price as order_price,
      order_detail.sku_num as sku_num,
      order_detail.create_time as create_time,
      order_info.province_id as province_id,
      order_detail.source_type as source_type,
      order_detail.source_id as source_id,
      order_info.final_total_amount,
      order_info.benefit_reduce_amount,
      order_info.original_total_amount,
      order_info.feight_fee,
      round(order_detail.order_price*order_detail.sku_num,2) as original_amount_d,
      round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.final_total_amount,2) as final_amount_d,
      round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.feight_fee,2) as feight_fee_d,
      round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.benefit_reduce_amount,2) as benefit_reduce_amount_d,
    
sum(round(order_detail.order_price*order_detail.sku_num,2)) over(partition by order_id) as sum_original_amount_d,
	  sum(round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.final_total_amount,2)) over(partition by order_id) as sum_final_amount_d,
	  sum(round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.feight_fee,2)) over(partition by order_id) as sum_feight_fee_d,
	  sum(round(order_detail.order_price*order_detail.sku_num/order_info.original_total_amount*order_info.benefit_reduce_amount,2)) over(partition by order_id) as sum_benefit_reduce_amount_d,
	  
row_number() over(partition by order_id order by round(order_detail.order_price*order_detail.sku_num,2)) as rank

from order_detail_tmp order_detail left join order_info_tmp order_info on order_detail.order_id = order_info.id) tmp
posted on 2020-11-29 18:16  jeasonchen001  阅读(786)  评论(0编辑  收藏  举报