1.1 ods层的数据(增量数据,将新增和修改的数据导入ods)
sqoop语法是每天将get_time 或者using_time或者used_time为当前分区时间,或者优惠券状态发生改变的数据导入ods:
drop table if exists ods_coupon_use;
create external table ods_coupon_use(
`id` string COMMENT '编号',
`coupon_id` string COMMENT '优惠券 ID',
`user_id` string COMMENT 'userid',
`order_id` string COMMENT 'orderid',
`coupon_status` string COMMENT '优惠券状态',
`get_time` string COMMENT '领取时间',
`using_time` string COMMENT '使用时间(下单)',
`used_time` string COMMENT '使用时间(支付)'
) COMMENT '优惠券领用表'
PARTITIONED BY (`dt` string)
row format delimited fields terminated by '\t'
STORED AS
INPUTFORMAT 'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
location '/warehouse/gmall/ods/ods_coupon_use/';
1.2 分析ods数据
使用到ods表的字段:
coupon_id
user_id
order_id
get_time
using_time
used_time
status
1025优惠券业务数据库中的数据
1,1001, user01, order_001, 2020-10-25 10:00:00, null, null, 1
1026优惠券业务数据库中的数据
1,1001, user01, order_001, 2020-10-25 10:00:00, 2020-10-26 00:01:43, 2020-10-26 00:02:00, 1
2,2001, user10, order_10, 2020-10-26 9:01:00, 2020-10-26 11:20:00, null , 1
3,3001, user30, order_30, 2020-10-26 10:00:00, null, null , 1
1027优惠券业务数据库中的数据
1,1001, user01, order_001, 2020-10-25 10:00:00, 2020-10-26 00:01:43, 2020-10-26 00:02:00 , 0
2,2001, user10, order_10, 2020-10-26 9:01:00, 2020-10-26 11:20:00, 2020-10-27 11:20:00 , 1
3,3001, user30, order_30, 2020-10-26 10:00:00, 2020-10-27 11:20:00, 2020-10-27 13:00:00 , 1
4,4001, user40, order_40, 2020-10-27 08:08:00, null, null, 1
1.3 创建累积型快照事实表
drop table if exists dwd_fact_coupon_use;
create external table dwd_fact_coupon_use(
`id` string COMMENT '编号',
`coupon_id` string COMMENT '优惠券 ID',
`user_id` string COMMENT 'userid',
`order_id` string COMMENT '订单 id',
`coupon_status` string COMMENT '优惠券状态',
`get_time` string COMMENT '领取时间',
`using_time` string COMMENT '使用时间(下单)',
`used_time` string COMMENT '使用时间(支付)'
) COMMENT '优惠券领用事实表'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dwd/dwd_fact_coupon_use/'
tblproperties ("parquet.compression"="lzo");
--dt是按照优惠券领用时间get_time做为分区
1.4 待验证的方式
--使用动态分区 根据get_time进行动态分区,默认以查询出来的最后一列为分区字段
set hive.exec.dynamici.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table dwd_fact_coupon_use partition (dt)
select
if(b.id is not null , b.id, a.id) as id,
if(b.coupon_id is not null, b.coupon_id, a.coupon_id) as coupon_id,
if(b.user_id is not null, b.user_id, a.user_id) as user_id,
if(b.order_id is not null, b.order_id, a.order_id) as order_id,
if(b.using_time is not null, b.using_time, a.using_time) as using_time,
if(b.used_time is not null, b.used_time, a.used_time) as used_time,
if(b.flag is not null, b.flag, a.flag) as flag,
if(b.get_time is not null, b.get_time, a.get_time) as get_time,
from(
select
id,
coupon_id,
user_id,
order_id,
get_time,
using_time,
used_time,
flag
from dwd_fact_coupon_use where dt in (
select
get_time
from ods_coupon_use
where dt = '2020-10-26') a
full outer join (
select
id,
coupon_id,
user_id,
order_id,
get_time,
using_time,
used_time,
flag
from ods_coupon_use where dt ='2020-10-26') b
on a.id = b.id