8. DWD层数据汇总

1. 手动导入表

1. 对ODS层数据加工

如:
ODS地区+ODS省份=>DWD省份地区
ODS 商品信息 + ODS 品牌 + ODS 商品一级分类 + ODS 商品二级分类 + ODS 商品三级分类=>DWD商品信息

2. DWD层表结构

3. 创建表

CREATE TABLE `dwd_order_info` (
`id` string COMMENT '订单 id',
`total_amount` double COMMENT '订单总额',
`order_status` string COMMENT ' 1 未支付 2 已支付 3 已发货 4 已收货 5 完成',
`user_id` string COMMENT '用户 id',
`payment_way` string COMMENT '付款方式',
`out_trade_no` string COMMENT '订单流失号',
`province_id` string COMMENT '省市 id',
`create_time` string COMMENT '创建时间',
`operate_time` string COMMENT '修改时间'
) 
COMMENT '订单表'
PARTITIONED BY (ds string);
CREATE TABLE `dwd_order_detail` (
`id` string COMMENT '明细 id',
`order_id` string COMMENT '订单 id',
`user_id` string COMMENT '用户 id',
`sku_id` string COMMENT '商品 id',
`sku_name` string COMMENT '商品名称',
`order_price` string COMMENT '购买价格',
`sku_num` string COMMENT '购物数量',
`province_id` string COMMENT '省市 id',
`create_time` string COMMENT '创建时间'
) 
COMMENT '订单明细'
PARTITIONED BY (ds string);
CREATE TABLE `dim_sku_info_df` (
`id` string COMMENT '商品 id',
`spu_id` string COMMENT 'spuid',
`price` double COMMENT '商品价格',
`sku_name` string COMMENT '商品名称',
`sku_desc` string COMMENT '商品描述',
`weight` double COMMENT '重量',
`tm_id` string COMMENT '品牌 id',
`tm_name` string COMMENT '品牌名称',
`category3_id` string COMMENT '三级分类 id',
`category2_id` string COMMENT '二级分类 id',
`category1_id` string COMMENT '一级分类 id',
`category3_name` string COMMENT '三级分类名称',
`category2_name` string COMMENT '二级分类名称',
`category1_name` string COMMENT '一级分类名称',
`create_time` string COMMENT '创建时间'
)
COMMENT '商品表信息'
PARTITIONED BY (ds string);
CREATE TABLE `dim_user_info_df` (
`id` string COMMENT 'id',
`name` string COMMENT '用户名称',
`birthday` string COMMENT '生日',
`gender` string COMMENT '性别',
`email` string COMMENT '邮箱',
`user_level` string COMMENT '等级',
`create_time` string COMMENT '注册时间'
)
COMMENT '用户信息表'
PARTITIONED BY (ds string);
CREATE TABLE `dim_base_province_df` (
`id` string COMMENT 'id',
`province_name` string COMMENT '省市名称',
`region_id` string COMMENT '地区 id',
`region_name` string COMMENT '地区名称'
)
COMMENT '地区省市表'
PARTITIONED BY (ds string);

4. 手动将数据导入DWD层

Insert overwrite table dwd_order_info_di partition(ds)
select id,
 total_amount,
 order_status,
 user_id,
 payment_way,
 out_trade_no,
 province_id,
 create_time,
 operate_time,
 ds
from ods_order_info_di
where ds='20220518' and id is not null;


insert overwrite table dwd_order_detail_di partition(ds)
select od.id,
order_id,
oi.user_id,
sku_id,
sku_name,
order_price,
sku_num,
oi.province_id,
od.create_time,
od.ds 
from ods_order_detail_di od join ods_order_info_di oi 
on od.order_id = oi.id and oi.ds = '20220518' 
and od.ds = '20220518' and od.id is not null;


insert overwrite table dim_sku_info_df partition(ds)
select 
sku.id,
sku.spu_id, 
sku.price,
sku.sku_name, 
sku.sku_desc, 
sku.weight,
sku.tm_id, 
tm.tm_name, 
sku.category3_id, 
c2.id category2_id , 
c1.id category1_id, 
c3.name category3_name, 
c2.name category2_name, 
c1.name category1_name, 
sku.create_time,
sku.ds
from
( 
select * 
from ods_sku_info_df 
where ds='20220518' and id is not null 
) sku 
join ods_base_category3_df c3 on sku.category3_id = c3.id and 
c3.ds = '20220518' 
join ods_base_category2_df c2 on c3.category2_id = c2.id and 
c2.ds = '20220518' 
join ods_base_category1_df c1 on c2.category1_id = c1.id and 
c1.ds = '20220518'
join ods_base_trademark_df tm on tm.tm_id = sku.tm_id and tm.ds 
= '20220518';


insert overwrite table dim_user_info_df partition(ds)
select id,
name ,
birthday,
gender,
email,
user_level,
create_time,
ds from ods_user_info_df
where ds='20220518' and id is not null;


insert overwrite table dim_base_province_df PARTITION (ds)
select
p.id,
p.name,
p.region_id,
r.region_name,
p.ds
from ods_base_province_df p join ods_base_region_df r
on p.region_id = r.id and p.ds='20220518' and r.ds = 
'20220518';

5.手动查询

select * from dwd_order_info where ds='20220521';
select * from dwd_order_detail where ds='20220521';
select * from dim_sku_info_df where ds='20220518';
select * from dim_user_info_df where ds='20220518';
select * from dim_base_province_df where ds='20220518';

2. DWD层数据导入脚本

1. 表dwd_order_info

Insert overwrite table dwd_order_info partition(ds)
Select id,
 total_amount,
 order_status,
 user_id,
 payment_way,
 out_trade_no,
 province_id,
 create_time,
 operate_time,
 ds
from ods_order_info_di
where ds='${bizdate}' and id is not null;

2. 表dwd_order_detail

insert overwrite table dwd_order_detail partition(ds)
select od.id,
order_id,
oi.user_id,
sku_id,
sku_name,
order_price,
sku_num,
oi.province_id,
od.create_time,
od.ds 
from ods_order_detail_di od join ods_order_info_di oi 
on od.order_id = oi.id and oi.ds = '${bizdate}' 
and od.ds = '${bizdate}' and od.id is not null;

3. 表dim_sku_info_df_sql

insert overwrite table dim_sku_info_df partition(ds)
select 
 sku.id,
 sku.spu_id, 
 sku.price,
 sku.sku_name, 
 sku.sku_desc,
 sku.weight, 
 sku.tm_id, 
 tm.tm_name, 
 sku.category3_id, 
 c2.id category2_id , 
 c1.id category1_id, 
 c3.name category3_name, 
 c2.name category2_name, 
 c1.name category1_name, 
 sku.create_time,
 sku.ds
from
( 
 select * 
 from ods_sku_info_df 
 where ds='${bizdate}' and id is not null 
) sku 
join ods_base_category3_df c3 on sku.category3_id = c3.id and 
c3.ds = '${bizdate}' 
join ods_base_category2_df c2 on c3.category2_id = c2.id and 
c2.ds = '${bizdate}' 
join ods_base_category1_df c1 on c2.category1_id = c1.id and 
c1.ds = '${bizdate}'
join ods_base_trademark_df tm on tm.tm_id = sku.tm_id and tm.ds 
= '${bizdate}';

4. 表dim_user_info_df

insert overwrite table dim_user_info_df partition(ds)
select id,
 name ,
 birthday,
 gender,
 email,
 user_level,
 create_time,
 ds from ods_user_info_df
where ds='${bizdate}' and id is not null;

5. 表dim_base_province_df

insert overwrite table dim_base_province_df PARTITION (ds)
select
p.id,
p.name,
p.region_id,
r.region_name,
p.ds
from ods_base_province_df p join ods_base_region_df r
on p.region_id = r.id and p.ds='${bizdate}' and r.ds = 
'${bizdate}';

2. dws层

1. 建表语句

CREATE TABLE `dws_trade_detail` (
`user_id` string COMMENT '用户 id',
`sku_id` string COMMENT '商品 Id',
`user_gender` string COMMENT '用户性别',
`user_age` string COMMENT '用户年龄',
`user_level` string COMMENT '用户等级',
`sku_price` double COMMENT '商品当日价格',
`sku_name` string COMMENT '商品名称',
`sku_category3_id` string COMMENT '商品三级品类 id',
`sku_category2_id` string COMMENT '商品二级品类 id',
`sku_category1_id` string COMMENT '商品一级品类 id',
`sku_category3_name` string COMMENT '商品三级品类名称',
`sku_category2_name` string COMMENT '商品二级品类名称',
`sku_category1_name` string COMMENT '商品一级品类名称',
`spu_id` string COMMENT '商品 spu',
`tm_id` string COMMENT '品牌 id',
`tm_name` string COMMENT '品牌名称',
`province_id` string COMMENT '省市 id',
`province_name` string COMMENT '省市名称',
`region_id` string COMMENT '地区 id',
`region_name` string COMMENT '地区名称',
`sku_num` double COMMENT '购买个数',
`order_count` bigint COMMENT '当日下单单数',
`order_amount` double COMMENT '当日下单金额'
) 
COMMENT '用户单日交易行为宽表'
PARTITIONED BY (ds string);

2. 插入DWS表

with tmp_trade AS
( 
 select 
 od.user_id,od.sku_id,od.province_id,
 sum(sku_num) sku_num,
 count(*) order_count,
 sum(od.order_price*sku_num) order_amount
 from dwd_order_detail od
 where od.ds='20220521'
 group by od.user_id, od.sku_id, od.province_id
)
insert OVERWRITE TABLE dws_trade_detail PARTITION(ds='20220521')
select 
 tmp_trade.user_id,
 tmp_trade.sku_id,
 u.gender,
months_between(to_char(to_date('20220521','yyyymmdd'),'yyyy-mm-dd'), u.birthday)/12 age,
 u.user_level,
 price,
 sku_name,
 category3_id, 
 category2_id, 
 category1_id, 
 category3_name, 
 category2_name, 
 category1_name,
 spu_id,
 tm_id,
 tm_name,
 p.id,
 p.province_name,
 p.region_id,
 p.region_name,
 tmp_trade.sku_num,
 tmp_trade.order_count,
 tmp_trade.order_amount 
from tmp_trade 
left join dim_user_info_df u on u.id = tmp_trade.user_id and 
u.ds = '20220521'
left join dim_sku_info_df s on tmp_trade.sku_id = s.id and s.ds 
= '20220521'
left join dim_base_province_df p on tmp_trade.province_id = p.id 
and p.ds='20220521';

3. 查询dws_trade_detail表

select * 
from dws_trade_detail
where ds = '20220521';

4. trade_detail脚本导入

with tmp_trade AS
( 
 select 
 od.user_id,od.sku_id,od.province_id,
 sum(sku_num) sku_num,
 count(*) order_count,
 sum(od.order_price*sku_num) order_amount
 from dwd_order_detail od
 where od.ds='${bizdate}'
 group by od.user_id, od.sku_id, od.province_id
)
insert OVERWRITE TABLE dws_trade_detail PARTITION (ds='${bizdate}')
select 
 tmp_trade.user_id,
 tmp_trade.sku_id,
 u.gender,
 months_between('${bizdate}', u.birthday)/12 age, 
 u.user_level,
 price,
 sku_name,
 category3_id, 
 category2_id, 
 category1_id, 
 category3_name, 
 category2_name, 
 category1_name,
 spu_id,
 tm_id,
 tm_name,
 p.id,
 p.province_name,
 p.region_id,
 p.region_name,
 tmp_trade.sku_num,
 tmp_trade.order_count,
 tmp_trade.order_amount 
from tmp_trade 
left join dim_user_info_df u on u.id = tmp_trade.user_id and 
u.ds = '${bizdate}'
left join dim_sku_info_df s on tmp_trade.sku_id = s.id and s.ds 
= '${bizdate}'
left join dim_base_province_df p on tmp_trade.province_id = p.id 
and p.ds='${bizdate}';
posted @ 2022-05-22 18:25  jsqup  阅读(154)  评论(0编辑  收藏  举报