1. 手动导入表
1. 对ODS层数据加工
如:
ODS地区+ODS省份=>DWD省份地区
ODS 商品信息 + ODS 品牌 + ODS 商品一级分类 + ODS 商品二级分类 + ODS 商品三级分类=>DWD商品信息
2. DWD层表结构

3. 创建表
| CREATE TABLE `dwd_order_info` ( |
| `id` string COMMENT '订单 id', |
| `total_amount` double COMMENT '订单总额', |
| `order_status` string COMMENT ' 1 未支付 2 已支付 3 已发货 4 已收货 5 完成', |
| `user_id` string COMMENT '用户 id', |
| `payment_way` string COMMENT '付款方式', |
| `out_trade_no` string COMMENT '订单流失号', |
| `province_id` string COMMENT '省市 id', |
| `create_time` string COMMENT '创建时间', |
| `operate_time` string COMMENT '修改时间' |
| ) |
| COMMENT '订单表' |
| PARTITIONED BY (ds string); |
| CREATE TABLE `dwd_order_detail` ( |
| `id` string COMMENT '明细 id', |
| `order_id` string COMMENT '订单 id', |
| `user_id` string COMMENT '用户 id', |
| `sku_id` string COMMENT '商品 id', |
| `sku_name` string COMMENT '商品名称', |
| `order_price` string COMMENT '购买价格', |
| `sku_num` string COMMENT '购物数量', |
| `province_id` string COMMENT '省市 id', |
| `create_time` string COMMENT '创建时间' |
| ) |
| COMMENT '订单明细' |
| PARTITIONED BY (ds string); |
| CREATE TABLE `dim_sku_info_df` ( |
| `id` string COMMENT '商品 id', |
| `spu_id` string COMMENT 'spuid', |
| `price` double COMMENT '商品价格', |
| `sku_name` string COMMENT '商品名称', |
| `sku_desc` string COMMENT '商品描述', |
| `weight` double COMMENT '重量', |
| `tm_id` string COMMENT '品牌 id', |
| `tm_name` string COMMENT '品牌名称', |
| `category3_id` string COMMENT '三级分类 id', |
| `category2_id` string COMMENT '二级分类 id', |
| `category1_id` string COMMENT '一级分类 id', |
| `category3_name` string COMMENT '三级分类名称', |
| `category2_name` string COMMENT '二级分类名称', |
| `category1_name` string COMMENT '一级分类名称', |
| `create_time` string COMMENT '创建时间' |
| ) |
| COMMENT '商品表信息' |
| PARTITIONED BY (ds string); |
| CREATE TABLE `dim_user_info_df` ( |
| `id` string COMMENT 'id', |
| `name` string COMMENT '用户名称', |
| `birthday` string COMMENT '生日', |
| `gender` string COMMENT '性别', |
| `email` string COMMENT '邮箱', |
| `user_level` string COMMENT '等级', |
| `create_time` string COMMENT '注册时间' |
| ) |
| COMMENT '用户信息表' |
| PARTITIONED BY (ds string); |
| CREATE TABLE `dim_base_province_df` ( |
| `id` string COMMENT 'id', |
| `province_name` string COMMENT '省市名称', |
| `region_id` string COMMENT '地区 id', |
| `region_name` string COMMENT '地区名称' |
| ) |
| COMMENT '地区省市表' |
| PARTITIONED BY (ds string); |
4. 手动将数据导入DWD层
| Insert overwrite table dwd_order_info_di partition(ds) |
| select id, |
| total_amount, |
| order_status, |
| user_id, |
| payment_way, |
| out_trade_no, |
| province_id, |
| create_time, |
| operate_time, |
| ds |
| from ods_order_info_di |
| where ds='20220518' and id is not null; |
| |
| |
| insert overwrite table dwd_order_detail_di partition(ds) |
| select od.id, |
| order_id, |
| oi.user_id, |
| sku_id, |
| sku_name, |
| order_price, |
| sku_num, |
| oi.province_id, |
| od.create_time, |
| od.ds |
| from ods_order_detail_di od join ods_order_info_di oi |
| on od.order_id = oi.id and oi.ds = '20220518' |
| and od.ds = '20220518' and od.id is not null; |
| |
| |
| insert overwrite table dim_sku_info_df partition(ds) |
| select |
| sku.id, |
| sku.spu_id, |
| sku.price, |
| sku.sku_name, |
| sku.sku_desc, |
| sku.weight, |
| sku.tm_id, |
| tm.tm_name, |
| sku.category3_id, |
| c2.id category2_id , |
| c1.id category1_id, |
| c3.name category3_name, |
| c2.name category2_name, |
| c1.name category1_name, |
| sku.create_time, |
| sku.ds |
| from |
| ( |
| select * |
| from ods_sku_info_df |
| where ds='20220518' and id is not null |
| ) sku |
| join ods_base_category3_df c3 on sku.category3_id = c3.id and |
| c3.ds = '20220518' |
| join ods_base_category2_df c2 on c3.category2_id = c2.id and |
| c2.ds = '20220518' |
| join ods_base_category1_df c1 on c2.category1_id = c1.id and |
| c1.ds = '20220518' |
| join ods_base_trademark_df tm on tm.tm_id = sku.tm_id and tm.ds |
| = '20220518'; |
| |
| |
| insert overwrite table dim_user_info_df partition(ds) |
| select id, |
| name , |
| birthday, |
| gender, |
| email, |
| user_level, |
| create_time, |
| ds from ods_user_info_df |
| where ds='20220518' and id is not null; |
| |
| |
| insert overwrite table dim_base_province_df PARTITION (ds) |
| select |
| p.id, |
| p.name, |
| p.region_id, |
| r.region_name, |
| p.ds |
| from ods_base_province_df p join ods_base_region_df r |
| on p.region_id = r.id and p.ds='20220518' and r.ds = |
| '20220518'; |
5.手动查询
| select * from dwd_order_info where ds='20220521'; |
| select * from dwd_order_detail where ds='20220521'; |
| select * from dim_sku_info_df where ds='20220518'; |
| select * from dim_user_info_df where ds='20220518'; |
| select * from dim_base_province_df where ds='20220518'; |
2. DWD层数据导入脚本
1. 表dwd_order_info

| Insert overwrite table dwd_order_info partition(ds) |
| Select id, |
| total_amount, |
| order_status, |
| user_id, |
| payment_way, |
| out_trade_no, |
| province_id, |
| create_time, |
| operate_time, |
| ds |
| from ods_order_info_di |
| where ds='${bizdate}' and id is not null; |
2. 表dwd_order_detail

| insert overwrite table dwd_order_detail partition(ds) |
| select od.id, |
| order_id, |
| oi.user_id, |
| sku_id, |
| sku_name, |
| order_price, |
| sku_num, |
| oi.province_id, |
| od.create_time, |
| od.ds |
| from ods_order_detail_di od join ods_order_info_di oi |
| on od.order_id = oi.id and oi.ds = '${bizdate}' |
| and od.ds = '${bizdate}' and od.id is not null; |
3. 表dim_sku_info_df_sql

| insert overwrite table dim_sku_info_df partition(ds) |
| select |
| sku.id, |
| sku.spu_id, |
| sku.price, |
| sku.sku_name, |
| sku.sku_desc, |
| sku.weight, |
| sku.tm_id, |
| tm.tm_name, |
| sku.category3_id, |
| c2.id category2_id , |
| c1.id category1_id, |
| c3.name category3_name, |
| c2.name category2_name, |
| c1.name category1_name, |
| sku.create_time, |
| sku.ds |
| from |
| ( |
| select * |
| from ods_sku_info_df |
| where ds='${bizdate}' and id is not null |
| ) sku |
| join ods_base_category3_df c3 on sku.category3_id = c3.id and |
| c3.ds = '${bizdate}' |
| join ods_base_category2_df c2 on c3.category2_id = c2.id and |
| c2.ds = '${bizdate}' |
| join ods_base_category1_df c1 on c2.category1_id = c1.id and |
| c1.ds = '${bizdate}' |
| join ods_base_trademark_df tm on tm.tm_id = sku.tm_id and tm.ds |
| = '${bizdate}'; |
4. 表dim_user_info_df

| insert overwrite table dim_user_info_df partition(ds) |
| select id, |
| name , |
| birthday, |
| gender, |
| email, |
| user_level, |
| create_time, |
| ds from ods_user_info_df |
| where ds='${bizdate}' and id is not null; |
5. 表dim_base_province_df

| insert overwrite table dim_base_province_df PARTITION (ds) |
| select |
| p.id, |
| p.name, |
| p.region_id, |
| r.region_name, |
| p.ds |
| from ods_base_province_df p join ods_base_region_df r |
| on p.region_id = r.id and p.ds='${bizdate}' and r.ds = |
| '${bizdate}'; |
2. dws层
1. 建表语句

| CREATE TABLE `dws_trade_detail` ( |
| `user_id` string COMMENT '用户 id', |
| `sku_id` string COMMENT '商品 Id', |
| `user_gender` string COMMENT '用户性别', |
| `user_age` string COMMENT '用户年龄', |
| `user_level` string COMMENT '用户等级', |
| `sku_price` double COMMENT '商品当日价格', |
| `sku_name` string COMMENT '商品名称', |
| `sku_category3_id` string COMMENT '商品三级品类 id', |
| `sku_category2_id` string COMMENT '商品二级品类 id', |
| `sku_category1_id` string COMMENT '商品一级品类 id', |
| `sku_category3_name` string COMMENT '商品三级品类名称', |
| `sku_category2_name` string COMMENT '商品二级品类名称', |
| `sku_category1_name` string COMMENT '商品一级品类名称', |
| `spu_id` string COMMENT '商品 spu', |
| `tm_id` string COMMENT '品牌 id', |
| `tm_name` string COMMENT '品牌名称', |
| `province_id` string COMMENT '省市 id', |
| `province_name` string COMMENT '省市名称', |
| `region_id` string COMMENT '地区 id', |
| `region_name` string COMMENT '地区名称', |
| `sku_num` double COMMENT '购买个数', |
| `order_count` bigint COMMENT '当日下单单数', |
| `order_amount` double COMMENT '当日下单金额' |
| ) |
| COMMENT '用户单日交易行为宽表' |
| PARTITIONED BY (ds string); |
2. 插入DWS表
| with tmp_trade AS |
| ( |
| select |
| od.user_id,od.sku_id,od.province_id, |
| sum(sku_num) sku_num, |
| count(*) order_count, |
| sum(od.order_price*sku_num) order_amount |
| from dwd_order_detail od |
| where od.ds='20220521' |
| group by od.user_id, od.sku_id, od.province_id |
| ) |
| insert OVERWRITE TABLE dws_trade_detail PARTITION(ds='20220521') |
| select |
| tmp_trade.user_id, |
| tmp_trade.sku_id, |
| u.gender, |
| months_between(to_char(to_date('20220521','yyyymmdd'),'yyyy-mm-dd'), u.birthday)/12 age, |
| u.user_level, |
| price, |
| sku_name, |
| category3_id, |
| category2_id, |
| category1_id, |
| category3_name, |
| category2_name, |
| category1_name, |
| spu_id, |
| tm_id, |
| tm_name, |
| p.id, |
| p.province_name, |
| p.region_id, |
| p.region_name, |
| tmp_trade.sku_num, |
| tmp_trade.order_count, |
| tmp_trade.order_amount |
| from tmp_trade |
| left join dim_user_info_df u on u.id = tmp_trade.user_id and |
| u.ds = '20220521' |
| left join dim_sku_info_df s on tmp_trade.sku_id = s.id and s.ds |
| = '20220521' |
| left join dim_base_province_df p on tmp_trade.province_id = p.id |
| and p.ds='20220521'; |
3. 查询dws_trade_detail表
| select * |
| from dws_trade_detail |
| where ds = '20220521'; |
4. trade_detail脚本导入

| with tmp_trade AS |
| ( |
| select |
| od.user_id,od.sku_id,od.province_id, |
| sum(sku_num) sku_num, |
| count(*) order_count, |
| sum(od.order_price*sku_num) order_amount |
| from dwd_order_detail od |
| where od.ds='${bizdate}' |
| group by od.user_id, od.sku_id, od.province_id |
| ) |
| insert OVERWRITE TABLE dws_trade_detail PARTITION (ds='${bizdate}') |
| select |
| tmp_trade.user_id, |
| tmp_trade.sku_id, |
| u.gender, |
| months_between('${bizdate}', u.birthday)/12 age, |
| u.user_level, |
| price, |
| sku_name, |
| category3_id, |
| category2_id, |
| category1_id, |
| category3_name, |
| category2_name, |
| category1_name, |
| spu_id, |
| tm_id, |
| tm_name, |
| p.id, |
| p.province_name, |
| p.region_id, |
| p.region_name, |
| tmp_trade.sku_num, |
| tmp_trade.order_count, |
| tmp_trade.order_amount |
| from tmp_trade |
| left join dim_user_info_df u on u.id = tmp_trade.user_id and |
| u.ds = '${bizdate}' |
| left join dim_sku_info_df s on tmp_trade.sku_id = s.id and s.ds |
| = '${bizdate}' |
| left join dim_base_province_df p on tmp_trade.province_id = p.id |
| and p.ds='${bizdate}'; |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通