拉链表
- 介绍:记录历史。记录一个事务从开始,一直到当前状态的所有变化的信息。
使用场景
- 表中的数据量很大。(每天都存储会占用很多空间)
- 表中的部分字段会被更新。
- 需要查看某一个时间点或者时间段的历史快照信息。
- 表中的记录变化的比例和频率不是很大。
拉链表优势
优势:既能获取最新的数据,也能添加筛选条件也获取历史的数据。
说明
ods为全量表
具体案例
1.新增的直接取过来
2.更新的保留前一天数据
3.不变的记录,取最后一次更新的数据
4.拉链结束的数据直接拿过来
5.删除的数据则保留前一日数据,并将记录结束日期记成更新日期
表结构
| CREATE EXTERNAL TABLE if not exists ods_business( |
| id int COMMENT '自增主键' |
| ,corp_id int COMMENT '公司ID' |
| ,uid int COMMENT 'UID' |
| ,resume_count int COMMENT '简历下载量' |
| ,used_count int COMMENT '已使用简历下载量' |
| ,update_date string COMMENT '更新时间' |
| ,create_date string COMMENT '创建时间' |
| ) partitioned by(dt string comment '分区日期') |
| row format delimited fields terminated by '\t' |
| stored as textfile; |
| |
| CREATE EXTERNAL TABLE if not exists dwd_business ( |
| id int COMMENT '自增主键' |
| ,corp_id int COMMENT '公司ID' |
| ,uid int COMMENT 'UID' |
| ,resume_count int COMMENT '简历下载量' |
| ,used_count int COMMENT '已使用简历下载量' |
| ,update_date string COMMENT '更新时间' |
| ,create_date string COMMENT '创建时间' |
| ,record_start_date string COMMENT '记录有效开始时间' |
| ,record_end_date string COMMENT '记录有效结束时间' |
| ) partitioned by(dt string comment '分区日期') |
| row format delimited fields terminated by '\t' |
| stored as textfile; |
| |
| CREATE EXTERNAL TABLE if not exists mid_business ( |
| id int COMMENT '自增主键' |
| ,corp_id int COMMENT '公司ID' |
| ,uid int COMMENT 'UID' |
| ,resume_count int COMMENT '简历下载量' |
| ,used_count int COMMENT '已使用简历下载量' |
| ,update_date string COMMENT '更新时间' |
| ,create_date string COMMENT '创建时间' |
| ,record_start_date string COMMENT '记录有效开始时间' |
| ,record_end_date string COMMENT '记录有效结束时间' |
| ) partitioned by(dt string comment '分区日期') |
| row format delimited fields terminated by '\t' |
| stored as textfile; |
ODS表处理
| INSERT overwrite table ods_business partition(dt='2023-10-22') |
| VALUES |
| (1, 1021, 102931, 15, 2, '2022-10-22', '2022-10-21'), |
| (2, 1001, 1342, 52, 23, '2022-10-22', '2022-10-21'), |
| (3, 1043, 3563, 33, 12, '2022-10-22', '2022-10-21'), |
| (4, 1212, 5745, 53, 15, '2022-10-22', '2022-10-21'), |
| (5, 15342, 7543, 7, 1, '2022-10-22', '2022-10-21'), |
| (6, 1153, 7486, 21, 4, '2022-10-22', '2022-10-21'), |
| (7, 1865, 8534, 14, 9, '2022-10-22', '2022-10-21'), |
| (8, 1453, 5423, 25, 15, '2022-10-22', '2022-10-21'); |
| |
| INSERT into table ods_business partition(dt='2023-10-23') |
| VALUES |
| (1, 1021, 102931, 15, 6, '2022-10-23', '2022-10-21'), |
| (2, 1001, 1342, 52, 26, '2022-10-23', '2022-10-21'), |
| (3, 1043, 3563, 33, 12, '2022-10-22', '2022-10-21'), |
| (4, 1212, 5745, 53, 15, '2022-10-22', '2022-10-21'), |
| (5, 15342, 7543, 7, 1, '2022-10-22', '2022-10-21'), |
| (6, 1153, 7486, 21, 4, '2022-10-22', '2022-10-21'), |
| (7, 1865, 8534, 14, 9, '2022-10-22', '2022-10-21'), |
| (8, 1453, 5423, 25, 15, '2022-10-22', '2022-10-21'), |
| (9, 3211, 6543, 19, 14, '2022-10-23', '2022-10-23'), |
| (10, 1263, 1264, 45, 31, '2022-10-23', '2022-10-23'); |
| INSERT OVERWRITE TABLE dwd_business partition(dt='2023-10-23') |
| select distinct |
| id, corp_id, uid, resume_count, used_count, update_date, create_date, |
| record_start_date, record_end_date |
| from mid_test_business |
| where dt='2023-10-23' |
表处理
| |
| INSERT OVERWRITE TABLE mid_business partition(dt='2023-10-23') |
| select |
| t1.id, |
| t1.corp_id, |
| t1.uid, |
| t1.resume_count, |
| t1.used_count, |
| t1.update_date, |
| t1.create_date, |
| t1.create_date as record_start_date, |
| '3000-12-31' as record_end_date |
| from |
| ( |
| select * from ods_business where dt='2023-10-23' |
| ) t1 |
| left join |
| ( |
| select * from dwd_business |
| where dt = '2023-10-22' |
| and record_start_date <= '2023-10-23' |
| and record_end_date > '2023-10-23' |
| ) t2 |
| on t1.id = t2.id |
| where t2.id is null; |
| |
| |
| |
| INSERT INTO TABLE mid_business partition(dt='2023-10-23') |
| select |
| t1.id, |
| t2.corp_id, |
| t2.uid, |
| t2.resume_count, |
| t2.used_count, |
| t2.update_date, |
| t2.create_date, |
| t2.record_start_date as record_start_date, |
| t1.update_date as record_end_date |
| from |
| ( |
| select * from ods_business where dt='2023-10-23' |
| ) t1 |
| inner join |
| ( |
| select * from dwd_business |
| where dt = '2023-10-22' |
| and record_start_date <= '2023-10-23' |
| and record_end_date > '2023-10-23' |
| ) t2 |
| on t1.id = t2.id |
| where t1.update_date > t2.update_date; |
| |
| INSERT INTO TABLE mid_business partition(dt='2023-10-23') |
| select |
| t1.id, |
| t1.corp_id, |
| t1.uid, |
| t1.resume_count, |
| t1.used_count, |
| t1.update_date, |
| t1.create_date, |
| t1.update_date as record_start_date, |
| '3000-12-31' as record_end_date |
| from |
| ( |
| select * from ods_business where dt='2023-10-23' |
| ) t1 |
| inner join |
| ( |
| select * from dwd_business |
| where dt = '2023-10-22' |
| and record_start_date <= '2023-10-23' |
| and record_end_date > '2023-10-23' |
| ) t2 |
| on t1.id = t2.id |
| where t1.update_date > t2.update_date; |
| |
| |
| INSERT INTO TABLE mid_business partition(dt='2023-10-23') |
| select |
| t2.id, |
| t2.corp_id, |
| t2.uid, |
| t2.resume_count, |
| t2.used_count, |
| t2.update_date, |
| t2.create_date, |
| t2.update_date as record_start_date, |
| '3000-12-31' as record_end_date |
| from |
| ( |
| select * from ods_business where dt='2023-10-23' |
| ) t1 |
| inner join |
| ( |
| select * from dwd_business |
| where dt = '2023-10-22' |
| and record_start_date <= '2023-10-23' |
| and record_end_date > '2023-10-23' |
| ) t2 |
| on t1.id = t2.id |
| where t1.update_date = t2.update_date; |
| |
| |
| INSERT INTO TABLE mid_business partition(dt='2023-10-23') |
| select |
| t1.id, |
| t1.corp_id, |
| t1.uid, |
| t1.resume_count, |
| t1.used_count, |
| t1.update_date, |
| t1.create_date, |
| t1.record_start_date as record_start_date, |
| t1.update_date as record_end_date |
| from |
| ( |
| select * from dwd_business |
| where dt = '2023-10-22' |
| and record_start_date <= '2023-10-23' |
| and record_end_date > '2023-10-23' |
| ) t1 |
| left join |
| ( |
| select * |
| from ods_business |
| where dt = '2023-10-23' |
| ) t2 |
| on t1.id = t2.id |
| where t2.id is null; |
| |
| |
| INSERT INTO TABLE mid_business partition(dt='2023-10-23') |
| select |
| t1.id, |
| t1.corp_id, |
| t1.uid, |
| t1.resume_count, |
| t1.used_count, |
| t1.update_date, |
| t1.create_date, |
| t1.record_start_date as record_start_date, |
| t1.record_end_date as record_end_date |
| from |
| dwd_business t1 |
| where dt = '2023-10-22' |
| and record_start_date < '2023-10-23'; |
其他实现方式
实现方式一: 五种情况一一实现
将【新增、删除、更新、不变、拉链结束】这五种情况,一一列举实现。
实现方式二: 将五种情况合并实现
【修改之后+不变+新增】存放到99991231最新分区
| |
| insert overwrite table dim_profit_center_dzip partition(ds) |
| select |
| if ( |
| |
| profit_center_old.profit_center_code = profit_center_new.BIC_ZPRFI_CTR |
| and profit_center_old.profit_center_short_desc = profit_center_new.TXTSH |
| and profit_center_old.profit_center_middle_desc = profit_center_new.TXTMD, |
| profit_center_old.profit_center_id, |
| concat('${bizdate}', profit_center_new.BIC_ZPRFI_CTR) |
| ) as profit_center_id, |
| profit_center_new.BIC_ZPRFI_CTR, |
| profit_center_new.TXTSH, |
| profit_center_new.TXTMD, |
| if ( |
| |
| profit_center_old.profit_center_code = profit_center_new.BIC_ZPRFI_CTR |
| and profit_center_old.profit_center_short_desc = profit_center_new.TXTSH |
| and profit_center_old.profit_center_middle_desc = profit_center_new.TXTMD, |
| profit_center_old.start_date, |
| '${bizdate}' |
| ) as start_date, |
| '99991231' end_date, |
| '99991231' ds |
| from |
| ( |
| select |
| profit_center_id, |
| profit_center_code, |
| profit_center_short_desc, |
| profit_center_middle_desc, |
| start_date |
| from dim_profit_center_dzip |
| where ds = '99991231' |
| ) profit_center_old |
| right join |
| ( |
| select |
| BIC_ZPRFI_CTR, |
| TXTSH, |
| TXTMD |
| from |
| ( |
| select |
| BIC_ZPRFI_CTR, |
| TXTSH, |
| TXTMD, |
| row_number() over(partition by BIC_ZPRFI_CTR order by BIC_ZPRFI_CTR) rn |
| from ic_sap_lake.ods_sap_sys_bic_zeis_master_m_zprfi_ctr_ds_h |
| where BIC_ZPRFI_CTR is not null and BIC_ZPRFI_CTR <> '' |
| ) profit_center_d |
| where profit_center_d.rn = 1 |
| ) profit_center_new |
| on profit_center_old.profit_center_code = profit_center_new.BIC_ZPRFI_CTR |
【修改+删除】存放到历史分区
| |
| select |
| profit_center_old.profit_center_id, |
| profit_center_old.profit_center_code, |
| profit_center_old.profit_center_short_desc, |
| profit_center_old.profit_center_middle_desc, |
| profit_center_old.start_date, |
| '${bizdate - 1}' end_date, |
| '${bizdate - 1}' ds |
| from |
| ( |
| select |
| profit_center_id, |
| profit_center_code, |
| profit_center_short_desc, |
| profit_center_middle_desc, |
| start_date |
| from dim_profit_center_dzip |
| where ds = '99991231' |
| ) profit_center_old |
| left join |
| ( |
| select |
| BIC_ZPRFI_CTR, |
| TXTSH, |
| TXTMD |
| from |
| ( |
| select |
| BIC_ZPRFI_CTR, |
| TXTSH, |
| TXTMD, |
| row_number() over(partition by BIC_ZPRFI_CTR order by BIC_ZPRFI_CTR) rn |
| from ic_sap_lake.ods_sap_sys_bic_zeis_master_m_zprfi_ctr_ds_h |
| where BIC_ZPRFI_CTR is not null and BIC_ZPRFI_CTR <> '' |
| ) profit_center_d |
| where profit_center_d.rn = 1 |
| ) profit_center_new |
| on profit_center_old.profit_center_code = profit_center_new.BIC_ZPRFI_CTR |
| where profit_center_old.profit_center_code != profit_center_new.BIC_ZPRFI_CTR |
| or profit_center_old.profit_center_short_desc != profit_center_new.TXTSH |
| or profit_center_old.profit_center_middle_desc != profit_center_new.TXTMD |
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· TypeScript + Deepseek 打造卜卦网站:技术与玄学的结合
· Manus的开源复刻OpenManus初探
· AI 智能体引爆开源社区「GitHub 热点速览」
· 三行代码完成国际化适配,妙~啊~
· .NET Core 中如何实现缓存的预热?