ETL-拉链算法-1
参考链接:http://blog.csdn.net/nisjlvhudy/article/details/8559518
自己的理解:
主要针对数据的状态变化
表:原表:source
目标表:target
临时表:vt_new
临时表:vt_inc
vt_new将原表数据加工为容易处理的记录,增加起始时间与结束时间,即当日全量表
vt_inc保存target与vt_new表对比后的结果,即增量表
更新目标表
使用函数的参考链接:http://blog.csdn.net/rachel_luo/article/details/8073458
声明一个函数:
create or replace function data_common(P_ETLDATE date) return integer as
$body$
declare
skyid integer;
BEGIN
#将原表数据加工处理后插入vt_new
insert into vt_new select id,name,balance,data_date,'29991231' from source where data_date=P_ETLDATE;
#将目标表与当日全量表vt_new比较,将比较结果存入增量表vt_inc
insert into vt_inc
select id,name,balance,start_dt,'19900102' from vt_new VT where end_dt='29991231'
and(
not exists (select 1 from target T where VT.id=T.id)
or exists (select 1 from (select id, max(end_dt) end_dt from target group by id) TT where TT.id = VT.id and TT.end_dt > P_ETLDATE));
#将增量表中的目标表中已存在的某条记录更新,更新结束日期
update target t
set end_dt=P_ETLDATE
where end_dt='29991231'and exists
(select 1 from vt_inc c
where c.id=t.id and c.balance<>t.balance and c.start_dt=P_ETLDATE);
#将增量表中的目标表中不存在的某条记录添加到目标表中
insert into target
select id,name,balance,start_dt,'29991231' from vt_inc where start_dt=P_ETLDATE;
#删除两个临时表中的数据
truncate table vt_new;truncate table vt_inc;
skyid=1;
return skyid;
END;
$body$
ANGUAGE 'plpgsql' VOLATILE;
使用函数:
select data_common('20140101');
insert into vt_inc
select * from vt_new a left join target b on a.id=b.id and b.end_dt='29991231'
where b.id is null or (a.name<>b.name or a.bal<>b.bal);
ฅ平平庸庸的普通人ฅ