拉链表-增量更新方法一

参考文档:http://lxw1234.com/archives/2015/08/473.htm

一、元表结构

1、定义业务库原始订单表:

drop table chavin.orders;

CREATE TABLE orders (

orderid INT,

createtime STRING,

modifiedtime STRING,

status STRING

)row format delimited fields terminated by '\t'

stored AS textfile;

--加载测试数据

1 2015-08-18 2015-08-18 创建

2 2015-08-18 2015-08-18 创建

3 2015-08-19 2015-08-21 支付

4 2015-08-19 2015-08-21 完成

5 2015-08-19 2015-08-20 支付

6 2015-08-20 2015-08-20 创建

7 2015-08-20 2015-08-21 支付

8 2015-08-21 2015-08-21 创建

2、定义ODS订单表结构,采用日分区存储:

drop table t_ods_orders_inc;

CREATE TABLE t_ods_orders_inc (

orderid INT,

createtime STRING,

modifiedtime STRING,

status STRING

) PARTITIONED BY (day STRING)

row format delimited fields terminated by '\t'

stored AS textfile;

3、创建dw层历史订单表:

drop table t_dw_orders_his;

CREATE TABLE t_dw_orders_his (

orderid INT,

createtime STRING,

modifiedtime STRING,

status STRING,

dw_start_date STRING,

dw_end_date STRING

)row format delimited fields terminated by '\t'

stored AS textfile;

二、初始化dw层历史订单表:

1、将源库订单表历史数据插入到ods订单表中:

INSERT overwrite TABLE t_ods_orders_inc PARTITION (day = '2015-08-20')

SELECT orderid,createtime,modifiedtime,status

FROM chavin.orders

WHERE cast(createtime as date) <= '2015-08-20';

2、通过ods层订单表数据初始化dw层历史订单表:

INSERT overwrite TABLE t_dw_orders_his

SELECT orderid,createtime,modifiedtime,status,

createtime AS dw_start_date,

'9999-12-31' AS dw_end_date

FROM t_ods_orders_inc

WHERE day = '2015-08-20';

三、增量添加数据

1、将原始订单表增量数据插入到ods层订单表前一天分区中:

INSERT overwrite TABLE t_ods_orders_inc PARTITION (day = '2015-08-21')

SELECT orderid,createtime,modifiedtime,status

FROM orders

WHERE createtime = '2015-08-21' OR modifiedtime = '2015-08-21';

2、通过dw历史数据和ods增量数据刷新dw历史数据,此处采用临时表方法:

DROP TABLE IF EXISTS t_dw_orders_his_tmp;

CREATE TABLE t_dw_orders_his_tmp AS

SELECT orderid,

createtime,

modifiedtime,

status,

dw_start_date,

dw_end_date

FROM (

    SELECT a.orderid,

    a.createtime,

    a.modifiedtime,

    a.status,

    a.dw_start_date,

    CASE WHEN b.orderid IS NOT NULL AND a.dw_end_date > '2015-08-21' THEN '2015-08-20' ELSE a.dw_end_date END AS dw_end_date

    FROM t_dw_orders_his a

    left outer join (SELECT * FROM t_ods_orders_inc WHERE day = '2015-08-21') b

    ON (a.orderid = b.orderid)

    UNION ALL

    SELECT orderid,

    createtime,

    modifiedtime,

    status,

    modifiedtime AS dw_start_date,

    '9999-12-31' AS dw_end_date

    FROM t_ods_orders_inc

    WHERE day = '2015-08-21'

) x

ORDER BY orderid,dw_start_date;

3、根据历史表更新dw层历史订单表:

INSERT overwrite TABLE t_dw_orders_his

SELECT * FROM t_dw_orders_his_tmp;

4、根据上面步骤增加22号数据:

--加载增量数据到ods层订单表分区'2015-08-22'中:

1 2015-08-18 2015-08-22 支付

2 2015-08-18 2015-08-22 完成

6 2015-08-20 2015-08-22 支付

9 2015-08-22 2015-08-22 创建

8 2015-08-22 2015-08-22 支付

10 2015-08-22 2015-08-22 支付

alter table t_ods_orders_inc add partition(day='2015-08-22');

load data local inpath '/opt/datas/orders22.txt' into table chavin.t_ods_orders_inc partition(day='2015-08-22');

--根据历史订单数据和增量数据更新历史订单表数据,此处采用临时表:

DROP TABLE IF EXISTS t_dw_orders_his_tmp;

CREATE TABLE t_dw_orders_his_tmp AS

SELECT orderid,

createtime,

modifiedtime,

status,

dw_start_date,

dw_end_date

FROM (

    SELECT a.orderid,

    a.createtime,

    a.modifiedtime,

    a.status,

    a.dw_start_date,

    CASE WHEN b.orderid IS NOT NULL AND a.dw_end_date > '2015-08-22' THEN '2015-08-21' ELSE a.dw_end_date END AS dw_end_date

    FROM t_dw_orders_his a

    left outer join (SELECT * FROM t_ods_orders_inc WHERE day = '2015-08-22') b

    ON (a.orderid = b.orderid)

    UNION ALL

    SELECT orderid,

    createtime,

    modifiedtime,

    status,

    modifiedtime AS dw_start_date,

    '9999-12-31' AS dw_end_date

    FROM t_ods_orders_inc

    WHERE day = '2015-08-22'

) x

ORDER BY orderid,dw_start_date;

--根据临时表更新历史订单表:

INSERT overwrite TABLE t_dw_orders_his

SELECT * FROM t_dw_orders_his_tmp;

5、查看2015-08-21、2015-08-21历史快照:

select * from t_dw_orders_his where dw_start_date <= '2015-08-21' and dw_end_date >= '2015-08-21';

select * from t_dw_orders_his where dw_start_date <= '2015-08-22' and dw_end_date >= '2015-08-22';

posted @ 2017-09-27 13:05  ChavinKing  阅读(2917)  评论(0编辑  收藏  举报