vertica merge 优化

-- 查看RDS的订单数(MySQL)

select count(*) from  (   SELECT   tid,    IF(LOCATE('pay_time', jdp_response)=0,null,SUBSTR(jdp_response,LOCATE('pay_time', jdp_response)+11,19)) pay_time   FROM jdp_tb_trade   where jdp_modified>='2015-11-11'  ) t0  where t0.pay_time is not null and t0.pay_time>='2015-11-11 00:00:00' and t0.pay_time<'2015-11-12 00:00:00' ;

-- 402277

-- 查看同步并解析的订单数(Vertica) select count(*) from trade where jdp_modified >='2015-11-11 00:00:00'   and pay_time >= '2015-11-11 00:00:00' and pay_time <'2015-11-12 00:00:00'

-- 402277

-- 查看数据仓库的订单数 select count(*) from rds_shop_order_detail where  pay_time >= '2015-11-11 00:00:00' and pay_time <'2015-11-12 00:00:00';

-- 402253 数据仓库漏单了....

问题记录:

1\双11晚上,同步数据丢失5W多数据。 原因:未明 解决方法:重新指定同步数据时间区间。

2\修改增量同步判读字段modified为jdp_modified.

3\加快crontab频率导致错误。 原因:由于数据还在入库时,新的JOB清空临时表,导致表被锁。 解决方法:加快入库速度,优化SQL秒出,慢SQL需要30分钟以上

以下语句满足优化的merge的条件 3.1\目标表的关联列有主键 3.2\所有目标表的列都包括在更新插入语句中 3.3\插入、更新列的属性相等 特别注意2.2,这种写法在oracle语法会报错,而在vertica如果不这样写,merge是全表扫描!

未优化的SQL: MERGE INTO jdp_tb_trade a USING jdp_tb_trade_tmp b ON a.tid = b.tid WHEN MATCHED THEN UPDATE SET status = b.status , type = b.type , seller_nick = b.seller_nick , buyer_nick = b.buyer_nick , created = b.created , modified = b.modified , jdp_hashcode = b.jdp_hashcode , jdp_response = b.jdp_response , jdp_created = b.jdp_created , jdp_modified = b.jdp_modified , load_time = substring(to_char(sysdate),0,20) WHEN NOT MATCHED THEN INSERT         ( tid , status , type , seller_nick , buyer_nick , created , modified , jdp_hashcode , jdp_response , jdp_created , jdp_modified , load_time )  VALUES ( b.tid , b.status , b.type , b.seller_nick , b.buyer_nick , b.created , b.modified , b.jdp_hashcode , b.jdp_response , b.jdp_created , b.jdp_modified , substring(to_char(sysdate),0,20) )

优化的SQL: MERGE INTO jdp_tb_trade a USING jdp_tb_trade_tmp b ON a.tid = b.tid WHEN MATCHED THEN UPDATE SET tid = b.tid , status = b.status , type = b.type , seller_nick = b.seller_nick , buyer_nick = b.buyer_nick , created = b.created , modified = b.modified , jdp_hashcode = b.jdp_hashcode , jdp_response = b.jdp_response , jdp_created = b.jdp_created , jdp_modified = b.jdp_modified , load_time = substring(to_char(sysdate),0,20) WHEN NOT MATCHED THEN INSERT         ( tid , status , type , seller_nick , buyer_nick , created , modified , jdp_hashcode , jdp_response , jdp_created , jdp_modified , load_time )  VALUES ( b.tid , b.status , b.type , b.seller_nick , b.buyer_nick , b.created , b.modified , b.jdp_hashcode , b.jdp_response , b.jdp_created , b.jdp_modified , substring(to_char(sysdate),0,20) )  

一些有用SQL:

-- 查看session

SELECT  t.* FROM SESSIONS t; 

-- 查看lock

SELECT * FROM locks;

-- 查看正在锁的session

SELECT s.session_id, s.transaction_id,l.object_name, l.transaction_description, l.lock_mode FROM SESSIONS s inner join locks l on s.transaction_id = l.transaction_id; -- 查看transactions

SELECT * FROM transactions ;

-- 表分析

SELECT ANALYZE_STATISTICS('jdp_tb_trade');

-- 关闭session  

SELECT CLOSE_SESSION('inman-10893:0x1d464');

--关闭全部session

SELECT CLOSE_ALL_SESSIONS();

posted on 2015-11-12 15:38  kenwong  阅读(1691)  评论(0编辑  收藏  举报

导航