vertica merge 优化
-- 查看RDS的订单数(MySQL)
select count(*) from ( SELECT tid, IF(LOCATE('pay_time', jdp_response)=0,null,SUBSTR(jdp_response,LOCATE('pay_time', jdp_response)+11,19)) pay_time FROM jdp_tb_trade where jdp_modified>='2015-11-11' ) t0 where t0.pay_time is not null and t0.pay_time>='2015-11-11 00:00:00' and t0.pay_time<'2015-11-12 00:00:00' ;
-- 402277
-- 查看同步并解析的订单数(Vertica) select count(*) from trade where jdp_modified >='2015-11-11 00:00:00' and pay_time >= '2015-11-11 00:00:00' and pay_time <'2015-11-12 00:00:00'
-- 402277
-- 查看数据仓库的订单数 select count(*) from rds_shop_order_detail where pay_time >= '2015-11-11 00:00:00' and pay_time <'2015-11-12 00:00:00';
-- 402253 数据仓库漏单了....
问题记录:
1\双11晚上,同步数据丢失5W多数据。 原因:未明 解决方法:重新指定同步数据时间区间。
2\修改增量同步判读字段modified为jdp_modified.
3\加快crontab频率导致错误。 原因:由于数据还在入库时,新的JOB清空临时表,导致表被锁。 解决方法:加快入库速度,优化SQL秒出,慢SQL需要30分钟以上
以下语句满足优化的merge的条件 3.1\目标表的关联列有主键 3.2\所有目标表的列都包括在更新插入语句中 3.3\插入、更新列的属性相等 特别注意2.2,这种写法在oracle语法会报错,而在vertica如果不这样写,merge是全表扫描!
未优化的SQL: MERGE INTO jdp_tb_trade a USING jdp_tb_trade_tmp b ON a.tid = b.tid WHEN MATCHED THEN UPDATE SET status = b.status , type = b.type , seller_nick = b.seller_nick , buyer_nick = b.buyer_nick , created = b.created , modified = b.modified , jdp_hashcode = b.jdp_hashcode , jdp_response = b.jdp_response , jdp_created = b.jdp_created , jdp_modified = b.jdp_modified , load_time = substring(to_char(sysdate),0,20) WHEN NOT MATCHED THEN INSERT ( tid , status , type , seller_nick , buyer_nick , created , modified , jdp_hashcode , jdp_response , jdp_created , jdp_modified , load_time ) VALUES ( b.tid , b.status , b.type , b.seller_nick , b.buyer_nick , b.created , b.modified , b.jdp_hashcode , b.jdp_response , b.jdp_created , b.jdp_modified , substring(to_char(sysdate),0,20) )
优化的SQL: MERGE INTO jdp_tb_trade a USING jdp_tb_trade_tmp b ON a.tid = b.tid WHEN MATCHED THEN UPDATE SET tid = b.tid , status = b.status , type = b.type , seller_nick = b.seller_nick , buyer_nick = b.buyer_nick , created = b.created , modified = b.modified , jdp_hashcode = b.jdp_hashcode , jdp_response = b.jdp_response , jdp_created = b.jdp_created , jdp_modified = b.jdp_modified , load_time = substring(to_char(sysdate),0,20) WHEN NOT MATCHED THEN INSERT ( tid , status , type , seller_nick , buyer_nick , created , modified , jdp_hashcode , jdp_response , jdp_created , jdp_modified , load_time ) VALUES ( b.tid , b.status , b.type , b.seller_nick , b.buyer_nick , b.created , b.modified , b.jdp_hashcode , b.jdp_response , b.jdp_created , b.jdp_modified , substring(to_char(sysdate),0,20) )
一些有用SQL:
-- 查看session
SELECT t.* FROM SESSIONS t;
-- 查看lock
SELECT * FROM locks;
-- 查看正在锁的session
SELECT s.session_id, s.transaction_id,l.object_name, l.transaction_description, l.lock_mode FROM SESSIONS s inner join locks l on s.transaction_id = l.transaction_id; -- 查看transactions
SELECT * FROM transactions ;
-- 表分析
SELECT ANALYZE_STATISTICS('jdp_tb_trade');
-- 关闭session
SELECT CLOSE_SESSION('inman-10893:0x1d464');
--关闭全部session
SELECT CLOSE_ALL_SESSIONS();