MariaDB删除重复记录性能测试

删除重复记录,只保留id最大的一条记录的性能测试

环境

测试表的id为是唯一的,或是自增的主键。

mysql不能直接写循环,只能写在存储过程里。

存储过程usp_batch_insert的参数num_count为插入总行数,参数batch_commit为每批提交的行数。

由于是测试,先把bin log关闭。在生产上做删除重复记录操作,不能随意关闭,根据业务而定。

SET session sql_log_bin = 0;

创建测试表t3

CREATE TABLE `t3` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `f1` varchar(32) DEFAULT NULL,
  `f2` varchar(32) DEFAULT NULL,
  `ctime` datetime(3) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

每批量提交的记录表t3_log

CREATE TABLE `t3_log` (
  `id` int(10) unsigned NOT NULL AUTO_INCREMENT,
  `i` bigint(20) DEFAULT NULL,
  `ctime` datetime(3) DEFAULT NULL,
  PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;

构造数据的存储过程

DELIMITER $$
DROP PROCEDURE IF EXISTS `usp_batch_insert`;
CREATE PROCEDURE `usp_batch_insert`(IN `num_count` int,IN `batch_commit` int)
BEGIN
DECLARE	i INT; 
SET i = 1;
SET AUTOCOMMIT = 0;

WHILE i <= num_count DO 
  INSERT INTO t3 (f1, f2, ctime) 
  SELECT REPLACE(UUID(), '-', '') AS a, REPLACE(UUID(), '-', '') AS b, NOW(3) AS c;
  SET i = i + 1;
  IF MOD(i, batch_commit) <= 0 THEN
    INSERT INTO t3_log (i, ctime) VALUES (i, NOW(3));
      COMMIT;
    END IF;
END WHILE; 

SET AUTOCOMMIT = 1;
END; $$
DELIMITER ;

生成200万的测试数据

CALL usp_batch_insert(2000000, 5000);

把一部分数据重复

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 123456,10000;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 15234567,254321;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 345678,654321;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 654321,45678;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 886,123456;

INSERT INTO t3 (f1,f2,ctime) 
SELECT f1, f2, NOW(3) FROM t3 LIMIT 15,2000;

数据已经构造完,全表280多万行记录,需要删除的数据有80多万。

下面就来测试下全过程的时间,总耗时是216秒,其中删除部分约29秒。

如果要保留最小id的那行记录,则把max()函数修改为min()函数。

CREATE OR REPLACE TABLE _tmp_t3 (id INT NOT NULL PRIMARY KEY);

INSERT INTO _tmp_t3 (id)
SELECT id
FROM t3 
WHERE id NOT IN (
		SELECT maxid FROM 
				(SELECT max(id) AS  maxid FROM t3
						GROUP BY f1, f2				
				) b
);

DELETE a FROM t3 as a INNER JOIN _tmp_t3 as b on b.id = a.id;

如果29秒可能会影响业务,可以做成存储过程,分批删除。

DELIMITER $$
DROP PROCEDURE IF EXISTS `usp_batch_delete`;

CREATE PROCEDURE `usp_batch_delete`(IN `batch_commit` int)
BEGIN
DECLARE i INT; 
DECLARE num_count INT;
SET i = 1; 
SELECT MAX(id) INTO num_count FROM _tmp_t3;
SET AUTOCOMMIT = 0;

WHILE i <= num_count DO

DELETE a FROM t3 as a INNER JOIN _tmp_t3 as b on b.id = a.id AND b.id = i;
SET i = i + 1;

IF MOD(1, batch_commit) >= 0 THEN
COMMIT;
END IF;

END WHILE;

SET AUTOCOMMIT = 1;
END; $$
DELIMITER ;



posted @ 2018-06-13 13:38  FishParadise  阅读(332)  评论(0编辑  收藏  举报