面试题: mysql 数据库去重 已看1 不好使
mysql去重面试总结
前言:题目大概是这样的。
建表:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
CREATE TABLE `test2` ( `id` int (11) NOT NULL AUTO_INCREMENT, `peopleId` int (11) DEFAULT NULL , ` name ` varchar (255) DEFAULT NULL , PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=7 DEFAULT CHARSET=utf8; -- ---------------------------- -- Records of test2 -- ---------------------------- INSERT INTO `test2` VALUES ( '1' , '1' , '倒一' ); INSERT INTO `test2` VALUES ( '2' , '1' , '倒一' ); INSERT INTO `test2` VALUES ( '3' , '3' , '等等' ); INSERT INTO `test2` VALUES ( '4' , '2' , '421' ); INSERT INTO `test2` VALUES ( '5' , '2' , '421' ); INSERT INTO `test2` VALUES ( '6' , '2' , '421' ); |
1、查找表中多余的重复记录,重复记录是根据单个字段(Id)来判断
网上答案:select * from 表 where Id in (select Id from 表 group byId having count(Id) > 1)
mysql:select * from test2 where id in (select id from test2 group by peopleId having count(peopleId) > 1)
2、删除表中多余的重复记录,重复记录是根据单个字段(Id)来判断,只留有rowid最小的记录
网上答案:DELETE from 表 WHERE (id) IN ( SELECT id FROM 表 GROUP BY id HAVING COUNT(id) > 1) AND ROWID NOT IN (SELECT MIN(ROWID) FROM 表 GROUP BY id HAVING COUNT(*) > 1);
mysql:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
|
DELETE FROM test2 WHERE peopleId IN ( select a.peopleId FROM ( SELECT * FROM test2 GROUP BY peopleId HAVING count (peopleId) > 1 )a ) AND id NOT IN ( select b.id FROM ( SELECT * FROM test2 GROUP BY name HAVING count ( name ) > 1 )b ) |
3、查找表中多余的重复记录(多个字段)
select * from 表 a where (a.Id,a.seq) in(select Id,seq from 表 group by Id,seq having count(*) > 1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
SELECT * FROM test3 a WHERE (a.id, a.seq) IN ( SELECT id, seq FROM test3 GROUP BY id, seq HAVING count (*) > 1 ) |
4、删除表中多余的重复记录(多个字段),只留有rowid最小的记录
delete from 表 a where (a.Id,a.seq) in (select Id,seq from 表 group by Id,seq having count() > 1) and rowid not in (select min(rowid) from 表 group by Id,seq having count()>1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
DELETE FROM test3 WHERE (id, seq) IN ( SELECT a.id, a.seq FROM ( SELECT id, seq FROM test3 GROUP BY id, seq HAVING count (*) > 1 ) a ) AND (id, seq, ` name `) NOT IN ( SELECT b.* FROM ( SELECT * FROM test3 GROUP BY id, seq HAVING count (*) > 1 ) b ) |
5、查找表中多余的重复记录(多个字段),不包含rowid最小的记录
select * from 表 a where (a.Id,a.seq) in (select Id,seq from 表 group by Id,seq having count() > 1) and rowid not in (select min(rowid) from 表 group by Id,seq having count()>1)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
|
select * FROM test3 WHERE (id, seq) IN ( SELECT a.id, a.seq FROM ( SELECT id, seq FROM test3 GROUP BY id, seq HAVING count (*) > 1 ) a ) AND (id, seq, ` name `) NOT IN ( SELECT b.* FROM ( SELECT * FROM test3 GROUP BY id, seq HAVING count (*) > 1 ) b ) |
胜负查询:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
CREATE TABLE `t_game` ( `game_date` varchar (255) DEFAULT NULL , `game_res` varchar (255) DEFAULT NULL ) ENGINE=InnoDB DEFAULT CHARSET=utf8; -- ---------------------------- -- Records of t_game -- ---------------------------- INSERT INTO `t_game` VALUES ( '2018-03-20' , '胜' ); INSERT INTO `t_game` VALUES ( '2018-03-20' , '胜' ); INSERT INTO `t_game` VALUES ( '2018-03-20' , '负' ); INSERT INTO `t_game` VALUES ( '2018-03-21' , '负' ); INSERT INTO `t_game` VALUES ( '2018-03-21' , '胜' ); INSERT INTO `t_game` VALUES ( '2018-03-21' , '负' ); |
1
2
3
|
select game_date,( select count (*) from t_game where game_date = t.game_date and game_res = '胜' ) as '胜' ,( select count (*) from t_game where game_date = t.game_date and game_res = '负' ) as '负' from t_game as t group by game_date; |
总结
1、delete不能有别名
2、mysql不支持又查又改,要用临时表
3、mysql不支持rowid