MYSQL 查找单个字段或者多个字段重复数据,清除重复数据
重复数据的情况分为两种:
单个字段 数据重复
多个字段 数据重复
所以该篇的内容就是包括
单个字段的重复数据查找 与 去重
多个字段的重复数据查找 与 去重
第一步 先查出来所有重复的数据
select t.id,t.subject_id,t.image_id,t.mark_person_id from dms_mark_image_info t,
(
select id,subject_id,image_id,count(image_id),mark_person_id,count(mark_person_id) from dms_mark_image_info group by mark_person_id,image_id having (count(mark_person_id) > 1) and (count(image_id) > 1)
) a
where t.image_id = a.image_id and t.mark_person_id = a.mark_person_id;
select count(1) from dms_mark_image_info;
第二步 去重 在每一个相同记录里面保留其中一条 (id最小的一条记录)
select min(id) as id from
(
select t.id,t.subject_id,t.image_id,t.mark_person_id from dms_mark_image_info t,
(
select id,subject_id,image_id,count(image_id),mark_person_id,count(mark_person_id) from dms_mark_image_info group by mark_person_id,image_id having (count(mark_person_id) > 1) and (count(image_id) > 1)
) a
where t.image_id = a.image_id and t.mark_person_id = a.mark_person_id
第三步 就是从涉及重复数据里面,找出除了我们需要保留的id之外的那批id, 那就是需要我们执行删除的数据id。
select b.id from (
select t.id,t.subject_id,t.image_id,t.mark_person_id from dms_mark_image_info t,
(
select id,subject_id,image_id,count(image_id),mark_person_id,count(mark_person_id) from dms_mark_image_info group by mark_person_id,image_id having (count(mark_person_id) > 1) and (count(image_id) > 1)
) a
where t.image_id = a.image_id and t.mark_person_id = a.mark_person_id
) b
where b.id not in
(
select min(id) as id from
(
select t.id,t.subject_id,t.image_id,t.mark_person_id from dms_mark_image_info t,
(
select id,subject_id,image_id,count(image_id),mark_person_id,count(mark_person_id) from dms_mark_image_info group by mark_person_id,image_id having (count(mark_person_id) > 1) and (count(image_id) > 1)
) a
where t.image_id = a.image_id and t.mark_person_id = a.mark_person_id
) a group by a.image_id,a.mark_person_id
);
第四步 执行删除重复的数据
delete from dms_mark_image_info where id in
(
select b.id from (
select t.id,t.subject_id,t.image_id,t.mark_person_id from dms_mark_image_info t,
(
select id,subject_id,image_id,count(image_id),mark_person_id,count(mark_person_id) from dms_mark_image_info group by mark_person_id,image_id having (count(mark_person_id) > 1) and (count(image_id) > 1)
) a
where t.image_id = a.image_id and t.mark_person_id = a.mark_person_id
) b
where b.id not in
(
select min(id) as id from
(
select t.id,t.subject_id,t.image_id,t.mark_person_id from dms_mark_image_info t,
(
select id,subject_id,image_id,count(image_id),mark_person_id,count(mark_person_id) from dms_mark_image_info group by mark_person_id,image_id having (count(mark_person_id) > 1) and (count(image_id) > 1)
) a
where t.image_id = a.image_id and t.mark_person_id = a.mark_person_id
) a group by a.image_id,a.mark_person_id
)
);
作者:三尺青锋
本文链接:https://www.cnblogs.com/zhangjiale/p/17394397.html
本文来自博客园,作者:Java-三尺青锋,转载请注明原文链接:https://www.cnblogs.com/zhangjiale/p/17394397.html