【Oracle】删除重复记录

--复习autotrace:
SET AUTOTRACE OFF               --不生成AUTOTRACE 报告,这是缺省模式
SET AUTOTRACE ON EXPLAIN        --AUTOTRACE只显示优化器执行路径报告
SET AUTOTRACE ON STATISTICS     --只显示执行统计信息
SET AUTOTRACE ON                --包含执行计划和统计信息
SET AUTOTRACE TRACEONLY         --同set autotrace on,但是不显示查询输出

--案例模拟
--查询原始表:
23:13:36 SQL> select * from emp;

     EMPNO ENAME      JOB              MGR HIREDATE            SAL       COMM     DEPTNO
---------- ---------- --------- ---------- ------------ ---------- ---------- ----------
      7698 BLAKE      MANAGER         7839 01-MAY-81          2850                    30
      7782 CLARK      MANAGER         7839 09-JUN-81          2450                    10
      7788 SCOTT      ANALYST         7566 19-APR-87          3000                    20
      7839 KING       PRESIDENT            17-NOV-81          5000                    10
      7844 TURNER     SALESMAN        7698 08-SEP-81          1500          0         30
      7876 ADAMS      CLERK           7788 23-MAY-87          1100                    20
      7900 JAMES      CLERK           7698 03-DEC-81           950                    30
      7902 FORD       ANALYST         7566 03-DEC-81          3000                    20
      7934 MILLER     CLERK           7782 23-JAN-82          1300                    10
      7369 SMITH      CLERK           7902 17-DEC-80           800                    20
      7499 ALLEN      SALESMAN        7698 20-FEB-81          1600        300         30
      7521 WARD       SALESMAN        7698 22-FEB-81          1250        500         30
      7566 JONES      MANAGER         7839 02-APR-81          2975                    20
      7654 MARTIN     SALESMAN        7698 28-SEP-81          1250       1400         30

14 rows selected.

Elapsed: 00:00:00.09

--模拟,插入重复记录
INSERT INTO emp select * from emp;

--检查重复记录
23:13:39 SQL> SELECT COUNT(empno), empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1;

COUNT(EMPNO)      EMPNO
------------ ----------
           2       7782
           2       7839
           2       7844
           2       7698
           2       7902
           2       7521
           2       7788
           2       7934
           2       7566
           2       7654
           2       7499
           2       7876
           2       7900
           2       7369

14 rows selected.

Elapsed: 00:00:00.04

--方法一(只保留ROWID最小的记录):
DELETE FROM emp
WHERE  empno IN
       (SELECT empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1)
AND    ROWID NOT IN
       (SELECT MIN(ROWID) FROM emp GROUP BY empno HAVING COUNT(empno) > 1);

[lubinsu@200-168-46-3 shell_monitor]$ sqlplus lubinsu/lubinsu

SQL*Plus: Release 10.2.0.1.0 - Production on Sun Aug 18 23:09:34 2013

Copyright (c) 1982, 2005, Oracle.  All rights reserved.


Connected to:
Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 - Production
With the Partitioning, OLAP and Data Mining options

23:09:46 SQL> set timing on
23:09:51 SQL> set autotrace traceonly
23:09:58 SQL> set linesize 200
23:17:32 SQL> DELETE FROM emp
23:17:38   2  WHERE  empno IN
23:17:40   3         (SELECT empno FROM emp GROUP BY empno HAVING COUNT(empno) > 1)
23:17:45   4  AND    ROWID NOT IN
23:17:49   5         (SELECT MIN(ROWID) FROM emp GROUP BY empno HAVING COUNT(empno) > 1);

14 rows deleted.

Elapsed: 00:00:00.07

Execution Plan
----------------------------------------------------------
Plan hash value: 319293636

------------------------------------------------------------------------------------
| Id  | Operation               | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
------------------------------------------------------------------------------------
|   0 | DELETE STATEMENT        |          |     1 |    41 |     9  (34)| 00:00:01 |
|   1 |  DELETE                 | EMP      |       |       |            |          |
|*  2 |   HASH JOIN ANTI        |          |     1 |    41 |     9  (34)| 00:00:01 |
|*  3 |    HASH JOIN SEMI       |          |     1 |    29 |     6  (34)| 00:00:01 |
|   4 |     TABLE ACCESS FULL   | EMP      |    14 |   224 |     2   (0)| 00:00:01 |
|   5 |     VIEW                | VW_NSO_1 |     1 |    13 |     3  (34)| 00:00:01 |
|*  6 |      FILTER             |          |       |       |            |          |
|   7 |       SORT GROUP BY     |          |     1 |     4 |     3  (34)| 00:00:01 |
|   8 |        TABLE ACCESS FULL| EMP      |    14 |    56 |     2   (0)| 00:00:01 |
|   9 |    VIEW                 | VW_NSO_2 |     1 |    12 |     3  (34)| 00:00:01 |
|* 10 |     FILTER              |          |       |       |            |          |
|  11 |      SORT GROUP BY      |          |     1 |    16 |     3  (34)| 00:00:01 |
|  12 |       TABLE ACCESS FULL | EMP      |    14 |   224 |     2   (0)| 00:00:01 |
------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access(ROWID="$nso_col_1")
   3 - access("EMPNO"="$nso_col_1")
   6 - filter(COUNT("EMPNO")>1)
  10 - filter(COUNT("EMPNO")>1)


Statistics
----------------------------------------------------------
          1  recursive calls
         15  db block gets
      16254  consistent gets
          0  physical reads
       4256  redo size
        925  bytes sent via SQL*Net to client
       1116  bytes received via SQL*Net from client
          6  SQL*Net roundtrips to/from client
          3  sorts (memory)
          0  sorts (disk)
         14  rows processed
         
--方法二、使用分析函数(只保留ROWID最小的记录):
23:17:57 SQL> rollback;

Rollback complete.

Elapsed: 00:00:00.02
23:19:11 SQL> DELETE FROM emp
23:22:22   2  WHERE  ROWID IN (SELECT ROWID
23:22:28   3                   FROM   (SELECT ROWID row_id,
23:22:32   4                                  row_number() over(PARTITION BY empno ORDER BY ROWID ASC) row_num
23:22:37   5                           FROM   emp e) v
23:22:41   6                   WHERE  v.row_num >= 2);

14 rows deleted.

Elapsed: 00:00:00.04

Execution Plan
----------------------------------------------------------
Plan hash value: 3671588261

-----------------------------------------------------------------------------------------
| Id  | Operation                    | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
-----------------------------------------------------------------------------------------
|   0 | DELETE STATEMENT             |          |     1 |    24 |     5  (40)| 00:00:01 |
|   1 |  DELETE                      | EMP      |       |       |            |          |
|   2 |   NESTED LOOPS               |          |     1 |    24 |     5  (40)| 00:00:01 |
|   3 |    VIEW                      | VW_NSO_1 |    14 |   168 |     3  (34)| 00:00:01 |
|   4 |     SORT UNIQUE              |          |     1 |   350 |            |          |
|*  5 |      VIEW                    |          |    14 |   350 |     3  (34)| 00:00:01 |
|   6 |       WINDOW SORT            |          |    14 |   224 |     3  (34)| 00:00:01 |
|   7 |        TABLE ACCESS FULL     | EMP      |    14 |   224 |     2   (0)| 00:00:01 |
|   8 |    TABLE ACCESS BY USER ROWID| EMP      |     1 |    12 |     1   (0)| 00:00:01 |
-----------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   5 - filter("V"."ROW_NUM">=2)


Statistics
----------------------------------------------------------
          1  recursive calls
         18  db block gets
       5434  consistent gets
          0  physical reads
       4448  redo size
        925  bytes sent via SQL*Net to client
       1190  bytes received via SQL*Net from client
          6  SQL*Net roundtrips to/from client
          3  sorts (memory)
          0  sorts (disk)
         14  rows processed

--方法三、
23:22:45 SQL> DELETE FROM emp p WHERE ROWID NOT IN(SELECT MAX(ROWID) FROM emp i WHERE i.empno = p.empno);

0 rows deleted.

Elapsed: 00:00:00.13

Execution Plan
----------------------------------------------------------
Plan hash value: 1987023575

---------------------------------------------------------------------------------
| Id  | Operation             | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------
|   0 | DELETE STATEMENT      |         |    13 |   533 |     6  (34)| 00:00:01 |
|   1 |  DELETE               | EMP     |       |       |            |          |
|*  2 |   HASH JOIN           |         |    13 |   533 |     6  (34)| 00:00:01 |
|   3 |    VIEW               | VW_SQ_1 |    14 |   350 |     3  (34)| 00:00:01 |
|   4 |     SORT GROUP BY     |         |    14 |   224 |     3  (34)| 00:00:01 |
|   5 |      TABLE ACCESS FULL| EMP     |    14 |   224 |     2   (0)| 00:00:01 |
|   6 |    TABLE ACCESS FULL  | EMP     |    14 |   224 |     2   (0)| 00:00:01 |
---------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access("EMPNO"="P"."EMPNO")
       filter(ROWID<>"VW_COL_1")


Statistics
----------------------------------------------------------
        168  recursive calls
          0  db block gets
      10954  consistent gets
          0  physical reads
          0  redo size
        925  bytes sent via SQL*Net to client
       1008  bytes received via SQL*Net from client
          6  SQL*Net roundtrips to/from client
          2  sorts (memory)
          0  sorts (disk)
          0  rows processed
          
--方法四、和方法三是类似的:
Elapsed: 00:00:00.03
23:28:25 SQL> DELETE FROM emp p WHERE ROWID < (SELECT MAX(ROWID) FROM emp i WHERE i.empno = p.empno);

14 rows deleted.

Elapsed: 00:00:00.06

Execution Plan
----------------------------------------------------------
Plan hash value: 1987023575

---------------------------------------------------------------------------------
| Id  | Operation             | Name    | Rows  | Bytes | Cost (%CPU)| Time     |
---------------------------------------------------------------------------------
|   0 | DELETE STATEMENT      |         |     1 |    41 |     6  (34)| 00:00:01 |
|   1 |  DELETE               | EMP     |       |       |            |          |
|*  2 |   HASH JOIN           |         |     1 |    41 |     6  (34)| 00:00:01 |
|   3 |    VIEW               | VW_SQ_1 |    14 |   350 |     3  (34)| 00:00:01 |
|   4 |     SORT GROUP BY     |         |    14 |   224 |     3  (34)| 00:00:01 |
|   5 |      TABLE ACCESS FULL| EMP     |    14 |   224 |     2   (0)| 00:00:01 |
|   6 |    TABLE ACCESS FULL  | EMP     |    14 |   224 |     2   (0)| 00:00:01 |
---------------------------------------------------------------------------------

--方法五、使用group by,同样类似三四
23:30:01 SQL> DELETE FROM emp p WHERE ROWID NOT IN (SELECT MAX(ROWID) FROM emp i GROUP BY i.empno);

14 rows deleted.

Elapsed: 00:00:00.06

Execution Plan
----------------------------------------------------------
Plan hash value: 3239269824

----------------------------------------------------------------------------------
| Id  | Operation             | Name     | Rows  | Bytes | Cost (%CPU)| Time     |
----------------------------------------------------------------------------------
|   0 | DELETE STATEMENT      |          |     1 |    24 |     6  (34)| 00:00:01 |
|   1 |  DELETE               | EMP      |       |       |            |          |
|*  2 |   HASH JOIN ANTI      |          |     1 |    24 |     6  (34)| 00:00:01 |
|   3 |    TABLE ACCESS FULL  | EMP      |    14 |   168 |     2   (0)| 00:00:01 |
|   4 |    VIEW               | VW_NSO_1 |    14 |   168 |     3  (34)| 00:00:01 |
|   5 |     SORT GROUP BY     |          |    14 |   224 |     3  (34)| 00:00:01 |
|   6 |      TABLE ACCESS FULL| EMP      |    14 |   224 |     2   (0)| 00:00:01 |
----------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   2 - access(ROWID="$nso_col_1")


Statistics
----------------------------------------------------------
          0  recursive calls
         17  db block gets
      10836  consistent gets
          0  physical reads
       4324  redo size
        926  bytes sent via SQL*Net to client
       1002  bytes received via SQL*Net from client
          6  SQL*Net roundtrips to/from client
          2  sorts (memory)
          0  sorts (disk)
         14  rows processed

--总结,还是自己想出来的方法二效率高,:)



posted @ 2013-08-19 19:55  pangbangb  阅读(210)  评论(0编辑  收藏  举报