Oracle表连接的优化案例(十七)
Nested Loops Join 请确保用在局部扫描的OLTP场景:
驱动表的限制条件有索引
/* 结论: Nested Loops Join连接优化,驱动表的限制条件有索引! */ --环境构造 --研究Nested Loops Join访问次数前准备工作 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 100 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; select count(*) from t1; select count(*) from t2; --Nested Loops Join两表无索引试验 set linesize 1000 set autotrace off alter session set statistics_level=all ; SELECT /*+ leading(t1) use_nl(t2) */ * FROM t1, t2 WHERE t1.id = t2.t1_id AND t1.n = 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | ------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 1014 | | 1 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.01 | 1014 | |* 2 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 8 | |* 3 | TABLE ACCESS FULL| T2 | 1 | 1 | 1 |00:00:00.01 | 1006 | ------------------------------------------------------------------------------------- 2 - filter("T1"."N"=19) 3 - filter("T1"."ID"="T2"."T1_ID") ----两表无索引场合如果不用HINT,一般走Hash Join alter session set statistics_level=all ; SELECT * FROM t1, t2 WHERE t1.id = t2.t1_id AND t1.n = 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ---------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.06 | 1013 | | | | |* 1 | HASH JOIN | | 1 | 1 | 1 |00:00:00.06 | 1013 | 742K| 742K| 376K (0)| |* 2 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 7 | | | | | 3 | TABLE ACCESS FULL| T2 | 1 | 100K| 100K|00:00:00.02 | 1006 | | | | ---------------------------------------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 1 - access("T1"."ID"="T2"."T1_ID") 2 - filter("T1"."N"=19) ---对t1表的限制条件建索引 CREATE INDEX t1_n ON t1 (n); ---有了限制条件的索引,Nested Loops Join性能略有提升 set linesize 1000 alter session set statistics_level=all ; SELECT /*+ leading(t1) use_nl(t2) */ * FROM t1, t2 WHERE t1.id = t2.t1_id AND t1.n = 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); -------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | Reads | -------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.06 | 1009 | 1007 | | 1 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.06 | 1009 | 1007 | | 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | 6 | |* 3 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | 1 | |* 4 | TABLE ACCESS FULL | T2 | 1 | 1 | 1 |00:00:00.05 | 1006 | 1001 | -------------------------------------------------------------------------------------------------------- 3 - access("T1"."N"=19) 4 - filter("T1"."ID"="T2"."T1_ID") ---不过发现,增加了索引后Oracle不用HINT,还是走HASH连接。 alter session set statistics_level=all ; SELECT * FROM t1, t2 WHERE t1.id = t2.t1_id AND t1.n = 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); -------------------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | -------------------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.05 | 1008 | | | | |* 1 | HASH JOIN | | 1 | 1 | 1 |00:00:00.05 | 1008 | 742K| 742K| 350K (0)| | 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 2 | | | | |* 3 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 1 | | | | | 4 | TABLE ACCESS FULL | T2 | 1 | 89127 | 100K|00:00:00.02 | 1006 | | | | -------------------------------------------------------------------------------------------------------------------------- 1 - access("T1"."ID"="T2"."T1_ID") 3 - access("T1"."N"=19) /* 原因在于,这个T1表总记录也不过100条,所以用索引效果并没有很明显,如果这个T1表记录有几十万上百万条,那检索一条记录出来 ,用索引效果就非常明显了! */
被驱动表限制条件有索引
/* 结论: 给Nested Loops Join连接优化,被驱动表的连接条件有索引! */ --环境构造 --研究Nested Loops Join访问次数前准备工作 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 100 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; select count(*) from t1; select count(*) from t2; --(对驱动表t1表的限制条件建索引),如下 CREATE INDEX t1_n ON t1 (n); --(对被驱动表t2表的连接条件建索引),如下: CREATE INDEX t2_t1_id ON t2(t1_id); ----这下表连接性能有了大幅度提升 alter session set statistics_level=all ; SELECT /*+ leading(t1) use_nl(t2) */ * FROM t1, t2 WHERE t1.id = t2.t1_id AND t1.n = 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | Reads| ----------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 7 | 4 | | 1 | NESTED LOOPS | | 1 | | 1 |00:00:00.01 | 7 | 4 | | 2 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.01 | 6 | 4 | | 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | 0 | |* 4 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | 0 | |* 5 | INDEX RANGE SCAN | T2_T1_ID | 1 | 1 | 1 |00:00:00.01 | 3 | 4 | | 6 | TABLE ACCESS BY INDEX ROWID | T2 | 1 | 1 | 1 |00:00:00.01 | 1 | 0 | ----------------------------------------------------------------------------------------------------------- 4 - access("T1"."N"=19) 5 - access("T1"."ID"="T2"."T1_ID") --性能有了大幅度提升,BUFFERS居然只有7 ---增加了索引后Oracle不用HINT,终于自己去选择Nested Loops Join alter session set statistics_level=all ; SELECT * FROM t1, t2 WHERE t1.id = t2.t1_id AND t1.n = 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | ---------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 7 | | 1 | NESTED LOOPS | | 1 | | 1 |00:00:00.01 | 7 | | 2 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.01 | 6 | | 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | |* 4 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | |* 5 | INDEX RANGE SCAN | T2_T1_ID | 1 | 1 | 1 |00:00:00.01 | 3 | | 6 | TABLE ACCESS BY INDEX ROWID | T2 | 1 | 1 | 1 |00:00:00.01 | 1 | ---------------------------------------------------------------------------------------------------- 4 - access("T1"."N"=19) 5 - access("T1"."ID"="T2"."T1_ID")
确保小结果集先驱动
/* 结论: Nested Loops Join连接优化,注意驱动表的结果集是否是小的 (在统计信息不准确的时候,经常会出现将大的结果集驱动的情况,需要我们介入判断分析) */ --环境构造 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE INDEX t1_n ON t1 (n); CREATE INDEX t2_t1_id ON t2(t1_id); --然后继续进入SESSION,执行 execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 10000 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T1',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ; exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T2',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ; select count(*) from t1; select count(*) from t2; ----开始试验(正常是小的结果集先访问): set linesize 1000 alter session set statistics_level=all ; SELECT * FROM t1, t2 WHERE t1.id = t2.t1_id AND t1.n <= 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | ---------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 19 |00:00:00.01 | 53 | | 1 | NESTED LOOPS | | 1 | | 19 |00:00:00.01 | 53 | | 2 | NESTED LOOPS | | 1 | 16 | 19 |00:00:00.01 | 34 | | 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 16 | 19 |00:00:00.01 | 23 | |* 4 | INDEX RANGE SCAN | T1_N | 1 | 16 | 19 |00:00:00.01 | 4 | |* 5 | INDEX RANGE SCAN | T2_T1_ID | 19 | 1 | 19 |00:00:00.01 | 11 | | 6 | TABLE ACCESS BY INDEX ROWID | T2 | 19 | 1 | 19 |00:00:00.01 | 19 | ---------------------------------------------------------------------------------------------------- 4 - access("T1"."N"<=19) 5 - access("T1"."ID"="T2"."T1_ID") /* 构造如下: 假如oracle的统计信息不准确 故意错乱颠倒,欺骗Oracle说T1表是小表而T2表是大表。 */ EXEC dbms_stats.SET_table_stats(user, 'T1', numrows => 20000000 ,numblks => 1000000); EXEC dbms_stats.SET_table_stats(user, 'T2', numrows => 1 ,numblks => 1); --结果顺序颠倒了,性能大幅度下降! set linesize 1000 alter session set statistics_level=all ; SELECT * FROM t1, t2 WHERE t1.id t2.t1_id AND t1.n <= 19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); -------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | Reads | -------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 19 |00:00:03.93 | 1801K| 1031 | | 1 | NESTED LOOPS | | 1 | | 19 |00:00:03.93 | 1801K| 1031 | | 2 | NESTED LOOPS | | 1 | 1 | 1900K|00:00:01.17 | 1019 | 1006 | | 3 | TABLE ACCESS FULL | T2 | 1 | 1 | 100K|00:00:00.08 | 1007 | 1001 | |* 4 | INDEX RANGE SCAN | T1_N | 100K| 10000 | 1900K|00:00:00.63 | 12 | 5 | |* 5 | TABLE ACCESS BY INDEX ROWID| T1 | 1900K| 1 | 19 |00:00:02.22 | 1800K| 25 | -------------------------------------------------------------------------------------------------------- 4 - access("T1"."N"<=19) 5 - filter("T1"."ID"="T2"."T1_ID")
Hash Join确保在全表扫描的OLAP场景:
两表限制条件有索引
/* 结论: Hash Join连接优,两表的限制条件有索引(注:针对索引条件返回记录很少的情况)! */ --环境构造 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 10000 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; select count(*) from t1; select count(*) from t2; --Hash连接优化第1式,两边的限制条件有索引 --首先测试Hash Join两表的限制条件皆无索引的情况 alter session set statistics_level=all ; set linesize 1000 SELECT /*+ leading(t2) use_hash(t1)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19 and t2.n=12; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ---------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1104 | | | | |* 1 | HASH JOIN | | 1 | 1 | 0 |00:00:00.01 | 1104 | 738K| 738K| 342K (0)| |* 2 | TABLE ACCESS FULL| T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | | |* 3 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 99 | | | | ---------------------------------------------------------------------------------------------------------------- 1 - access("T1"."ID"="T2"."T1_ID") 2 - filter("T2"."N"=12) 3 - filter("T1"."N"=19) ---在两表的限制条件建索引后,发现索引都可以用到,性能大幅度提升 --首先在t1表的限制条件建索引的情况,测试发现性能果然有提升! create index idx_t1_n on t1(n); SELECT /*+ leading(t2) use_hash(t1)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19 and t2.n=12; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ------------------------------------------------------------------------------------------------------------------------------ | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ------------------------------------------------------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1008 | | | | |* 1 | HASH JOIN | | 1 | 1 | 0 |00:00:00.01 | 1008 | 738K| 738K| 345K (0)| |* 2 | TABLE ACCESS FULL | T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | | | 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | | |* 4 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | | ------------------------------------------------------------------------------------------------------------------------------ 1 - access("T1"."ID"="T2"."T1_ID") 2 - filter("T2"."N"=12) --以上是create index idx_t1_n on t1(n)后的情况 --接下来在t2表的限制条件再建索引,又更快了! create index idx_t2_n on t2(n); SELECT /*+ leading(t2) use_hash(t1)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19 and t2.n=12; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ------------------------------------------------------------------------------------------------------------------------------ | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ------------------------------------------------------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 6 | | | | |* 1 | HASH JOIN | | 1 | 1 | 0 |00:00:00.01 | 6 | 738K| 738K| 367K (0)| | 2 | TABLE ACCESS BY INDEX ROWID| T2 | 1 | 1 | 1 |00:00:00.01 | 3 | | | | |* 3 | INDEX RANGE SCAN | IDX_T2_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | | | 4 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | | |* 5 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | | ------------------------------------------------------------------------------------------------------------------------------ 1 - access("T1"."ID"="T2"."T1_ID") 3 - access("T2"."N"=12) 5 - access("T1"."N"=19) --以上是create index idx_t2_n on t2(n)后的情况
小结果集驱动
/* 结论: Hash Join连接优化,注意驱动表的结果集是否是小的 (在统计信息不准确的时候,经常会出现将大的结果集驱动的情况,需要我们介入判断分析) */ DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 100 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T1',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ; exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T2',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ; select count(*) from t1; select count(*) from t2; --在无索引,且是全扫描的情况下,一般走HASH连接,看下面性能 set linesize 1000 alter session set statistics_level=all; SELECT * FROM t1, t2 WHERE t1.id = t2.t1_id; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ---------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.07 | 1019 | | | | |* 1 | HASH JOIN | | 1 | 100 | 100 |00:00:00.07 | 1019 | 742K| 742K| 1202K (0)| | 2 | TABLE ACCESS FULL| T1 | 1 | 100 | 100 |00:00:00.01 | 7 | | | | | 3 | TABLE ACCESS FULL| T2 | 1 | 89127 | 100K|00:00:00.02 | 1012 | | | | ---------------------------------------------------------------------------------------------------------------- 1 - access("T1"."ID"="T2"."T1_ID") /* 以下是经常出现的案例由于统计信息的错误导致执行计划的错误,我们构造如下: 假如oracle的统计信息不准确 以下故意错乱颠倒,欺骗Oracle说T1表是小表而T2表是大表。 */ EXEC dbms_stats.SET_table_stats(user, 'T1', numrows => 20000000 ,numblks => 1000000); EXEC dbms_stats.SET_table_stats(user, 'T2', numrows => 1 ,numblks => 1); set linesize 1000 alter session set statistics_level=all; SELECT * FROM t1, t2 WHERE t1.id = t2.t1_id; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ---------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.10 | 1019 | | | | |* 1 | HASH JOIN | | 1 | 20M| 100 |00:00:00.10 | 1019 | 9472K| 1956K| 9M (0)| | 2 | TABLE ACCESS FULL| T2 | 1 | 1 | 100K|00:00:00.02 | 1005 | | | | | 3 | TABLE ACCESS FULL| T1 | 1 | 20M| 100 |00:00:00.01 | 14 | | | | ---------------------------------------------------------------------------------------------------------------- 1 - access("T1"."ID"="T2"."T1_ID")
确保在PGA完成HASH运算的尺寸
这里的场景是hash连接占用HASH AREA内存区过多时,这时候我们可以考虑增大PGA 如果是oracle11g,默认是直接增大memory_target Hash Join算法 第1步:判定小表是否能够全部存放在hash area内存中,如果可以,则做内存hash join。如果不行,转第二步。 第2步:决定fan-out数。(Number of Partitions) * C<= Favm *M 其中C为Cluster size, 其值为DB_BLOCK_SIZE*HASH_MULTIBLOCK_IO_COUNT;Favm为hash area内存可以使用的百分比,一般为0.8左右;M为Hash_area_size的大小。 第3步: 读取部分小表S,采用内部hash函数(这里称为hash_fun_1),将连接键值映射至某个分区,同时采用hash_fun_2函数对连接键值产生另外一个hash值,这个hash值用于创建hash table用,并且与连接键值存放在一起。 第4步: 对build input建立位图向量。 第5步: 如果内存中没有空间了,则将分区写至磁盘上。 第6步: 读取小表S的剩余部分,重复第三步,直至小表S全部读完。 第7步: 将分区按大小排序,选取几个分区建立hash table(这里选取分区的原则是使选取的数量最多)。 第8步: 根据前面用hash_fun_2函数计算好的hash值,建立hash table。 第9步: 读取表B,采用位图向量进行位图向量过滤。 第10步:对通过过滤的数据采用hash_fun_1函数将数据映射到相应的分区中去,并计算hash_fun_2的hash值。 第11步:如果所落的分区在内存中,则将前面通过hash_fun_2函数计算所得的hash值与内存中已存在的hash table做连接,将结果写到磁盘上。如果所落的分区不在内存中,则将相应的值与表S相应的分区放在一起。 第12步:继续读取表B,重复第9步,直至表B读取完毕。 第13步:读取相应的(Si,Bi)做hash连接。在这里会发生动态角色互换。 第14步:如果分区过后,最小的分区也比内存大,则发生nested- loop hash join。
Merge Sort Join优化:
两表限制条件有索引
/* 结论: Merge Sort Join连接优化,两表的限制条件有索引(注:针对索引条件返回记录很少的情况)! */ --环境构造 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 10000 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; select count(*) from t1; select count(*) from t2; --两边的限制条件有索引 --首先是,两表限制条件皆无索引的情况,如下 alter session set statistics_level=all ; set linesize 1000 SELECT /*+ leading(t2) use_merge(t1)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19 and t2.n=12; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ----------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ----------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1104 | | | | | 1 | MERGE JOIN | | 1 | 1 | 0 |00:00:00.01 | 1104 | | | | | 2 | SORT JOIN | | 1 | 11 | 1 |00:00:00.01 | 1005 | 2048 | 2048 | 2048 (0)| |* 3 | TABLE ACCESS FULL| T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | | |* 4 | SORT JOIN | | 1 | 1 | 0 |00:00:00.01 | 99 | 2048 | 2048 | 2048 (0)| |* 5 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 99 | | | | ----------------------------------------------------------------------------------------------------------------- 3 - filter("T2"."N"=12) 4 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 5 - filter("T1"."N"=19) ---在两表的限制条件建索引后,发现索引都可以用到,性能大幅度提升 --首先对t1表的限制条件建索引,发现如下Merge Sort Join快了。 create index idx_t1_n on t1(n); SELECT /*+ leading(t2) use_merge(t1)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19 and t2.n=12; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ------------------------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ------------------------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1008 | | | | | 1 | MERGE JOIN | | 1 | 1 | 0 |00:00:00.01 | 1008 | | | | | 2 | SORT JOIN | | 1 | 11 | 1 |00:00:00.01 | 1005 | 2048 | 2048 | 2048 (0)| |* 3 | TABLE ACCESS FULL | T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | | |* 4 | SORT JOIN | | 1 | 1 | 0 |00:00:00.01 | 3 | 2048 | 2048 | 2048 (0)| | 5 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | | |* 6 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | | ------------------------------------------------------------------------------------------------------------------------------- 3 - filter("T2"."N"=12) 4 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 6 - access("T1"."N"=19) --接下来对t2表的限制条件建索引,发现如下Merge Sort Join更快了。 create index idx_t2_n on t2(n); SELECT /*+ leading(t2) use_merge(t1)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19 and t2.n=12; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ------------------------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ------------------------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 6 | | | | | 1 | MERGE JOIN | | 1 | 1 | 0 |00:00:00.01 | 6 | | | | | 2 | SORT JOIN | | 1 | 1 | 1 |00:00:00.01 | 3 | 2048 | 2048 | 2048 (0)| | 3 | TABLE ACCESS BY INDEX ROWID| T2 | 1 | 1 | 1 |00:00:00.01 | 3 | | | | |* 4 | INDEX RANGE SCAN | IDX_T2_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | | |* 5 | SORT JOIN | | 1 | 1 | 0 |00:00:00.01 | 3 | 2048 | 2048 | 2048 (0)| | 6 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | | |* 7 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | | ------------------------------------------------------------------------------------------------------------------------------- 4 - access("T2"."N"=12) 5 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 7 - access("T1"."N"=19)
连接条件索引消除排序
/* 结论: Merge Sort Join连接优化,在连接条件字段上建索引,用以消除排序合并连接的排序动作! */ --环境构造 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 100 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; select count(*) from t1; select count(*) from t2; --争取利用索引来消除排序(可惜的是,ORACLE算法的限制,只能避免一次排序) ---首先看两表的连接条件都无索引的情况,如下,有两次排序: set linesize 1000 set autotrace traceonly SELECT /*+ leading(t1) use_merge(t2)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id; 执行计划 ------------------------------------------------------------------------------------ | Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time | ------------------------------------------------------------------------------------ | 0 | SELECT STATEMENT | | 1 | 123 | | 1852 (1)| 00:00:23 | | 1 | MERGE JOIN | | 1 | 123 | | 1852 (1)| 00:00:23 | | 2 | SORT JOIN | | 1 | 57 | | 4 (25)| 00:00:01 | |* 3 | TABLE ACCESS FULL| T1 | 1 | 57 | | 3 (0)| 00:00:01 | |* 4 | SORT JOIN | | 100K| 6445K| 15M| 1848 (1)| 00:00:23 | | 5 | TABLE ACCESS FULL| T2 | 100K| 6445K| | 273 (1)| 00:00:04 | ------------------------------------------------------------------------------------ 3 - filter("T1"."N"=19) 4 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 统计信息 ---------------------------------------------------------- 0 recursive calls 0 db block gets 1012 consistent gets 0 physical reads 0 redo size 880 bytes sent via SQL*Net to client 415 bytes received via SQL*Net from client 2 SQL*Net roundtrips to/from client 2 sorts (memory) 0 sorts (disk) 1 rows processed --接下来在t1表建索引,发现排序消除了一个。 CREATE INDEX idx_t1_id ON t1(id); set linesize 1000 set autotrace traceonly SELECT /*+ leading(t1) use_merge(t2)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id; -------------------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time | -------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 100 | 397K| | 47930 (1)| 00:09:36 | | 1 | MERGE JOIN | | 100 | 397K| | 47930 (1)| 00:09:36 | | 2 | TABLE ACCESS BY INDEX ROWID| T1 | 100 | 198K| | 2 (0)| 00:00:01 | | 3 | INDEX FULL SCAN | IDX_T1_ID | 100 | | | 1 (0)| 00:00:01 | |* 4 | SORT JOIN | | 111K| 217M| 582M| 47928 (1)| 00:09:36 | | 5 | TABLE ACCESS FULL | T2 | 111K| 217M| | 274 (1)| 00:00:04 | -------------------------------------------------------------------------------------------------- 4 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 统计信息 ---------------------------------------------------------- 0 recursive calls 0 db block gets 1021 consistent gets 0 physical reads 0 redo size 13432 bytes sent via SQL*Net to client 481 bytes received via SQL*Net from client 8 SQL*Net roundtrips to/from client 1 sorts (memory) 0 sorts (disk) 100 rows processed --接下来在t2表的连接条件建索引,发现排序依然有一个,无法消除。 CREATE INDEX idx_t2_t1_id ON t2(t1_id); set linesize 1000 set autotrace traceonly SELECT /*+ leading(t1) use_merge(t2)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id; -------------------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time | -------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 100 | 397K| | 38263 (1)| 00:07:40 | | 1 | MERGE JOIN | | 100 | 397K| | 38263 (1)| 00:07:40 | | 2 | TABLE ACCESS BY INDEX ROWID| T1 | 100 | 198K| | 2 (0)| 00:00:01 | | 3 | INDEX FULL SCAN | IDX_T1_ID | 100 | | | 1 (0)| 00:00:01 | |* 4 | SORT JOIN | | 89127 | 173M| 464M| 38261 (1)| 00:07:40 | | 5 | TABLE ACCESS FULL | T2 | 89127 | 173M| | 273 (1)| 00:00:04 | -------------------------------------------------------------------------------------------------- 4 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 统计信息 ---------------------------------------------------------- 0 recursive calls 0 db block gets 1021 consistent gets 0 physical reads 0 redo size 13432 bytes sent via SQL*Net to client 482 bytes received via SQL*Net from client 8 SQL*Net roundtrips to/from client 1 sorts (memory) 0 sorts (disk) 100 rows processed --T2表如果一定要索引扫描呢,比如如下语句,字段只取t2表的t1_id SELECT /*+ leading(t1) use_merge(t2)*/ t2.t1_id FROM t1, t2 WHERE t1.id = t2.t1_id; ----------------------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time | ----------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 100 | 2600 | | 494 (2)| 00:00:06 | | 1 | MERGE JOIN | | 100 | 2600 | | 494 (2)| 00:00:06 | | 2 | INDEX FULL SCAN | IDX_T1_ID | 100 | 1300 | | 1 (0)| 00:00:01 | |* 3 | SORT JOIN | | 89127 | 1131K| 3512K| 493 (2)| 00:00:06 | | 4 | INDEX FAST FULL SCAN| IDX_T2_T1_ID | 89127 | 1131K| | 66 (2)| 00:00:01 | ----------------------------------------------------------------------------------------------- 3 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 统计信息 ---------------------------------------------------------- 7 recursive calls 0 db block gets 311 consistent gets 222 physical reads 0 redo size 1686 bytes sent via SQL*Net to client 482 bytes received via SQL*Net from client 8 SQL*Net roundtrips to/from client 1 sorts (memory) 0 sorts (disk) 100 rows processed --发现无论如何都不能同时消除排序合并连接两边的排序,
避免取多余列致排序尺寸过大
/* 结论: Merge Sort Join连接优化,避免多余列致排序尺寸过大 */ --环境构造 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 100 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; select count(*) from t1; select count(*) from t2; --Merge Sort Join取所有字段的情况 alter session set statistics_level=all ; set linesize 1000 SELECT /*+ leading(t2) use_merge(t1)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ----------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ----------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.14 | 1012 | | | | | 1 | MERGE JOIN | | 1 | 1 | 1 |00:00:00.14 | 1012 | | | | | 2 | SORT JOIN | | 1 | 89127 | 20 |00:00:00.13 | 1005 | 9266K| 1184K| 8236K (0)| | 3 | TABLE ACCESS FULL| T2 | 1 | 89127 | 100K|00:00:00.03 | 1005 | | | | |* 4 | SORT JOIN | | 20 | 1 | 1 |00:00:00.01 | 7 | 2048 | 2048 | 2048 (0)| |* 5 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 7 | | | | ----------------------------------------------------------------------------------------------------------------- 4 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 5 - filter("T1"."N"=19) ---Merge Sort Join取部分字段的情况 SELECT /*+ leading(t2) use_merge(t1)*/ t1.id FROM t1, t2 WHERE t1.id = t2.t1_id and t1.n=19; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ----------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ----------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.11 | 1012 | | | | | 1 | MERGE JOIN | | 1 | 1 | 1 |00:00:00.11 | 1012 | | | | | 2 | SORT JOIN | | 1 | 89127 | 20 |00:00:00.11 | 1005 | 1895K| 658K| 1684K (0)| | 3 | TABLE ACCESS FULL| T2 | 1 | 89127 | 100K|00:00:00.03 | 1005 | | | | |* 4 | SORT JOIN | | 20 | 1 | 1 |00:00:00.01 | 7 | 2048 | 2048 | 2048 (0)| |* 5 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 7 | | | | ----------------------------------------------------------------------------------------------------------------- 4 - access("T1"."ID"="T2"."T1_ID") filter("T1"."ID"="T2"."T1_ID") 5 - filter("T1"."N"=19)
保证PGA尺寸:
这里的场景是参与排序合并连接的尺寸过大的时候,这时候我们可以考虑增大PGA
如果是oracle11g,默认是直接增大memory_target
HASH连接和NL连接只取部分列基本不会有什么性能提升:
/* 结论: HASH连接和NL连接只取部分列基本不会有什么性能提升。 */ --环境构造 DROP TABLE t1 CASCADE CONSTRAINTS PURGE; DROP TABLE t2 CASCADE CONSTRAINTS PURGE; CREATE TABLE t1 ( id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; CREATE TABLE t2 ( id NUMBER NOT NULL, t1_id NUMBER NOT NULL, n NUMBER, contents VARCHAR2(4000) ) ; execute dbms_random.seed(0); INSERT INTO t1 SELECT rownum, rownum, dbms_random.string('a', 50) FROM dual CONNECT BY level <= 100 ORDER BY dbms_random.random; INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000 ORDER BY dbms_random.random; COMMIT; select count(*) from t1; select count(*) from t2; --第3式,取部分字段,减少排序尺寸! --Merge Sort Join取所有字段的情况 alter session set statistics_level=all ; set linesize 1000 SELECT /*+ leading(t1) use_hash(t2)*/ * FROM t1, t2 WHERE t1.id = t2.t1_id; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ---------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.06 | 1006 | | | | |* 1 | HASH JOIN | | 1 | 100 | 100 |00:00:00.06 | 1006 | 742K| 742K| 1199K (0)| | 2 | TABLE ACCESS FULL| T1 | 1 | 100 | 100 |00:00:00.01 | 7 | | | | | 3 | TABLE ACCESS FULL| T2 | 1 | 116K| 100K|00:00:00.02 | 999 | | | | ---------------------------------------------------------------------------------------------------------------- 1 - access("T1"."ID"="T2"."T1_ID") SELECT /*+ leading(t1) use_hash(t2)*/ t1.id FROM t1, t2 WHERE t1.id = t2.t1_id; select * from table(dbms_xplan.display_cursor(null,null,'allstats last')); ---------------------------------------------------------------------------------------------------------------- | Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem | ---------------------------------------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.07 | 1006 | | | | |* 1 | HASH JOIN | | 1 | 100 | 100 |00:00:00.07 | 1006 | 1066K| 1066K| 1223K (0)| | 2 | TABLE ACCESS FULL| T1 | 1 | 100 | 100 |00:00:00.01 | 7 | | | | | 3 | TABLE ACCESS FULL| T2 | 1 | 116K| 100K|00:00:00.02 | 999 | | | | ---------------------------------------------------------------------------------------------------------------- 1 - access("T1"."ID"="T2"."T1_ID")