[20190214]11g Query Result Cache RC Latches.txt
[20190214]11g Query Result Cache RC Latches.txt
--//昨天我重复链接http://www.pythian.com/blog/oracle-11g-query-result-cache-rc-latches/的测试,
--//按照我的理解如果sql语句密集执行,使用Result Cache反而更加糟糕,这是我以前没有注意到的。
--//联想我们生产系统也存在类似的问题,我们有1个判断连接的语句select count(*) from test_connect;
--//在业务高峰它执行可以达到1600次/秒。另外一个简单的select sysdate from dual; 也达到800次/秒。
--//而实际上业务高峰sql语句执行率3000次/秒。这样的2条语句就占了2400次/秒。我以前一直以为将表设置
--//为result cache,可能提高执行效率,还是通过例子测试看看。
1.环境:
SCOTT@book> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
SCOTT@book> show parameter job
NAME TYPE VALUE
------------------- ------- ------
job_queue_processes integer 200
SCOTT@book> select * from v$latchname where name like 'Result Cache%';
LATCH# NAME HASH
------ ----------------------- ----------
436 Result Cache: RC Latch 1054203712
437 Result Cache: SO Latch 986859868
438 Result Cache: MB Latch 995186388
--//我看到Result Cache名字与作者的不同,命名为Result Cache: RC Latch。
SCOTT@book> select name,gets from v$latch where lower(name) like '%result cache%';
NAME GETS
------------------------------ ----------
Result Cache: RC Latch 0
Result Cache: SO Latch 0
Result Cache: MB Latch 0
SCOTT@book> select count(*) from v$latch_children where lower(name) like '%result cache%';
COUNT(*)
----------
0
--//可以注意一个细节,Result Cache没有children latch。也仅仅1个Result Cache: RC Latch 父latch。从这里也可以看出如果
--//做了result cache的表,多个用户并发执行,可能反而不能获得好的性能,可能出现大量的Result Cache: RC Latch争用的情况.
2.建立测试例子:
create table t as select rownum id from dual ;
create unique index pk_t on t(id);
--//分析略。
SCOTT@book> create table job_times ( sid number, time_ela number);
Table created.
--//按照源链接的例子修改如下:
create or replace procedure do_work(
p_iterations in number
) is
l_rowid rowid;
v_t number;
begin
insert into job_times
values (sys_context('userenv', 'sid'), dbms_utility.get_time)
returning rowid into l_rowid;
for i in 1 .. p_iterations
loop
select count(*) into v_t from t;
end loop;
update job_times set
time_ela=dbms_utility.get_time-time_ela
where rowid=l_rowid;
commit;
end;
/
3.测试:
--//首先测试不做result cache的情况:
--//alter table t result_cache (mode default);
declare
l_job number;
begin
for i in 1 .. 50
loop
dbms_job.submit(
job => l_job,
what => 'do_work(1000000);'
);
end loop;
end;
/
SCOTT@book> commit ;
Commit complete.
--//注意一定要写提交,不然dbms_job.submit要等很久才执行。
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
50 9235.1 461755
4.测试:
--///测试做result cache的情况,为了测试的准确,我重启数据库。
SCOTT@book> delete from job_times;
50 rows deleted.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter table t result_cache (mode force);
Table altered.
--//重启数据库.
SCOTT@book> select name, gets, misses, sleeps, wait_time from v$latch where name like 'Result Cache%';
NAME GETS MISSES SLEEPS WAIT_TIME
------------------------------ ---------- ---------- ---------- ----------
Result Cache: RC Latch 0 0 0 0
Result Cache: SO Latch 0 0 0 0
Result Cache: MB Latch 0 0 0 0
declare
l_job number;
begin
for i in 1 .. 50
loop
dbms_job.submit(
job => l_job,
what => 'do_work(100000);'
);
end loop;
end;
/
SCOTT@book> commit ;
Commit complete.
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
50 7135.96 356798
SCOTT@book> select name, gets, misses, sleeps, wait_time from v$latch where name like 'Result Cache%';
NAME GETS MISSES SLEEPS WAIT_TIME
------------------------------ ---------- ---------- ---------- ----------
Result Cache: RC Latch 54232541 3499238 0 0
Result Cache: SO Latch 202 0 0 0
Result Cache: MB Latch 0 0 0 0
--//很明显,即使存在Result Cache: RC Latch的争用,但是WAIT_TIME=0,不过我发现这样测试的一个缺点,就是50个job并不是同时运行.
--//$ ps -ef | grep ora_[j]|wc ,看看数量是不断增加的过程.
--//而且采用Result Cache后效果还是增强的.
5.换一个方式测试:
SCOTT@book> delete from job_times;
53 rows deleted.
SCOTT@book> commit ;
Commit complete.
--//设置result_cache=default
SCOTT@book> alter table t result_cache (mode default);
Table altered.
$ seq 50 | xargs -I{} echo 'sqlplus -s -l scott/book <<< "execute do_work(1000000)" & '| bash
--//等全部完成...
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
50 10588.26 529413
SCOTT@book> delete from job_times;
50 rows deleted.
SCOTT@book> commit ;
Commit complete.
--//设置result_cache=force
SCOTT@book> alter table t result_cache (mode force);
Table altered.
$ seq 50 | xargs -I{} echo 'sqlplus -s -l scott/book <<< "execute do_work(1000000)" & '| bash
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
50 8573.28 428664
--//可以看到即使这样大并发,采用result cache还是要快许多,没有遇到作者的情况.
--//可以11GR2做了一些改进,不会遇到这样的情况.
SCOTT@book> column name format a30
SCOTT@book> select name, gets, misses, sleeps, wait_time from v$latch where name like 'Result Cache%';
NAME GETS MISSES SLEEPS WAIT_TIME
------------------------------ ---------- ---------- ---------- ----------
Result Cache: RC Latch 103461569 7263987 0 0
Result Cache: SO Latch 302 0 0 0
Result Cache: MB Latch 0 0 0 0
6.不过当我拿作者的最后的例子做最后的测试发现,使用result cache慢很多.
SCOTT@book> create cluster hc ( n number(*,0)) single table hashkeys 15000 size 230;
Cluster created.
SCOTT@book> create table hc_t ( n number(*,0), v varchar2(200)) cluster hc (n);
Table created.
SCOTT@book> insert into hc_t select level, dbms_random.string('p', 200) from dual connect by level <= 10000;
10000 rows created.
SCOTT@book> commit;
Commit complete.
--//分析表略.
All we need now is two procedures, one with a regular select and another with a cached select:
create or replace procedure do_hc(
p_iterations in number
) is
l_rowid rowid;
l_n number;
begin
insert into job_times
values (sys_context('userenv', 'sid'), dbms_utility.get_time)
returning rowid into l_rowid;
for i in 1 .. p_iterations
loop
l_n:=trunc(dbms_random.value(1, 10000));
for cur in (select * from hc_t where n=l_n)
loop
null;
end loop;
end loop;
update job_times set
time_ela=dbms_utility.get_time-time_ela
where rowid=l_rowid;
end;
/
Procedure created.
create or replace procedure do_rc(
p_iterations in number
) is
l_rowid rowid;
l_n number;
begin
insert into job_times
values (sys_context('userenv', 'sid'), dbms_utility.get_time)
returning rowid into l_rowid;
for i in 1 .. p_iterations
loop
l_n:=trunc(dbms_random.value(1, 10000));
for cur in (select /*+ result_cache */ * from hc_t where n=l_n)
loop
null;
end loop;
end loop;
update job_times set
time_ela=dbms_utility.get_time-time_ela
where rowid=l_rowid;
end;
/
Procedure created.
The hash cluster will go first:
SCOTT@book> delete from job_times;
4 rows deleted.
SQL> commit;
Commit complete.
declare
l_job number;
begin
for i in 1 .. 4
loop
dbms_job.submit(
job => l_job,
what => 'do_hc(100000);'
);
end loop;
end;
/
PL/SQL procedure successfully completed.
SCOTT@book> commit ;
Commit complete.
--allow jobs to complete
SCOTT@book> select case grouping(sid) when 1 then 'Total:' else to_char(sid) end sid, sum(time_ela) ela from job_times group by rollup((sid, time_ela));
SID ELA
------- ----
41 446
54 437
80 438
94 437
Total: 1758
--//每个测试仅仅需要4秒.
Now let's see if Result Cache can beat those numbers:
SCOTT@book> delete from job_times;
4 rows deleted.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> select name, gets, misses, sleeps, wait_time from v$latch where name like 'Result Cache%';
NAME GETS MISSES SLEEPS WAIT_TIME
------------------------------ ---------- ---------- ---------- ----------
Result Cache: RC Latch 20385043 535762 5 94
Result Cache: SO Latch 9 0 0 0
Result Cache: MB Latch 0 0 0 0
declare
l_job number;
begin
for i in 1 .. 4
loop
dbms_job.submit(
job => l_job,
what => 'do_rc(100000);'
);
end loop;
end;
/
PL/SQL procedure successfully completed.
SCOTT@book> commit ;
Commit complete.
--allow jobs to complete
SCOTT@book> select case grouping(sid) when 1 then 'Total:' else to_char(sid) end sid, sum(time_ela) ela from job_times group by rollup((sid, time_ela));
SID ELA
------ ------
41 3850
54 3853
80 3860
94 3863
Total: 15426
--//我的测试使用Result Cache 更加糟糕!!每个测试需要38秒.而作者的测试两者几乎差不多.作者用 Nothing (almost) 来表达.
SCOTT@book> select name, gets, misses, sleeps, wait_time from v$latch where name like 'Result Cache%';
NAME GETS MISSES SLEEPS WAIT_TIME
------------------------------ ---------- ---------- ---------- ----------
Result Cache: RC Latch 21768802 1045691 663187 64314325
Result Cache: SO Latch 17 0 0 0
Result Cache: MB Latch 0 0 0 0
--//我开始以为这里有1个将结果集放入共享池的过程,每一次执行都需要放入共享池.再次调用应该会快一些.
create or replace procedure do_rc(
p_iterations in number
) is
l_rowid rowid;
l_n number;
begin
insert into job_times
values (sys_context('userenv', 'sid'), dbms_utility.get_time)
returning rowid into l_rowid;
for i in 1 .. p_iterations
loop
l_n:=trunc(dbms_random.value(1, 10000));
for cur in (select /*+ result_cache */ * from hc_t where n=l_n)
loop
null;
end loop;
end loop;
update job_times set
time_ela=dbms_utility.get_time-time_ela
where rowid=l_rowid;
end;
/
--//再次执行:
declare
l_job number;
begin
for i in 1 .. 4
loop
dbms_job.submit(
job => l_job,
what => 'do_rc(100000);'
);
end loop;
end;
/
PL/SQL procedure successfully completed.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> select case grouping(sid) when 1 then 'Total:' else to_char(sid) end sid, sum(time_ela) ela from job_times group by rollup((sid, time_ela));
SID ELA
----- -----
72 3980
81 3900
96 3936
108 3922
Total 15738
--//问题依旧.我估计不同查询存在select /*+ result_cache */ * from hc_t where n=l_n的情况下,探查Result Cache: RC Latch持有
--//时间很长,导致使用result cache更慢,这样看来result_cache更加适合统计类结果不变的语句.而且绑定变量不要变化很多的情况.
--//换成普通表测试看看:
SCOTT@book> rename hc_t to hc_tx;
Table renamed.
SCOTT@book> create table hc_t as select * from hc_tx ;
Table created.
SCOTT@book> create unique index i_hc_t on hc_t(n);
Index created.
--//分析表略.
--//调用do_hc的情况如下:
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
4 431.5 1726
--//调用do_rc的情况如下:
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
4 4027.75 16111
--//结果一样.删除索引在测试看看.
SCOTT@book> drop index i_hc_t ;
Index dropped.
--//调用do_hc的情况如下:
--//delete from job_times;
--//commit ;
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
4 4160 16640
--//调用do_rc的情况如下:
--//delete from job_times;
--//commit ;
SCOTT@book> select count(*),avg(TIME_ELA),sum(TIME_ELA) from job_times ;
COUNT(*) AVG(TIME_ELA) SUM(TIME_ELA)
---------- ------------- -------------
4 3828 15312
--//这个时候result cache优势才显示出来.总之在生产系统使用要注意这个细节,一般result cahe仅仅只读表(dml很少的静态表)外.
--//如果经常使用不同变量查询表,能使用索引的情况,使用result cache毫无优势可言.