[20250205]21c library cache mutex的深入探究10(_mutex_wait_scheme=1).txt
[20250205]21c library cache mutex的深入探究10(_mutex_wait_scheme=1).txt
--//探究library cache mutex阻塞导致的相关等待事件,分析_mutex_wait_scheme=1的情况的相关细节。
1.环境:
SCOTT@book01p> @ ver2
==============================
PORT_STRING : x86_64/Linux 2.4.xx
VERSION : 21.0.0.0.0
BANNER : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
BANNER_FULL : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.3.0.0.0
BANNER_LEGACY : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
CON_ID : 0
PL/SQL procedure successfully completed.
SYS@book> @ hidez ^_mutex
NUM N_HEX NAME DESCRIPTION DEFAULT_VALUE SESSION_VALUE SYSTEM_VALUE ISSES ISSYS_MOD
---- ----- ------------------ ----------------- ------------- ------------- ------------ ----- ---------
3553 DE1 _mutex_wait_time Mutex wait time TRUE 1 1 FALSE IMMEDIATE
3554 DE2 _mutex_spin_count Mutex spin count TRUE 255 255 FALSE IMMEDIATE
3555 DE3 _mutex_wait_scheme Mutex wait scheme TRUE 2 2 FALSE IMMEDIATE
--//缺省_mutex_wait_time=1,时间单位与_mutex_wait_scheme相关,_mutex_wait_scheme=2时时间单位是厘秒,而
--//_mutex_wait_scheme=0,1时,单位时毫秒。
--//_mutex_wait_scheme =2时,_mutex_wait_time>1时sleeps的时间会出现指数回退.
--//缺省_mutex_wait_scheme =2.
--//网上找了一段资料:
* _mutex_spin_count (Integer)
- This sets the number of times to spin before yielding/waiting.
* _mutex_wait_scheme (Integer)
- In 11.2 this controls which wait scheme to use. It can be set to one of the three wait schemes described above thus:
_mutex_wait_scheme = 0 – Always YIELD
_mutex_wait_scheme = 1 & _mutex_wait_time = t – Always SLEEP for t milli-seconds
_mutex_wait_scheme = 2 & _mutex_wait_time = t – EXP BACKOFF with maximum sleep (default)
2.测试前准备:
SYS@book> oradebug setmypid
Statement processed.
SYS@book> oradebug dump library_cache 4
Statement processed.
$ grep "^Bucket:" /u01/app/oracle/diag/rdbms/book/book/trace/book_ora_10770.trc | head -4
Bucket: #=0 Mutex=0x6cfa1400(1125281431552, 9, 0, 6)
Bucket: #=5 Mutex=0x6cfa14f0(1125281431552, 4, 0, 6)
Bucket: #=17 Mutex=0x6cfa1730(1125281431552, 8, 0, 6)
Bucket: #=39 Mutex=0x6cfa1b50(1125281431552, 4, 0, 6)
--//以上是前面测试的结果,直接找Bucket: #=0 Mutex=0x6cfa1400的语句测试。
--//该sql语句select /*+ 9 */ count(*) from dept where deptno = 93834;的bucket=0.
--//Bucket: #=0 Mutex=0x6cfa1400
--//0x6cfa1400-0x10 = 0x6cfa13f0
--//昨天测试了_mutex_wait_scheme=2的情况,当_mutex_wait_time>1时才会出现EXP BACKOFF with maximum sleep。
--//上午测试_mutex_wait_scheme=0的情况,下午测试_mutex_wait_scheme=1的情况.
--//session 2:
SYS@book> alter system set "_mutex_wait_scheme"=1 scope=memory;
System altered.
2.测试:
--//session 1:
SCOTT@book01p> @ spid
==============================
SID : 148
SERIAL# : 21638
PROCESS : 3538
SERVER : DEDICATED
SPID : 3540
PID : 49
P_SERIAL# : 4
KILL_COMMAND : alter system kill session '148,21638' immediate;
PL/SQL procedure successfully completed.
SCOTT@book01p> alter session set session_cached_cursors=0 ;
Session altered.
--//主要目的避免光标缓存,保证每次执行library cache mutex gets增加。
SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;
COUNT(*)
----------
0
--//执行多次。
SCOTT@book01p> @ hashz
HASH_VALUE SQL_ID CHILD_NUMBER KGL_BUCKET HASH_HEX SQL_EXEC_START SQL_EXEC_ID
---------- ------------- ------------ ---------- ---------- ------------------- -----------
3403546624 as3g00v5dw000 0 0 cade0000 2025-02-05 15:32:19 16777219
--//session 2:
SYS@book> @opeek 0x6cfa13f0 48 0
[06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 00000007 00000000 00000000 00000000 67B3EFC0 00000000
--//gets=0x07
SYS@book> @ sharepool/shp4z as3g00v5dw000 -1
SYS@book> @ pr
==============================
HANDLE_TYPE : parent handle address
KGLHDADR : 0000000064283498
KGLHDPAR : 0000000064283498
C40 : select /*+ 9 */ count(*) from dept where
KGLHDLMD : 0
KGLHDPMD : 0
KGLHDIVC : 0
KGLOBHD0 : 0000000067B06080
KGLOBHD6 : 00
KGLOBHS0 : 4064
KGLOBHS6 : 0
KGLOBT16 : 0
N0_6_16 : 4064
N20 : 4064
KGLNAHSH : 3403546624
KGLOBT03 : as3g00v5dw000
KGLOBT09 : 65535
PL/SQL procedure successfully completed.
--//父游标句柄地址0000000064283498已经写入mutex地址-0x10处。
SYS@book> @opeek 0x6cfa13f0 48 0
[06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 0000000B 00000000 00000000 00000000 67B3EFC0 00000000
--//gets=0xb,似乎执行shp4z脚本也会导致gets数量增加。
4.继续测试:
--//由于目前版本21c不支持oradebug poke内存信息,采用gdb操作。
--//session 3:
SYS@book> @ spid
==============================
SID : 407
SERIAL# : 16644
PROCESS : 3833
SERVER : DEDICATED
SPID : 3834
PID : 99
P_SERIAL# : 2
KILL_COMMAND : alter system kill session '407,16644' immediate;
PL/SQL procedure successfully completed.
--//window 1:
$ strace -Ttt -y -f -p 3828 2>&1 | tee 0205mutex.txt
--//window 2:
$rlgdb -f -p 3834
...
(gdb) x /8wx 0x6cfa1400
0x6cfa1400: 0x00000000 0x00000000 0x0000000c 0x00000000
0x6cfa1410: 0x00000000 0x00000000 0x67b3efc0 0x00000000
(gdb) set *(int *)0x6cfa1404=0x8888
(gdb) x /8wx 0x6cfa1400
0x6cfa1400: 0x00000000 0x00008888 0x0000000c 0x00000000
0x6cfa1410: 0x00000000 0x00000000 0x67b3efc0 0x00000000
--//gets数量0x0c
--//session 1:
SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;
--//挂起!!等3X秒以上。
--//window 2:
(gdb) set *(int *)0x6cfa1404=0x0
(gdb) x /8wx 0x6cfa1400
0x6cfa1400: 0x00000000 0x00000000 0x0000000d 0x00002cf8
0x6cfa1410: 0x00000000 0x00000000 0x67b3efc0 0x00000000
--//gets增加到0xd,增加1次,sleeps增加到0x00002cf8 = 11512
--//session 1:
SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;
COUNT(*)
----------
0
--//执行完成。
5.分析:
--//session 2:
SYS@book> @ ashtop event,sid,p1,p1raw,p2raw,p3raw 1=1 "'2025-02-05 15:37:50'" "'2025-02-05 15:38:34'"
Total Distinct Distinct Distinct
Seconds AAS %This EVENT SID P1 P1RAW P2RAW P3RAW FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ----------------------------- --- ---------- ----------------- ----------------- ----------------- ------------------- ------------------- ---------- -------- -----------
43 1.0 70% | library cache: bucket mutex X 283 0 0000000000000000 0000888800000000 000000000000003E 2025-02-05 15:37:51 2025-02-05 15:38:33 1 43 43
9 .2 15% | 5 3 2025-02-05 15:37:51 2025-02-05 15:38:33 1 9 9
5 .1 8% | 259 100 2025-02-05 15:38:06 2025-02-05 15:38:30 1 5 5
2 .0 3% | LGWR all worker groups 258 0 0000000000000000 0000000000000000 0000000000000000 2025-02-05 15:37:52 2025-02-05 15:38:01 1 2 2
1 .0 2% | 1 100 2025-02-05 15:37:59 2025-02-05 15:37:59 1 1 1
1 .0 2% | 136 0 2025-02-05 15:37:52 2025-02-05 15:37:52 1 1 1
6 rows selected.
--//window 2:
--//按ctrl+c中断strace跟踪。
$ egrep "getrusage" 0205mutex.txt
15:37:50.871140 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 287649}, ru_stime={0, 38280}, ...}) = 0 <0.000018>
15:37:52.533279 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 300599}, ru_stime={0, 42463}, ...}) = 0 <0.000014>
15:37:54.534693 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 312143}, ru_stime={0, 48198}, ...}) = 0 <0.000062>
15:37:56.539985 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 329404}, ru_stime={0, 58908}, ...}) = 0 <0.000018>
15:37:58.542490 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 332266}, ru_stime={0, 70013}, ...}) = 0 <0.000029>
15:38:00.546928 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 352535}, ru_stime={0, 83527}, ...}) = 0 <0.000045>
15:38:02.550225 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 371273}, ru_stime={0, 96018}, ...}) = 0 <0.000011>
15:38:04.558715 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 377830}, ru_stime={0, 109984}, ...}) = 0 <0.000014>
15:38:06.559932 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 397581}, ru_stime={0, 136085}, ...}) = 0 <0.000013>
15:38:08.570251 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 409084}, ru_stime={0, 152916}, ...}) = 0 <0.000028>
15:38:10.572618 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 427753}, ru_stime={0, 167381}, ...}) = 0 <0.000028>
15:38:12.586048 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 441045}, ru_stime={0, 193792}, ...}) = 0 <0.000081>
15:38:14.604125 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 454608}, ru_stime={0, 212421}, ...}) = 0 <0.000042>
15:38:16.604363 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 464452}, ru_stime={0, 231549}, ...}) = 0 <0.000028>
15:38:18.626482 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 479808}, ru_stime={0, 252242}, ...}) = 0 <0.000013>
15:38:20.634835 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 490703}, ru_stime={0, 263907}, ...}) = 0 <0.000013>
15:38:22.639064 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 497624}, ru_stime={0, 281622}, ...}) = 0 <0.000018>
15:38:24.640630 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 507448}, ru_stime={0, 292334}, ...}) = 0 <0.000013>
15:38:26.656429 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 511999}, ru_stime={0, 311053}, ...}) = 0 <0.000091>
15:38:28.656760 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 528800}, ru_stime={0, 332948}, ...}) = 0 <0.000015>
15:38:30.659396 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 539324}, ru_stime={0, 341291}, ...}) = 0 <0.000014>
15:38:32.660070 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554088}, ru_stime={0, 358865}, ...}) = 0 <0.000013>
15:38:33.698162 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554088}, ru_stime={0, 379381}, ...}) = 0 <0.000010>
15:38:33.699511 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 379524}, ...}) = 0 <0.000008>
--//调用getrusage是2秒间隔.
$ awk '{print $2}' 0205mutex.txt |egrep "semtimedop|getrusage|sched_yield|select" | uniq -c | head -10
1 getrusage(0x1
1 sched_yield()
339 select(0,
1 getrusage(0x1
329 select(0,
1 getrusage(0x1
614 select(0,
1 getrusage(0x1
317 select(0,
1 getrusage(0x1
--//开始1次sched_yield()调用,其后调用select许多次,2秒之后调用getrusage,如此反复循环。
$ grep select 0205mutex.txt | head
15:37:50.871613 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001440>
15:37:50.873155 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001795>
15:37:50.875050 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001956>
15:37:50.877092 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001829>
15:37:50.879008 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001840>
15:37:50.880933 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001878>
15:37:50.882895 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001873>
15:37:50.884854 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001943>
15:37:50.886881 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001821>
15:37:50.888788 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001927>
--//休眠的时间单位微秒(1/10^6秒),1000微秒= 1000/10^6 = .001秒。select休眠的时间很短1毫秒。
--//而_mutex_wait_scheme=0是99次sched_yield,1次select(0.001秒),遇到getrusage分开,相加次数还是99次,如此反复循环。
--//而_mutex_wait_scheme=2,使用semtimedop调用(0.01秒)。
SYS@book> @ mutexprofz idn,loc "ts>=trunc(sysdate)+15/24+37/1440+50/86400"
-- MutexProf by Tanel Poder (http://www.tanelpoder.com)
-- Showing profile of top 20 sleeps...
-- column info : id idn hash hash_value=>hash_value ts=>sleep_timestamp
-- req=>requesting_session blk=>blocking_session val=>mutex_value maddr=>mutex_addr
SUM_SLEEPS GETS_DIFF MUTEX_TYPE IDN GET_LOCATION OBJECT_NAME
---------- -------------- --------------- ---------- --------------------------------- --------------------------------------------------------------------------------
11512 Library Cache 0 kglhdgn1 62 (name not found)
2 Library Cache 1068676711 kglpin1 4 SELECT /*+ NO_STATEMENT_QUEUING RESULT_CACHE (SYSOBJ=FALSE) */
"TS#","NAME","ONLINE$","CONTENTS$","BLOCKSIZE","BITMAPPED","FLAGS" FROM
"SYS"."TS$" "K" WHERE 1=1
--//0x00002cf8 = 11512
--//记录的sleeps次数也是11512次。GET_LOCATION 对应的数字62 与前面ashtop 看到的p2 0x3e对应,注意id=0表示bucket桶号。
$ grep "select" 0205mutex.txt |wc
11511 126621 851814
$ grep "sched_yield" 0205mutex.txt |wc
1 5 55
--//11511+1=11512=0x2cf8完全能对应上。
SYS@book> @opeek 0x6cfa13f0 48 0
[06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 00000016 00002CF8 00000000 00000000 67B3EFC0 00000000
--//与opeek执行看到的一致。
6.小结:
--//缺省_mutex_wait_time=1,_mutex_wait_scheme=1的情况下select的时间是1毫秒。
--//_mutex_wait_scheme=1模式下开始1次sched_yield()调用,其后调用select许多次,2秒之后调用getrusage,如此反复循环。
--//_mutex_wait_scheme=1模式调用,调用select仅仅0.001秒。
--//测试mutex_wait_scheme=1,修改_mutex_wait_time的情况。按照前面介绍应该是select每次_mutex_wait_time,单位毫秒。
SYS@book> alter system set "_mutex_wait_time"=20 scope=memory;
System altered.
--//重复测试:
$ egrep "getrusage" 0205mutexa.txt | head -20
16:05:27.368592 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 381378}, ...}) = 0 <0.000009>
16:05:28.756046 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 382136}, ...}) = 0 <0.000027>
16:05:30.777212 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 555697}, ru_stime={0, 386260}, ...}) = 0 <0.000046>
16:05:32.809704 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 556296}, ru_stime={0, 389981}, ...}) = 0 <0.000029>
16:05:34.808683 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 558242}, ru_stime={0, 390338}, ...}) = 0 <0.000157>
16:05:36.813974 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 561839}, ru_stime={0, 393287}, ...}) = 0 <0.000030>
16:05:38.817664 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 563650}, ru_stime={0, 397445}, ...}) = 0 <0.000038>
16:05:40.821995 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 564663}, ru_stime={0, 401474}, ...}) = 0 <0.000145>
16:05:42.858149 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 569048}, ru_stime={0, 403980}, ...}) = 0 <0.000033>
16:05:44.856437 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 571326}, ru_stime={0, 405598}, ...}) = 0 <0.000080>
16:05:46.873433 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 573060}, ru_stime={0, 407250}, ...}) = 0 <0.000017>
16:05:48.876284 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 576291}, ru_stime={0, 410389}, ...}) = 0 <0.000018>
16:05:50.910784 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 578221}, ru_stime={0, 410726}, ...}) = 0 <0.000015>
16:05:52.890046 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 578221}, ru_stime={0, 413457}, ...}) = 0 <0.000029>
16:05:54.904141 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 581566}, ru_stime={0, 413947}, ...}) = 0 <0.000028>
16:05:56.913854 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 584040}, ru_stime={0, 415708}, ...}) = 0 <0.000046>
16:05:58.895999 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 585411}, ru_stime={0, 418569}, ...}) = 0 <0.000008>
16:05:58.897166 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 585536}, ru_stime={0, 418658}, ...}) = 0 <0.000009>
--//间隔还是2秒。
--//除非设置"_mutex_wait_time">2000,估计没人这么做。
$ awk '{print $2}' 0205mutexa.txt |egrep "semtimedop|getrusage|sched_yield|select" | uniq -c | head -10
1 getrusage(0x1
1 sched_yield()
35 select(0,
1 getrusage(0x1
76 select(0,
1 getrusage(0x1
62 select(0,
1 getrusage(0x1
42 select(0,
1 getrusage(0x1
$ grep "select" 0205mutexa.txt |head -4
16:05:27.368878 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.028785>
16:05:27.397848 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.031134>
16:05:27.429072 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.021033>
16:05:27.450178 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.040958>
--//休眠的时间单位微秒(1/10^6秒),20000微秒= 20000/10^6 = 0.020秒。select休眠的时间20毫秒。
--//符合前面介绍的_mutex_wait_scheme = 1 & _mutex_wait_time = t – Always SLEEP for t milli-seconds 一致。
--//探究library cache mutex阻塞导致的相关等待事件,分析_mutex_wait_scheme=1的情况的相关细节。
1.环境:
SCOTT@book01p> @ ver2
==============================
PORT_STRING : x86_64/Linux 2.4.xx
VERSION : 21.0.0.0.0
BANNER : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
BANNER_FULL : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.3.0.0.0
BANNER_LEGACY : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
CON_ID : 0
PL/SQL procedure successfully completed.
SYS@book> @ hidez ^_mutex
NUM N_HEX NAME DESCRIPTION DEFAULT_VALUE SESSION_VALUE SYSTEM_VALUE ISSES ISSYS_MOD
---- ----- ------------------ ----------------- ------------- ------------- ------------ ----- ---------
3553 DE1 _mutex_wait_time Mutex wait time TRUE 1 1 FALSE IMMEDIATE
3554 DE2 _mutex_spin_count Mutex spin count TRUE 255 255 FALSE IMMEDIATE
3555 DE3 _mutex_wait_scheme Mutex wait scheme TRUE 2 2 FALSE IMMEDIATE
--//缺省_mutex_wait_time=1,时间单位与_mutex_wait_scheme相关,_mutex_wait_scheme=2时时间单位是厘秒,而
--//_mutex_wait_scheme=0,1时,单位时毫秒。
--//_mutex_wait_scheme =2时,_mutex_wait_time>1时sleeps的时间会出现指数回退.
--//缺省_mutex_wait_scheme =2.
--//网上找了一段资料:
* _mutex_spin_count (Integer)
- This sets the number of times to spin before yielding/waiting.
* _mutex_wait_scheme (Integer)
- In 11.2 this controls which wait scheme to use. It can be set to one of the three wait schemes described above thus:
_mutex_wait_scheme = 0 – Always YIELD
_mutex_wait_scheme = 1 & _mutex_wait_time = t – Always SLEEP for t milli-seconds
_mutex_wait_scheme = 2 & _mutex_wait_time = t – EXP BACKOFF with maximum sleep (default)
2.测试前准备:
SYS@book> oradebug setmypid
Statement processed.
SYS@book> oradebug dump library_cache 4
Statement processed.
$ grep "^Bucket:" /u01/app/oracle/diag/rdbms/book/book/trace/book_ora_10770.trc | head -4
Bucket: #=0 Mutex=0x6cfa1400(1125281431552, 9, 0, 6)
Bucket: #=5 Mutex=0x6cfa14f0(1125281431552, 4, 0, 6)
Bucket: #=17 Mutex=0x6cfa1730(1125281431552, 8, 0, 6)
Bucket: #=39 Mutex=0x6cfa1b50(1125281431552, 4, 0, 6)
--//以上是前面测试的结果,直接找Bucket: #=0 Mutex=0x6cfa1400的语句测试。
--//该sql语句select /*+ 9 */ count(*) from dept where deptno = 93834;的bucket=0.
--//Bucket: #=0 Mutex=0x6cfa1400
--//0x6cfa1400-0x10 = 0x6cfa13f0
--//昨天测试了_mutex_wait_scheme=2的情况,当_mutex_wait_time>1时才会出现EXP BACKOFF with maximum sleep。
--//上午测试_mutex_wait_scheme=0的情况,下午测试_mutex_wait_scheme=1的情况.
--//session 2:
SYS@book> alter system set "_mutex_wait_scheme"=1 scope=memory;
System altered.
2.测试:
--//session 1:
SCOTT@book01p> @ spid
==============================
SID : 148
SERIAL# : 21638
PROCESS : 3538
SERVER : DEDICATED
SPID : 3540
PID : 49
P_SERIAL# : 4
KILL_COMMAND : alter system kill session '148,21638' immediate;
PL/SQL procedure successfully completed.
SCOTT@book01p> alter session set session_cached_cursors=0 ;
Session altered.
--//主要目的避免光标缓存,保证每次执行library cache mutex gets增加。
SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;
COUNT(*)
----------
0
--//执行多次。
SCOTT@book01p> @ hashz
HASH_VALUE SQL_ID CHILD_NUMBER KGL_BUCKET HASH_HEX SQL_EXEC_START SQL_EXEC_ID
---------- ------------- ------------ ---------- ---------- ------------------- -----------
3403546624 as3g00v5dw000 0 0 cade0000 2025-02-05 15:32:19 16777219
--//session 2:
SYS@book> @opeek 0x6cfa13f0 48 0
[06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 00000007 00000000 00000000 00000000 67B3EFC0 00000000
--//gets=0x07
SYS@book> @ sharepool/shp4z as3g00v5dw000 -1
SYS@book> @ pr
==============================
HANDLE_TYPE : parent handle address
KGLHDADR : 0000000064283498
KGLHDPAR : 0000000064283498
C40 : select /*+ 9 */ count(*) from dept where
KGLHDLMD : 0
KGLHDPMD : 0
KGLHDIVC : 0
KGLOBHD0 : 0000000067B06080
KGLOBHD6 : 00
KGLOBHS0 : 4064
KGLOBHS6 : 0
KGLOBT16 : 0
N0_6_16 : 4064
N20 : 4064
KGLNAHSH : 3403546624
KGLOBT03 : as3g00v5dw000
KGLOBT09 : 65535
PL/SQL procedure successfully completed.
--//父游标句柄地址0000000064283498已经写入mutex地址-0x10处。
SYS@book> @opeek 0x6cfa13f0 48 0
[06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 0000000B 00000000 00000000 00000000 67B3EFC0 00000000
--//gets=0xb,似乎执行shp4z脚本也会导致gets数量增加。
4.继续测试:
--//由于目前版本21c不支持oradebug poke内存信息,采用gdb操作。
--//session 3:
SYS@book> @ spid
==============================
SID : 407
SERIAL# : 16644
PROCESS : 3833
SERVER : DEDICATED
SPID : 3834
PID : 99
P_SERIAL# : 2
KILL_COMMAND : alter system kill session '407,16644' immediate;
PL/SQL procedure successfully completed.
--//window 1:
$ strace -Ttt -y -f -p 3828 2>&1 | tee 0205mutex.txt
--//window 2:
$rlgdb -f -p 3834
...
(gdb) x /8wx 0x6cfa1400
0x6cfa1400: 0x00000000 0x00000000 0x0000000c 0x00000000
0x6cfa1410: 0x00000000 0x00000000 0x67b3efc0 0x00000000
(gdb) set *(int *)0x6cfa1404=0x8888
(gdb) x /8wx 0x6cfa1400
0x6cfa1400: 0x00000000 0x00008888 0x0000000c 0x00000000
0x6cfa1410: 0x00000000 0x00000000 0x67b3efc0 0x00000000
--//gets数量0x0c
--//session 1:
SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;
--//挂起!!等3X秒以上。
--//window 2:
(gdb) set *(int *)0x6cfa1404=0x0
(gdb) x /8wx 0x6cfa1400
0x6cfa1400: 0x00000000 0x00000000 0x0000000d 0x00002cf8
0x6cfa1410: 0x00000000 0x00000000 0x67b3efc0 0x00000000
--//gets增加到0xd,增加1次,sleeps增加到0x00002cf8 = 11512
--//session 1:
SCOTT@book01p> select /*+ 9 */ count(*) from dept where deptno = 93834;
COUNT(*)
----------
0
--//执行完成。
5.分析:
--//session 2:
SYS@book> @ ashtop event,sid,p1,p1raw,p2raw,p3raw 1=1 "'2025-02-05 15:37:50'" "'2025-02-05 15:38:34'"
Total Distinct Distinct Distinct
Seconds AAS %This EVENT SID P1 P1RAW P2RAW P3RAW FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ----------------------------- --- ---------- ----------------- ----------------- ----------------- ------------------- ------------------- ---------- -------- -----------
43 1.0 70% | library cache: bucket mutex X 283 0 0000000000000000 0000888800000000 000000000000003E 2025-02-05 15:37:51 2025-02-05 15:38:33 1 43 43
9 .2 15% | 5 3 2025-02-05 15:37:51 2025-02-05 15:38:33 1 9 9
5 .1 8% | 259 100 2025-02-05 15:38:06 2025-02-05 15:38:30 1 5 5
2 .0 3% | LGWR all worker groups 258 0 0000000000000000 0000000000000000 0000000000000000 2025-02-05 15:37:52 2025-02-05 15:38:01 1 2 2
1 .0 2% | 1 100 2025-02-05 15:37:59 2025-02-05 15:37:59 1 1 1
1 .0 2% | 136 0 2025-02-05 15:37:52 2025-02-05 15:37:52 1 1 1
6 rows selected.
--//window 2:
--//按ctrl+c中断strace跟踪。
$ egrep "getrusage" 0205mutex.txt
15:37:50.871140 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 287649}, ru_stime={0, 38280}, ...}) = 0 <0.000018>
15:37:52.533279 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 300599}, ru_stime={0, 42463}, ...}) = 0 <0.000014>
15:37:54.534693 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 312143}, ru_stime={0, 48198}, ...}) = 0 <0.000062>
15:37:56.539985 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 329404}, ru_stime={0, 58908}, ...}) = 0 <0.000018>
15:37:58.542490 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 332266}, ru_stime={0, 70013}, ...}) = 0 <0.000029>
15:38:00.546928 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 352535}, ru_stime={0, 83527}, ...}) = 0 <0.000045>
15:38:02.550225 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 371273}, ru_stime={0, 96018}, ...}) = 0 <0.000011>
15:38:04.558715 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 377830}, ru_stime={0, 109984}, ...}) = 0 <0.000014>
15:38:06.559932 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 397581}, ru_stime={0, 136085}, ...}) = 0 <0.000013>
15:38:08.570251 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 409084}, ru_stime={0, 152916}, ...}) = 0 <0.000028>
15:38:10.572618 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 427753}, ru_stime={0, 167381}, ...}) = 0 <0.000028>
15:38:12.586048 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 441045}, ru_stime={0, 193792}, ...}) = 0 <0.000081>
15:38:14.604125 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 454608}, ru_stime={0, 212421}, ...}) = 0 <0.000042>
15:38:16.604363 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 464452}, ru_stime={0, 231549}, ...}) = 0 <0.000028>
15:38:18.626482 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 479808}, ru_stime={0, 252242}, ...}) = 0 <0.000013>
15:38:20.634835 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 490703}, ru_stime={0, 263907}, ...}) = 0 <0.000013>
15:38:22.639064 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 497624}, ru_stime={0, 281622}, ...}) = 0 <0.000018>
15:38:24.640630 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 507448}, ru_stime={0, 292334}, ...}) = 0 <0.000013>
15:38:26.656429 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 511999}, ru_stime={0, 311053}, ...}) = 0 <0.000091>
15:38:28.656760 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 528800}, ru_stime={0, 332948}, ...}) = 0 <0.000015>
15:38:30.659396 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 539324}, ru_stime={0, 341291}, ...}) = 0 <0.000014>
15:38:32.660070 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554088}, ru_stime={0, 358865}, ...}) = 0 <0.000013>
15:38:33.698162 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554088}, ru_stime={0, 379381}, ...}) = 0 <0.000010>
15:38:33.699511 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 379524}, ...}) = 0 <0.000008>
--//调用getrusage是2秒间隔.
$ awk '{print $2}' 0205mutex.txt |egrep "semtimedop|getrusage|sched_yield|select" | uniq -c | head -10
1 getrusage(0x1
1 sched_yield()
339 select(0,
1 getrusage(0x1
329 select(0,
1 getrusage(0x1
614 select(0,
1 getrusage(0x1
317 select(0,
1 getrusage(0x1
--//开始1次sched_yield()调用,其后调用select许多次,2秒之后调用getrusage,如此反复循环。
$ grep select 0205mutex.txt | head
15:37:50.871613 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001440>
15:37:50.873155 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001795>
15:37:50.875050 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001956>
15:37:50.877092 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001829>
15:37:50.879008 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001840>
15:37:50.880933 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001878>
15:37:50.882895 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001873>
15:37:50.884854 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001943>
15:37:50.886881 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001821>
15:37:50.888788 select(0, [], [], [], {0, 1000}) = 0 (Timeout) <0.001927>
--//休眠的时间单位微秒(1/10^6秒),1000微秒= 1000/10^6 = .001秒。select休眠的时间很短1毫秒。
--//而_mutex_wait_scheme=0是99次sched_yield,1次select(0.001秒),遇到getrusage分开,相加次数还是99次,如此反复循环。
--//而_mutex_wait_scheme=2,使用semtimedop调用(0.01秒)。
SYS@book> @ mutexprofz idn,loc "ts>=trunc(sysdate)+15/24+37/1440+50/86400"
-- MutexProf by Tanel Poder (http://www.tanelpoder.com)
-- Showing profile of top 20 sleeps...
-- column info : id idn hash hash_value=>hash_value ts=>sleep_timestamp
-- req=>requesting_session blk=>blocking_session val=>mutex_value maddr=>mutex_addr
SUM_SLEEPS GETS_DIFF MUTEX_TYPE IDN GET_LOCATION OBJECT_NAME
---------- -------------- --------------- ---------- --------------------------------- --------------------------------------------------------------------------------
11512 Library Cache 0 kglhdgn1 62 (name not found)
2 Library Cache 1068676711 kglpin1 4 SELECT /*+ NO_STATEMENT_QUEUING RESULT_CACHE (SYSOBJ=FALSE) */
"TS#","NAME","ONLINE$","CONTENTS$","BLOCKSIZE","BITMAPPED","FLAGS" FROM
"SYS"."TS$" "K" WHERE 1=1
--//0x00002cf8 = 11512
--//记录的sleeps次数也是11512次。GET_LOCATION 对应的数字62 与前面ashtop 看到的p2 0x3e对应,注意id=0表示bucket桶号。
$ grep "select" 0205mutex.txt |wc
11511 126621 851814
$ grep "sched_yield" 0205mutex.txt |wc
1 5 55
--//11511+1=11512=0x2cf8完全能对应上。
SYS@book> @opeek 0x6cfa13f0 48 0
[06CFA13F0, 06CFA1420) = 64283498 00000000 64283498 00000000 00000000 00000000 00000016 00002CF8 00000000 00000000 67B3EFC0 00000000
--//与opeek执行看到的一致。
6.小结:
--//缺省_mutex_wait_time=1,_mutex_wait_scheme=1的情况下select的时间是1毫秒。
--//_mutex_wait_scheme=1模式下开始1次sched_yield()调用,其后调用select许多次,2秒之后调用getrusage,如此反复循环。
--//_mutex_wait_scheme=1模式调用,调用select仅仅0.001秒。
--//测试mutex_wait_scheme=1,修改_mutex_wait_time的情况。按照前面介绍应该是select每次_mutex_wait_time,单位毫秒。
SYS@book> alter system set "_mutex_wait_time"=20 scope=memory;
System altered.
--//重复测试:
$ egrep "getrusage" 0205mutexa.txt | head -20
16:05:27.368592 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 381378}, ...}) = 0 <0.000009>
16:05:28.756046 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 554249}, ru_stime={0, 382136}, ...}) = 0 <0.000027>
16:05:30.777212 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 555697}, ru_stime={0, 386260}, ...}) = 0 <0.000046>
16:05:32.809704 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 556296}, ru_stime={0, 389981}, ...}) = 0 <0.000029>
16:05:34.808683 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 558242}, ru_stime={0, 390338}, ...}) = 0 <0.000157>
16:05:36.813974 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 561839}, ru_stime={0, 393287}, ...}) = 0 <0.000030>
16:05:38.817664 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 563650}, ru_stime={0, 397445}, ...}) = 0 <0.000038>
16:05:40.821995 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 564663}, ru_stime={0, 401474}, ...}) = 0 <0.000145>
16:05:42.858149 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 569048}, ru_stime={0, 403980}, ...}) = 0 <0.000033>
16:05:44.856437 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 571326}, ru_stime={0, 405598}, ...}) = 0 <0.000080>
16:05:46.873433 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 573060}, ru_stime={0, 407250}, ...}) = 0 <0.000017>
16:05:48.876284 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 576291}, ru_stime={0, 410389}, ...}) = 0 <0.000018>
16:05:50.910784 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 578221}, ru_stime={0, 410726}, ...}) = 0 <0.000015>
16:05:52.890046 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 578221}, ru_stime={0, 413457}, ...}) = 0 <0.000029>
16:05:54.904141 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 581566}, ru_stime={0, 413947}, ...}) = 0 <0.000028>
16:05:56.913854 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 584040}, ru_stime={0, 415708}, ...}) = 0 <0.000046>
16:05:58.895999 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 585411}, ru_stime={0, 418569}, ...}) = 0 <0.000008>
16:05:58.897166 getrusage(0x1 /* RUSAGE_??? */, {ru_utime={0, 585536}, ru_stime={0, 418658}, ...}) = 0 <0.000009>
--//间隔还是2秒。
--//除非设置"_mutex_wait_time">2000,估计没人这么做。
$ awk '{print $2}' 0205mutexa.txt |egrep "semtimedop|getrusage|sched_yield|select" | uniq -c | head -10
1 getrusage(0x1
1 sched_yield()
35 select(0,
1 getrusage(0x1
76 select(0,
1 getrusage(0x1
62 select(0,
1 getrusage(0x1
42 select(0,
1 getrusage(0x1
$ grep "select" 0205mutexa.txt |head -4
16:05:27.368878 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.028785>
16:05:27.397848 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.031134>
16:05:27.429072 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.021033>
16:05:27.450178 select(0, [], [], [], {0, 20000}) = 0 (Timeout) <0.040958>
--//休眠的时间单位微秒(1/10^6秒),20000微秒= 20000/10^6 = 0.020秒。select休眠的时间20毫秒。
--//符合前面介绍的_mutex_wait_scheme = 1 & _mutex_wait_time = t – Always SLEEP for t milli-seconds 一致。
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 一个费力不讨好的项目,让我损失了近一半的绩效!
· 清华大学推出第四讲使用 DeepSeek + DeepResearch 让科研像聊天一样简单!
· 实操Deepseek接入个人知识库
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
2024-02-19 [20240219]建立完善sql_idx.sh脚本.txt
2019-02-19 [20190219]那个更快(11g).txt