[20241108]跟踪library cache lock library cache pin使用gdb(11g)4.txt
[20241108]跟踪library cache lock library cache pin使用gdb(11g)4.txt
--//验证前面建立的gdb脚本确定library cache pin address是否正确.
1.环境:
SCOTT@book> @ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
--//grant execute on sys.dbms_lock to scott;
--// Test procedure: calls sys.dbms_lock.sleep(3600) and then returns.
--// Requires execute privilege on sys.dbms_lock for the owning schema.
CREATE PROCEDURE lcp
AS
BEGIN
    sys.dbms_lock.sleep(3600);
END lcp;
/
2.测试:
--//session 1:
SCOTT@book> exec lcp()
--//session 2:
SCOTT@book> @spid
==============================
SID : 53
SERIAL# : 31
PROCESS : 20523
SERVER : DEDICATED
SPID : 20524
PID : 27
P_SERIAL# : 15
KILL_COMMAND : alter system kill session '53,31' immediate;
PL/SQL procedure successfully completed.
SCOTT@book> set timing on
--//window 2:
$ rlgdb -f -p 20524 -x lkpn11g.gdb
--//session 2:
SCOTT@book> alter procedure lcp compile;
--//这样编译挂起!!
--//window 2:
kglGetS0 return pin address : 000000007ce3fdb8 000000007ce3fdb8
kgllkal count 49 -- handle address: 000000007c468fc8, mode: 3 kglnaobj address:0x7c469170: "LCPSCOTT\210覭|"
kglGetS0 return lock address : 000000007c13b6d0 000000007c13b6d0
kglpnal count 22 -- handle address: 000000007c468fc8, mode: 3 kglnaobj address:0x7c469170: "LCPSCOTT\210覭|"
kglGetS0 return pin address : 000000007c13b4d0 000000007c13b4d0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//session 4:
SYS@book> @ ashtop event,p1raw,p2raw,p3raw 1=1 &min
Total Distinct Distinct Distinct
Seconds AAS %This EVENT P1RAW P2RAW P3RAW FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ----------------- ----------------- ----------------- ------------------- ------------------- ---------- -------- -----------
60 1.0 100% | library cache pin 000000007C468FC8 000000007C13B4D0 0001759800010003 2024-11-08 10:29:53 2024-11-08 10:30:52 1 60 1
--//P2raw就是pin的地址000000007C13B4D0,可以发现完全对上,没有问题.
SYS@book> @ ev_namepr 'library cache pin'
==============================
EVENT# : 286
EVENT_ID : 2802704141
NAME : library cache pin
PARAMETER1 : handle address
PARAMETER2 : pin address
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PARAMETER3 : 100*mode+namespace
WAIT_CLASS_ID : 3875070507
WAIT_CLASS# : 4
WAIT_CLASS : Concurrency
PL/SQL procedure successfully completed.
--//P1=000000007C468FC8对应句柄地址.
$ ./ext_kglobz.sh 000000007C468FC8 '' 1a8
0x7c469170: "LCPSCOTT\210覭|"
SYS@book> select * from x$kglpn where KGLpnadr='000000007C13B4D0';
ADDR INDX INST_ID KGLPNADR KGLPNUSE KGLPNSES KGLPNSID KGLPNHDL KGLPNLCK KGLPNCNT KGLPNMOD KGLPNREQ KGLPNFLG KGLPNDMK KGLPNSPN KGLNAHSH
---------------- ---------- ---------- ---------------- ---------------- ---------------- ---------- ---------------- ---------------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
00007FAF5A01ED08 2 1 000000007C13B4D0 0000000085D109F8 0000000085D109F8 53 000000007C468FC8 00 0 0 3 4096 0 1077 3309827384
--//奇怪,oracle这个x表竟然没有类似x$kgllk.KGLNAOBJ字段.
SYS@book> @ sharepool/shp4 '' 3309827384
HANDLE_TYPE KGLHDADR KGLHDPAR C40 KGLHDLMD KGLHDPMD KGLHDIVC KGLOBHD0 KGLOBHD6 KGLOBHS0 KGLOBHS6 KGLOBT16 N0_6_16 N20 KGLNAHSH KGLOBT03 KGLOBT09
---------------------- ---------------- ---------------- ---------------------------------------- ---------- ---------- ---------- ---------------- ---------------- ---------- ---------- ---------- --------- ---------- ---------- ------------- ----------
parent handle address 000000007C468FC8 000000007C468FC8 LCP.SCOTT 3 2 0 000000007BF0ED90 00 4688 0 0 4688 16976 3309827384 0
--//一样可以定位对象是LCP.SCOTT.
3.继续:
--//如果再打开一个会话执行编译,出现library cache lock:
--//session 3:
SCOTT@book> @ spid
==============================
SID : 138
SERIAL# : 9
PROCESS : 20610
SERVER : DEDICATED
SPID : 20611
PID : 32
P_SERIAL# : 5
KILL_COMMAND : alter system kill session '138,9' immediate;
PL/SQL procedure successfully completed.
--//window 3:
$ rlgdb -f -p 20611 -x lkpn11g.gdb
--//session 3:
alter procedure lcp compile;
--//window 3,按c继续:
...
kglGetS0 return lock address : 000000007c1da1e0 000000007c1da1e0
kgllkal count 24 -- handle address: 000000007c08b128, mode: 1 kglnaobj address:0x7c08b2d0: ""
kglGetS0 return lock address : 000000007c1da0e0 000000007c1da0e0
kgllkal count 25 -- handle address: 000000007c468fc8, mode: 3 kglnaobj address:0x7c469170: "LCPSCOTT8括}"
kglGetS0 return lock address : 000000007c1d9fe0 000000007c1d9fe0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//lock address=000000007c1d9fe0
--//session 4:
SYS@book> @ ashtop event,p1raw,p2raw,p3raw 1=1 &min
Total Distinct Distinct Distinct
Seconds AAS %This EVENT P1RAW P2RAW P3RAW FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ----------------- ----------------- ----------------- ------------------- ------------------- ---------- -------- -----------
60 1.0 50% | library cache lock 000000007C468FC8 000000007C1D9FE0 0001759800010003 2024-11-08 10:42:01 2024-11-08 10:43:00 1 60 1
60 1.0 50% | library cache pin 000000007C468FC8 000000007C13B4D0 0001759800010003 2024-11-08 10:42:01 2024-11-08 10:43:00 1 60 1
1 .0 1% | 2024-11-08 10:42:30 2024-11-08 10:42:30 1 1 1
--//EVENT=library cache lock,P1都是一样,P2=000000007C1D9FE0,与前面gdb的跟踪一致.
--//顺便解析P3,PARAMETER3 : 100*mode+namespace
--//拆解几个部分 00017598 0001 0003 ,第2部分是namespace ,第3部分是mode,注意intel系列cpu的大小端(字节序)问题.
--//前面第一部分实际上是lcp对象的object_id.
SYS@book> @ o2 scott.lcp
owner object_name object_type SEG_PART_NAME status OID D_OID CREATED LAST_DDL_TIME
------------------------- ------------------------------ -------------------- -------------------- --------- ---------- ---------- ------------------- -------------------
SCOTT LCP PROCEDURE VALID 95640 2024-11-08 10:25:26 2024-11-08 10:25:26
--//95640 = 0x17598,正好对应对象的Object_id.如果阻塞是sql语句,这部分不存在的.
4.整理后gdb代码如下:
--//注意:每个测试环境不同,调用kglGetSO的返回地址会发生变化的,你必须根据自己的测试环境修改代码。
--//我不知道如何写脚本实现调用kglGetSO返回时,获取寄存器值。
$ cat lkpn11g.gdb2
# gdb command file: trace library cache lock/pin activity of an attached
# Oracle server process. Every breakpoint prints one line and continues,
# so the traced session keeps running without manual intervention.
# All output is also written to /tmp/lkpn.log (overwritten on each run).
set pagination off
set print repeats 0
set print elements 0
set logging file /tmp/lkpn.log
set logging overwrite on
set logging on

# Hit counters for the kgllkal / kglpnal breakpoints.
# $lock is initialised here but not referenced by any breakpoint below.
set $lk = 0
set $pn = 0
set $lock = 0

# kgllkal: print the hit count, $rsi as the handle address, $rdx as the mode,
# and the string stored at handle+0x1a8 (labelled kglnaobj in the output).
# %016lx is used instead of %016x because gdb converts the argument of a
# plain %x to int, which would truncate addresses at or above 4GB.
# Optional filters (enable one of them instead of the unconditional break):
#break kgllkal if $rdx==3
#break kgllkal if ( $rdx==3 && $rsi==0x00000000670C9E58 )
#break kgllkal if $rsi==0x00000000670C9E58
break kgllkal
commands
  silent
  printf "kgllkal count %02d -- handle address: %016lx, mode: %d ", ++$lk ,$rsi ,$rdx
  echo kglnaobj address:
  x/s $rsi+0x1a8
  c
end

# kglpnal: same report as for kgllkal.
# NOTE(review): the disabled filter below tests $rcx while the printf reports
# $rdx as the mode -- confirm which register really carries the pin mode.
#break kglpnal if $rcx==3
break kglpnal
commands
  silent
  printf "kglpnal count %02d -- handle address: %016lx, mode: %d ", ++$pn ,$rsi ,$rdx
  echo kglnaobj address:
  x/s $rsi+0x1a8
  c
end

# Disabled attempt to break on kglGetSO itself and run "finish" to reach its
# return; replaced by the two fixed-address breakpoints below.
#break kglGetSO
#commands
# silent
# finish
# end

# Fixed-address breakpoints used to print $rax and $rdx when kglGetSO returns.
# The addresses are specific to the binary they were taken from and must be
# adjusted for every other environment.
# First address: reported as the lock address.
break *0x000000000983db73
commands
  silent
  printf "kglGetS0 return lock address : %016lx %016lx\n", $rax,$rdx
  c
end

# Second address: reported as the pin address.
break *0x000000000983a048
commands
  silent
  printf "kglGetS0 return pin address : %016lx %016lx\n", $rax,$rdx
  c
end
--//验证前面建立的gdb脚本确定library cache pin address是否正确.
1.环境:
SCOTT@book> @ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
--//grant execute on sys.dbms_lock to scott;
--// Test procedure: calls sys.dbms_lock.sleep(3600) and then returns.
--// Requires execute privilege on sys.dbms_lock for the owning schema.
CREATE PROCEDURE lcp
AS
BEGIN
    sys.dbms_lock.sleep(3600);
END lcp;
/
2.测试:
--//session 1:
SCOTT@book> exec lcp()
--//session 2:
SCOTT@book> @spid
==============================
SID : 53
SERIAL# : 31
PROCESS : 20523
SERVER : DEDICATED
SPID : 20524
PID : 27
P_SERIAL# : 15
KILL_COMMAND : alter system kill session '53,31' immediate;
PL/SQL procedure successfully completed.
SCOTT@book> set timing on
--//window 2:
$ rlgdb -f -p 20524 -x lkpn11g.gdb
--//session 2:
SCOTT@book> alter procedure lcp compile;
--//这样编译挂起!!
--//window 2:
kglGetS0 return pin address : 000000007ce3fdb8 000000007ce3fdb8
kgllkal count 49 -- handle address: 000000007c468fc8, mode: 3 kglnaobj address:0x7c469170: "LCPSCOTT\210覭|"
kglGetS0 return lock address : 000000007c13b6d0 000000007c13b6d0
kglpnal count 22 -- handle address: 000000007c468fc8, mode: 3 kglnaobj address:0x7c469170: "LCPSCOTT\210覭|"
kglGetS0 return pin address : 000000007c13b4d0 000000007c13b4d0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//session 4:
SYS@book> @ ashtop event,p1raw,p2raw,p3raw 1=1 &min
Total Distinct Distinct Distinct
Seconds AAS %This EVENT P1RAW P2RAW P3RAW FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ----------------- ----------------- ----------------- ------------------- ------------------- ---------- -------- -----------
60 1.0 100% | library cache pin 000000007C468FC8 000000007C13B4D0 0001759800010003 2024-11-08 10:29:53 2024-11-08 10:30:52 1 60 1
--//P2raw就是pin的地址000000007C13B4D0,可以发现完全对上,没有问题.
SYS@book> @ ev_namepr 'library cache pin'
==============================
EVENT# : 286
EVENT_ID : 2802704141
NAME : library cache pin
PARAMETER1 : handle address
PARAMETER2 : pin address
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
PARAMETER3 : 100*mode+namespace
WAIT_CLASS_ID : 3875070507
WAIT_CLASS# : 4
WAIT_CLASS : Concurrency
PL/SQL procedure successfully completed.
--//P1=000000007C468FC8对应句柄地址.
$ ./ext_kglobz.sh 000000007C468FC8 '' 1a8
0x7c469170: "LCPSCOTT\210覭|"
SYS@book> select * from x$kglpn where KGLpnadr='000000007C13B4D0';
ADDR INDX INST_ID KGLPNADR KGLPNUSE KGLPNSES KGLPNSID KGLPNHDL KGLPNLCK KGLPNCNT KGLPNMOD KGLPNREQ KGLPNFLG KGLPNDMK KGLPNSPN KGLNAHSH
---------------- ---------- ---------- ---------------- ---------------- ---------------- ---------- ---------------- ---------------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
00007FAF5A01ED08 2 1 000000007C13B4D0 0000000085D109F8 0000000085D109F8 53 000000007C468FC8 00 0 0 3 4096 0 1077 3309827384
--//奇怪,oracle这个x表竟然没有类似x$kgllk.KGLNAOBJ字段.
SYS@book> @ sharepool/shp4 '' 3309827384
HANDLE_TYPE KGLHDADR KGLHDPAR C40 KGLHDLMD KGLHDPMD KGLHDIVC KGLOBHD0 KGLOBHD6 KGLOBHS0 KGLOBHS6 KGLOBT16 N0_6_16 N20 KGLNAHSH KGLOBT03 KGLOBT09
---------------------- ---------------- ---------------- ---------------------------------------- ---------- ---------- ---------- ---------------- ---------------- ---------- ---------- ---------- --------- ---------- ---------- ------------- ----------
parent handle address 000000007C468FC8 000000007C468FC8 LCP.SCOTT 3 2 0 000000007BF0ED90 00 4688 0 0 4688 16976 3309827384 0
--//一样可以定位对象是LCP.SCOTT.
3.继续:
--//如果再打开一个会话执行编译,出现library cache lock:
--//session 3:
SCOTT@book> @ spid
==============================
SID : 138
SERIAL# : 9
PROCESS : 20610
SERVER : DEDICATED
SPID : 20611
PID : 32
P_SERIAL# : 5
KILL_COMMAND : alter system kill session '138,9' immediate;
PL/SQL procedure successfully completed.
--//window 3:
$ rlgdb -f -p 20611 -x lkpn11g.gdb
--//session 3:
alter procedure lcp compile;
--//window 3,按c继续:
...
kglGetS0 return lock address : 000000007c1da1e0 000000007c1da1e0
kgllkal count 24 -- handle address: 000000007c08b128, mode: 1 kglnaobj address:0x7c08b2d0: ""
kglGetS0 return lock address : 000000007c1da0e0 000000007c1da0e0
kgllkal count 25 -- handle address: 000000007c468fc8, mode: 3 kglnaobj address:0x7c469170: "LCPSCOTT8括}"
kglGetS0 return lock address : 000000007c1d9fe0 000000007c1d9fe0
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//lock address=000000007c1d9fe0
--//session 4:
SYS@book> @ ashtop event,p1raw,p2raw,p3raw 1=1 &min
Total Distinct Distinct Distinct
Seconds AAS %This EVENT P1RAW P2RAW P3RAW FIRST_SEEN LAST_SEEN Execs Seen Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ----------------- ----------------- ----------------- ------------------- ------------------- ---------- -------- -----------
60 1.0 50% | library cache lock 000000007C468FC8 000000007C1D9FE0 0001759800010003 2024-11-08 10:42:01 2024-11-08 10:43:00 1 60 1
60 1.0 50% | library cache pin 000000007C468FC8 000000007C13B4D0 0001759800010003 2024-11-08 10:42:01 2024-11-08 10:43:00 1 60 1
1 .0 1% | 2024-11-08 10:42:30 2024-11-08 10:42:30 1 1 1
--//EVENT=library cache lock,P1都是一样,P2=000000007C1D9FE0,与前面gdb的跟踪一致.
--//顺便解析P3,PARAMETER3 : 100*mode+namespace
--//拆解几个部分 00017598 0001 0003 ,第2部分是namespace ,第3部分是mode,注意intel系列cpu的大小端(字节序)问题.
--//前面第一部分实际上是lcp对象的object_id.
SYS@book> @ o2 scott.lcp
owner object_name object_type SEG_PART_NAME status OID D_OID CREATED LAST_DDL_TIME
------------------------- ------------------------------ -------------------- -------------------- --------- ---------- ---------- ------------------- -------------------
SCOTT LCP PROCEDURE VALID 95640 2024-11-08 10:25:26 2024-11-08 10:25:26
--//95640 = 0x17598,正好对应对象的Object_id.如果阻塞是sql语句,这部分不存在的.
4.整理后gdb代码如下:
--//注意:每个测试环境不同,调用kglGetSO的返回地址会发生变化的,你必须根据自己的测试环境修改代码。
--//我不知道如何写脚本实现调用kglGetSO返回时,获取寄存器值。
$ cat lkpn11g.gdb2
# gdb command file: trace library cache lock/pin activity of an attached
# Oracle server process. Every breakpoint prints one line and continues,
# so the traced session keeps running without manual intervention.
# All output is also written to /tmp/lkpn.log (overwritten on each run).
set pagination off
set print repeats 0
set print elements 0
set logging file /tmp/lkpn.log
set logging overwrite on
set logging on

# Hit counters for the kgllkal / kglpnal breakpoints.
# $lock is initialised here but not referenced by any breakpoint below.
set $lk = 0
set $pn = 0
set $lock = 0

# kgllkal: print the hit count, $rsi as the handle address, $rdx as the mode,
# and the string stored at handle+0x1a8 (labelled kglnaobj in the output).
# %016lx is used instead of %016x because gdb converts the argument of a
# plain %x to int, which would truncate addresses at or above 4GB.
# Optional filters (enable one of them instead of the unconditional break):
#break kgllkal if $rdx==3
#break kgllkal if ( $rdx==3 && $rsi==0x00000000670C9E58 )
#break kgllkal if $rsi==0x00000000670C9E58
break kgllkal
commands
  silent
  printf "kgllkal count %02d -- handle address: %016lx, mode: %d ", ++$lk ,$rsi ,$rdx
  echo kglnaobj address:
  x/s $rsi+0x1a8
  c
end

# kglpnal: same report as for kgllkal.
# NOTE(review): the disabled filter below tests $rcx while the printf reports
# $rdx as the mode -- confirm which register really carries the pin mode.
#break kglpnal if $rcx==3
break kglpnal
commands
  silent
  printf "kglpnal count %02d -- handle address: %016lx, mode: %d ", ++$pn ,$rsi ,$rdx
  echo kglnaobj address:
  x/s $rsi+0x1a8
  c
end

# Disabled attempt to break on kglGetSO itself and run "finish" to reach its
# return; replaced by the two fixed-address breakpoints below.
#break kglGetSO
#commands
# silent
# finish
# end

# Fixed-address breakpoints used to print $rax and $rdx when kglGetSO returns.
# The addresses are specific to the binary they were taken from and must be
# adjusted for every other environment.
# First address: reported as the lock address.
break *0x000000000983db73
commands
  silent
  printf "kglGetS0 return lock address : %016lx %016lx\n", $rax,$rdx
  c
end

# Second address: reported as the pin address.
break *0x000000000983a048
commands
  silent
  printf "kglGetS0 return pin address : %016lx %016lx\n", $rax,$rdx
  c
end