[20240827]分析为什么出现library cache lock等待事件2.txt

[20240827]分析为什么出现library cache lock等待事件2.txt

--//前几天一直在分析如果表不存在的情况下,密集执行为什么出现library cache lock等待事件,而且出现的mode=2(共享模式),按照道
--//理不应该阻塞,做一个分析.

1.环境:
SCOTT@book01p> @ver2
==============================
PORT_STRING                   : x86_64/Linux 2.4.xx
VERSION                       : 21.0.0.0.0
BANNER                        : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
BANNER_FULL                   : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
Version 21.3.0.0.0
BANNER_LEGACY                 : Oracle Database 21c Enterprise Edition Release 21.0.0.0.0 - Production
CON_ID                        : 0
PL/SQL procedure successfully completed.

2.首先定位该对象handle address,编写gdb脚本:
$ cat lcl.gdb
set $lk  = 0
break kgllkal
commands
silent
printf "kgllkal count %02d -- handle address: %016x, mode: %d ", ++$lk ,$rdx ,$rcx
echo kglnaobj address:
x/s $rdx+0x1c8
finish
end

--//session 1:
SCOTT@book01p(290,36393)> @ spid
 SID SERIAL# PROCESS SERVER    SPID PID  P_SERIAL# C50
---- ------- ------- --------- ---- --- ---------- --------------------------------------------------
 290   36393 3803    DEDICATED 3805  62          2 alter system kill session '290,36393' immediate;

--//window 1: (注:表示打开新的bash shell终端,与上面区分开来)
$ rlgdb -f -p 3805 -x lcl.gdb
...

--//session 1:
SCOTT@book01p(290,36393)> select count(1) from deptxxx;

--//window 1:按不断c继续:
(gdb) c
Continuing.
kgllkal count 04 -- handle address: 000000006a65df50, mode: 2 kglnaobj address:0x6a65e118:      "aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"
0x0000000015363ad0 in kglLock ()
--//出现"aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"记下handle address: 000000006a65df50.
--//退出gdb界面.
--//修改lcl.gdb文件;
$ cat lcl.gdb
set $lk  = 0
break kgllkal if ( $rdx==0x000000006a65df50 )
#break kgllkal
commands
silent
printf "kgllkal count %02d -- handle address: %016x, mode: %d ", ++$lk ,$rdx ,$rcx
echo kglnaobj address:
x/s $rdx+0x1c8
finish
end

--//window 1,再次重启gdb检测:
$ rlgdb -f -p 3805 -x lcl.gdb

--//session 2:
SCOTT@book01p(151,22503)> @ spid
 SID  SERIAL# PROCESS SERVER    SPID PID  P_SERIAL# C50
---- -------- ------- --------- ---- --- ---------- --------------------------------------------------
 151    22503 4065    DEDICATED 4067  65          2 alter system kill session '151,22503' immediate;

--//window 2:
--//window 1检测session 1.window 2检测session 2.
$ rlgdb -f -p 4067 -x lcl.gdb

--//session 1:
SCOTT@book01p(290,36393)> select count(1) from deptxxx;
--//挂起!!

--//window 1,按c继续:
Breakpoint 1 at 0x15367e90
(gdb) c
Continuing.
kgllkal count 01 -- handle address: 000000006a65df50, mode: 2 kglnaobj address:0x6a65e118:      "aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"
0x0000000015363ad0 in kglLock ()
--//停在kglLock.

--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
       290          2          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50

--//session 2:
SCOTT@book01p(151,22503)> select count(1) from deptxxx;
--//挂起!!

--//window 2,按c继续:
Breakpoint 1 at 0x15367e90
(gdb) c
Continuing.
kgllkal count 01 -- handle address: 000000006a65df50, mode: 2 kglnaobj address:0x6a65e118:      "aa0b8a5e997323ce2d65f9b7af91ed4f$BUILD$BOOK01P"
0x0000000015363ad0 in kglLock ()
--//停在kglLock.

--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));

       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
       290          2          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       151          2          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
--//现在sid=151,290同时持有00000000701E7A28的library cache lock,mode=2,说明共享模式没有问题.

SYS@book> @ ashtop event,p1,p1hex,p3hex "event='library cache lock'" &10s
no rows selected
--//现在并没有阻塞.出现等待事件在library cache lock.

--//window 1,按c继续:
(gdb) c
Continuing.

--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));

       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
       290          0          3 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       151          2          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
--//sid=290开始释放library cache lock,但是还没有释放,KGLLKMOD=0,KGLLKREQ=3,请求mode=3.

--//window 1,如果等几十秒,你可以发现如下信息;
(gdb) c
Continuing.

Program received signal SIGUSR2, User defined signal 2.
0x00007f65c72a0fca in semtimedop () at ../sysdeps/unix/syscall-template.S:81
/usr/src/debug/glibc-2.17-c758a686/sysdeps/unix/syscall-template.S:81:3374:beg:0x7f65c72a0fca
--//sesson 1进入休眠模式,注意信号semtimedop.

(gdb) bt 8
#0  0x00007f65c72a0fca in semtimedop () at ../sysdeps/unix/syscall-template.S:81
#1  0x0000000015339805 in sskgpwwait ()
#2  0x0000000015334595 in skgpwwait ()
#3  0x000000001498b09e in ksliwat ()
#4  0x000000001498a4ba in kslwaitctx ()
#5  0x000000000591f453 in kglLockWait ()
#6  0x00000000058f9e65 in kglUpgradeLock ()
#7  0x0000000003ffce7c in kksGetBuildLock ()
(More stack frames follow...)

--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
       290          0          3 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       151          2          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50

SYS@book> @ ashtop event,p1,p1hex,p3hex,sid 1=1 &10s

    Total                                                                                                                                                                Distinct Distinct    Distinct
  Seconds     AAS %This   EVENT                                              P1 P1HEX             P3HEX                    SID FIRST_SEEN          LAST_SEEN           Execs Seen  Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ---------- ----------------- ----------------- ---------- ------------------- ------------------- ---------- -------- -----------
       10     1.0   37% | library cache lock                         1785061200  000000006A65DF50  0000000000520003        290 2024-08-27 10:00:27 2024-08-27 10:00:36          1       10          10
       10     1.0   37% |                                            1413697536                                            151 2024-08-27 10:00:27 2024-08-27 10:00:36          1       10          10
        5      .5   19% |                                                   100                                            132 2024-08-27 10:00:27 2024-08-27 10:00:36          1        5           5
        2      .2    7% |                                                   100                                            261 2024-08-27 10:00:29 2024-08-27 10:00:34          1        2           2
--//实际上现在sid=290,P3HEX=0000000000520003,也就是释放时是尝试持有mode=3,排他模式.

SYS@book> @ wcx &10s
-- Display ASH Wait Chain Signatures script v0.7 by Tanel Poder ( http://blog.tanelpoder.com )
%This     SECONDS     AAS WAIT_CHAIN                                                               FIRST_SEEN          LAST_SEEN
------ ---------- ------- ------------------------------------------------------------------------ ------------------- -------------------
  38%          10     1.0 -> ,,@=>151,22503,@1=>                                                   2024-08-27 10:00:55 2024-08-27 10:01:04
  38%          10     1.0 -> 151,22503,@1=>290,36393,@1=>library cache lock -> ,,@=>151,22503,@1=> 2024-08-27 10:00:55 2024-08-27 10:01:04
  19%           5      .5 -> ,,@=>132,3926,@1=>                                                    2024-08-27 10:00:57 2024-08-27 10:01:02
   4%           1      .1 -> ,,@=>270,42455,@1=>                                                   2024-08-27 10:01:04 2024-08-27 10:01:04
--//这时等待链条是 151->290->library cache lock->151.阻塞者是sid=290.

--//window 2,按c继续:
(gdb) c
Continuing.
Program received signal SIGUSR2, User defined signal 2.
0x0000000015363ad0 in kglLock ()
--//session 2停在kglLock,到session 2释放library cache lock.

(gdb) c
Continuing.

--//等一小段时间...
Program received signal SIGUSR2, User defined signal 2.
0x00007fd3f17fdfca in semtimedop () at ../sysdeps/unix/syscall-template.S:81
/usr/src/debug/glibc-2.17-c758a686/sysdeps/unix/syscall-template.S:81:3374:beg:0x7fd3f17fdfca
--//sesson 2进入休眠模式,注意信号semtimedop.

--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));

       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
       151          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       290          3          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
--//这时sid=290持有mode=3.sid=151的KGLLKREQ=2.
--//为什么这时sid=151的KGLLKREQ=3呢?也许sid=290,持有mode=3,只能请求mode=2.

SYS@book> @ ashtop event,p1,p1hex,p3hex,sid 1=1 &10s
    Total                                                                                                                                                                Distinct Distinct    Distinct
  Seconds     AAS %This   EVENT                                              P1 P1HEX             P3HEX                    SID FIRST_SEEN          LAST_SEEN           Execs Seen  Tstamps Execs Seen1
--------- ------- ------- ------------------------------------------ ---------- ----------------- ----------------- ---------- ------------------- ------------------- ---------- -------- -----------
       10     1.0   45% | library cache lock                         1785061200  000000006A65DF50  0000000000520002        151 2024-08-27 10:05:47 2024-08-27 10:05:56          1       10          10
       10     1.0   45% | library cache lock                         1785061200  000000006A65DF50  0000000000520003        290 2024-08-27 10:05:47 2024-08-27 10:05:56          1       10          10
        1      .1    5% |                                                     3                                              5 2024-08-27 10:05:54 2024-08-27 10:05:54          1        1           1
        1      .1    5% |                                                   100                                            132 2024-08-27 10:05:53 2024-08-27 10:05:53          1        1           1
--//sid=151 看到P3hex=0000000000520002.

--//window 1,按c继续:
(gdb) c
Continuing.

--//session 1:
SCOTT@book01p(290,36393)> select count(1) from deptxxx;
select count(1) from deptxxx
                     *
ERROR at line 1:
ORA-00942: table or view does not exist
--//报错,执行完成!!

--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
       151          2          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
--//sid=151 现在持有mode=2.

--//window 2,按c继续:
(gdb) c
Continuing.

--//session 2:
SCOTT@book01p(151,22503)> select count(1) from deptxxx;
select count(1) from deptxxx
                     *
ERROR at line 1:
ORA-00942: table or view does not exist
--//报错,执行完成!!

--//session 3:
SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));
no rows selected

3.简单总结:
--//实际上问题在于后续的kglLock函数调用,当存在多个持有mode=2的情况,释放lock,请求的mode=3排他模式方式,以前一直以为kgllkal
--//持有的mode=2,不会存在阻塞.问题实际上在于后续的操作.
--//更正:实际上即使一个会话,也是请求mode=3的排他模式.

$ head -4 m2.sh ;tail -4 m2.sh
#! /bin/bash
sleep $(echo $1/500 | bc -l )
sqlplus -s -l scott/book@book01p <<EOF  >/dev/null
set feedback off
/
/
quit
EOF
--//中间20000个/.

$ zzdate ; seq 10 | xargs -IQ -P 5 ./m2.sh Q ;zzdate
trunc(sysdate)+10/24+43/1440+01/86400 == 2024/08/27 10:43:01

SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));

       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
       145          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       277          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       397          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
        24          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
        30          3          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50

SYS@book> select KGLLKSNM sid ,KGLLKMOD,KGLLKREQ,KGLNAOBJ,KGLLKHDL from x$kgllk where KGLLKHDL=hextoraw(upper('000000006a65df50'));

       SID   KGLLKMOD   KGLLKREQ KGLNAOBJ                                 KGLLKHDL
---------- ---------- ---------- ---------------------------------------- ----------------
        17          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
        24          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       397          0          2 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       145          0          3 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50
       277          2          0 aa0b8a5e997323ce2d65f9b7af91ed4f         000000006A65DF50


posted @ 2024-09-01 17:11  lfree  阅读(10)  评论(0编辑  收藏  举报