[20210316]MSSM表空间块ITL的LCK 3.txt
[20210316]MSSM表空间块ITL的LCK 3.txt
--//以前的测试,链接:http://blog.itpub.net/267265/viewspace-2564734/=>[20190125]MSSM表空间块ITL的LCK.txt
--//昨天遇到的问题ORA-04000 the sum of PCTUSED and PCTFREE cannot exceed 100,链接:http://blog.itpub.net/267265/viewspace-2762819/
--//想想是否与这个问题相关呢?这个也是mssm与assm的不同的地方,我决定测试看看。
1.环境:
SCOTT@book> @ ver1
PORT_STRING VERSION BANNER
------------------------------ -------------- --------------------------------------------------------------------------------
x86_64/Linux 2.4.xx 11.2.0.4.0 Oracle Database 11g Enterprise Edition Release 11.2.0.4.0 - 64bit Production
CREATE TABLESPACE MSSM DATAFILE
'/mnt/ramdisk/book/mssm01.dbf' SIZE 40M AUTOEXTEND ON NEXT 16M MAXSIZE UNLIMITED
LOGGING
ONLINE
EXTENT MANAGEMENT LOCAL AUTOALLOCATE
BLOCKSIZE 8K
SEGMENT SPACE MANAGEMENT MANUAL
FLASHBACK ON;
2.测试:
SCOTT@book> create table t tablespace mssm pctfree 99 pctused 1 as select level id, rpad(level, 3500, 'X') vc from dual connect by level <= 1;
Table created.
--//注:缺省pctfree=10,PCTUSED=40.
SCOTT@book> @ desc t
Name Null? Type
----- -------- ----------------------------
ID NUMBER
VC VARCHAR2(4000)
SCOTT@book> select rowid ,id,substr(vc,1,2) from t;
ROWID ID SUBS
------------------ ---------- ----
AAAWMkAAHAAAACBAAA 1 1X
SCOTT@book> @ rowid AAAWMkAAHAAAACBAAA
OBJECT FILE BLOCK ROW ROWID_DBA DBA TEXT
---------- ---------- ---------- ---------- -------------------- -------------------- ----------------------------------------
90916 7 129 0 0x1C00081 7,129 alter system dump datafile 7 block 129 ;
SCOTT@book> delete from t where id=1;
1 row deleted.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter system flush buffer_cache ;
System altered.
SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.
--//检查转储发现:
Block header dump: 0x01c00081
Object id on Block? Y
seg/obj: 0x16324 csc: 0x03.177d9219 itc: 3 flg: O typ: 1 - DATA
fsl: 2 fnx: 0x0 ver: 0x01
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.177d9219
0x02 0x000a.00b.00005d86 0x00c00b60.11db.0b --U- 2 fsc 0x0db3.177d9231
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03 0x0000.000.00000000 0x00000000.0000.00 ---- 0 fsc 0x0000.00000000
--//实际上删除1条记录,而ITL=0x02的LCK记录的是2.这个是我以前遇到的情况。
3.继续测试:
--//drop table t purge;
SCOTT@book> create table t tablespace mssm pctfree 40 pctused 1 as select level id, rpad('X', 2000, 'X') vc from dual connect by level <= 4;
Table created.
SCOTT@book> @ desc t
Name Null? Type
----- -------- ----------------------------
ID NUMBER
VC VARCHAR2(2000)
SCOTT@book> select rowid ,id,substr(vc,1,2) from t;
ROWID ID SUBS
------------------ ---------- ----
AAAWMnAAHAAAACBAAA 1 XX
AAAWMnAAHAAAACBAAB 2 XX
AAAWMnAAHAAAACCAAA 3 XX
AAAWMnAAHAAAACCAAB 4 XX
SCOTT@book> delete from t where id=1;
1 row deleted.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter system flush buffer_cache ;
System altered.
SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.
--//检查转储发现:
Block header dump: 0x01c00081
Object id on Block? Y
seg/obj: 0x16327 csc: 0x03.177d9893 itc: 3 flg: - typ: 1 - DATA
fsl: 0 fnx: 0x0 ver: 0x01
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.177d9893
0x02 0x000a.00f.00005d89 0x00c00b6c.11db.0a --U- 1 fsc 0x07d7.177d98a6
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03 0x0000.000.00000000 0x00000000.0000.00 ---- 0 fsc 0x0000.00000000
--//注意看下划线LCK=1,而不是前面的2.
SCOTT@book> delete from t where id=2;
1 row deleted.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter system flush buffer_cache ;
System altered.
SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.
--//检查转储发现:
Block header dump: 0x01c00081
Object id on Block? Y
seg/obj: 0x16327 csc: 0x03.177d9893 itc: 3 flg: O typ: 1 - DATA
fsl: 3 fnx: 0x0 ver: 0x01
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.177d9893
0x02 0x000a.00f.00005d89 0x00c00b6c.11db.0a --U- 1 fsc 0x07d7.177d98a6
0x03 0x000a.011.00005cd6 0x00c00b6b.11db.1e --U- 2 fsc 0x07d7.177d9977
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--//可以发现这次删除是对应事务槽的LCK=2.也就是多产生的LCK与PCTUSER的大小相关。
--//当我删除1条记录时,还没有满足pctused=1的情况,而再删除记录时,该块已经没有记录,一定这样满足pctused=1的情况。
--//这样标识该块可以再次投入使用,一些细节我不是很清楚,这样多记录一个事务,从哪里反映出来,我还是不清楚。
4.可以通过如下验证我的推断:
--//drop table t purge;
SCOTT@book> create table t tablespace mssm pctfree 40 pctused 60 as select level id, rpad('X', 2000, 'X') vc from dual connect by level <= 4;
Table created.
--//注:pctfree=40,PCTUSED=60. 这样即使我删除1条记录也满足这个触发条件(pctused=60).
SCOTT@book> select rowid ,id,substr(vc,1,2) from t;
ROWID ID SUBS
------------------ ---------- ----
AAAWMoAAHAAAACBAAA 1 XX
AAAWMoAAHAAAACBAAB 2 XX
AAAWMoAAHAAAACCAAA 3 XX
AAAWMoAAHAAAACCAAB 4 XX
SCOTT@book> delete from t where id=2;
1 row deleted.
SCOTT@book> @ xid
XIDUSN_XIDSLOT_XIDSQN
------------------------------
6.11.2062
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter system flush buffer_cache ;
System altered.
SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.
--//检查转储发现:
Block header dump: 0x01c00081
Object id on Block? Y
seg/obj: 0x16328 csc: 0x03.177d9bae itc: 3 flg: O typ: 1 - DATA
fsl: 2 fnx: 0x0 ver: 0x01
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.177d9bae
0x02 0x0006.00b.0000080e 0x00c009bb.03c4.14 --U- 2 fsc 0x07d7.177d9c1f
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03 0x0000.000.00000000 0x00000000.0000.00 ---- 0 fsc 0x0000.00000000
--//继续删除看看:
SCOTT@book> delete from t where id=1;
1 row deleted.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter system checkpoint ;
System altered.
SCOTT@book> alter system dump datafile 7 block 129 ;
System altered.
--//检查转储发现:
Block header dump: 0x01c00081
Object id on Block? Y
seg/obj: 0x16328 csc: 0x03.177d9bae itc: 3 flg: O typ: 1 - DATA
fsl: 2 fnx: 0x0 ver: 0x01
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.177d9bae
0x02 0x0006.00b.0000080e 0x00c009bb.03c4.14 --U- 2 fsc 0x07d7.177d9c1f
0x03 0x000a.007.00005d7c 0x00c00b6f.11db.0c --U- 1 fsc 0x07d7.177d9c92
--//也就是检测1次。这也是我前面测试遇到的情况。
5.测试到这里就可以知道理论控制pctused的大小就可以控制LCK产生的数量。
或者这样将如果事务的dml没有触发满足pctused的条件,LCK就不会原来事务的基础上增加1。
SCOTT@book> alter table t pctused 10;
Table altered.
SCOTT@book> delete from t where id=3;
1 row deleted.
SCOTT@book> @ xid
XIDUSN_XIDSLOT_XIDSQN
------------------------------
5.2.1925
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter system checkpoint ;
System altered.
SCOTT@book> alter system dump datafile 7 block 130 ;
System altered.
--//检查转储发现:
Block header dump: 0x01c00082
Object id on Block? Y
seg/obj: 0x16328 csc: 0x03.177d9bae itc: 3 flg: - typ: 1 - DATA
fsl: 0 fnx: 0x0 ver: 0x01
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.177d9bae
0x02 0x0005.002.00000785 0x00c000c7.0547.19 --U- 1 fsc 0x07d7.177d9f0c
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
0x03 0x0000.000.00000000 0x00000000.0000.00 ---- 0 fsc 0x0000.00000000
--//这也再次验证我的判断,但是有一点我一直没明白,oracle如何知道还有一个事务呢?比如我再删除id=4.
--//这时对应的ITL槽LCK记录的一定是2.
SCOTT@book> delete from t where id=4;
1 row deleted.
SCOTT@book> commit ;
Commit complete.
SCOTT@book> alter system checkpoint ;
System altered.
SCOTT@book> alter system dump datafile 7 block 130 ;
System altered.
--//检查转储发现:
Block header dump: 0x01c00082
Object id on Block? Y
seg/obj: 0x16328 csc: 0x03.177d9bae itc: 3 flg: O typ: 1 - DATA
fsl: 3 fnx: 0x0 ver: 0x01
Itl Xid Uba Flag Lck Scn/Fsc
0x01 0xffff.000.00000000 0x00000000.0000.00 C--- 0 scn 0x0003.177d9bae
0x02 0x0005.002.00000785 0x00c000c7.0547.19 --U- 1 fsc 0x07d7.177d9f0c
0x03 0x000a.010.00005d8d 0x00c00b70.11db.0c --U- 2 fsc 0x07d7.177da008
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
6.如果你通过bbed修改itl槽信息会出现什么情况呢?
BBED> p dba 7,130 ktbbh.ktbbhitl[2]
struct ktbbhitl[2], 24 bytes @92
struct ktbitxid, 8 bytes @92
ub2 kxidusn @92 0x000a
ub2 kxidslt @94 0x0010
ub4 kxidsqn @96 0x00005d8d
struct ktbituba, 8 bytes @100
ub4 kubadba @100 0x00c00b70
ub2 kubaseq @104 0x11db
ub1 kubarec @106 0x0c
ub2 ktbitflg @108 0x2002 (KTBFUPB)
union _ktbitun, 2 bytes @110
sb2 _ktbitfsc @110 2007
ub2 _ktbitwrp @110 0x07d7
ub4 ktbitbas @112 0x177da008
BBED> assign dba 7,130 ktbbh.ktbbhitl[2].ktbitflg=0x2001
ub2 ktbitflg @108 0x2001 (KTBFUPB)
--//注:相当于修改该ITL槽的LCK=1.
BBED> sum apply
Check value for File 7, Block 130:
current = 0xfae4, required = 0xfae4
BBED> verify
DBVERIFY - Verification starting
FILE = /mnt/ramdisk/book/mssm01.dbf
BLOCK = 130
Block Checking: DBA = 29360258, Block Type = KTB-managed data block
data header at 0x1f41e74
kdbchk: xaction header lock count mismatch
trans=3 ilk=1 nlo=2
Block 130 failed with check code 6108
--//你可以发现检查时报错,oracle的检查如何知道LCK应该是2呢?在该块中有什么或者标识反映这样的变化呢?
--//难道在段头dba=7,128处。
BBED> p dba 7,128 ktsfs_txn[0]
struct ktsfs_txn[0], 20 bytes @4176
ub2 ktsfsflg @4176 0x0001 (KTSUSED)
struct ktsfsxid, 8 bytes @4180
ub2 kxidusn @4180 0x0006
ub2 kxidslt @4182 0x000b
ub4 kxidsqn @4184 0x0000080e
ub4 ktsfslhd @4188 0x01c00081
ub4 ktsfsltl @4192 0x01c00081
--//0x02 0x0006.00b.0000080e 0x00c009bb.03c4.14 --U- 2 fsc 0x07d7.177d9c1f
--//注意:kxidusn,kxidslt,kxidsqn的事务槽记录的一致。
BBED> p dba 7,128 ktsfs_txn[1]
struct ktsfs_txn[1], 20 bytes @4196
ub2 ktsfsflg @4196 0x0001 (KTSUSED)
struct ktsfsxid, 8 bytes @4200
ub2 kxidusn @4200 0x000a
ub2 kxidslt @4202 0x0010
ub4 kxidsqn @4204 0x00005d8d
ub4 ktsfslhd @4208 0x01c00082
ub4 ktsfsltl @4212 0x01c00082
--//0x01c00082 = set dba 7,130 = alter system dump datafile 7 block 130 = 29360258
--//0x03 0x000a.010.00005d8d 0x00c00b70.11db.0c --U- 2 fsc 0x07d7.177da008
--//注意:kxidusn,kxidslt,kxidsqn的事务槽记录的一致。
--//跟踪bbed的执行看看:
$ ps -ef | grep bbe[d]
oracle 55921 20345 0 Mar15 pts/5 00:00:00 /usr/local/bin/rlwrap -s 9999 -c -r -i -f /usr/local/share/rlwrap/bbed /u01/app/oracle/product/11.2.0.4/dbhome_1/bin/bbed parfile=bbed.par cmdfile=cmd.par
oracle 55922 55921 0 Mar15 pts/10 00:00:00 bbed app/oracle/product/11.2.0.4/dbhome_1/bin/bbed parfile=bbed.par cmdfile=cmd.par
$ ls -l /proc/55922/fd
total 0
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 0 -> /dev/pts/10
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 1 -> /dev/pts/10
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 10 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/rdbms/mesg/bbedus.msb
lrwx------ 1 oracle oinstall 64 2021-03-16 08:42:11 2 -> /dev/pts/10
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 3 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/rdbms/mesg/bbedus.msb
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 4 -> /u01/app/oracle/product/11.2.0.4/dbhome_1/oracore/mesg/lrmus.msb
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 5 -> /home/oracle/bbed/filelist.txt
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 6 -> /home/oracle/bbed/bifile.bbd
l-wx------ 1 oracle oinstall 64 2021-03-16 10:17:51 7 -> /home/oracle/bbed/log.bbd
lr-x------ 1 oracle oinstall 64 2021-03-16 10:17:51 8 -> /home/oracle/bbed/cmd.par
lrwx------ 1 oracle oinstall 64 2021-03-16 10:17:51 9 -> /mnt/ramdisk/book/mssm01.dbf
--//文件句柄9对应/mnt/ramdisk/book/mssm01.dbf。
$ strace -f -p 55922 -e read,lseek -o /tmp/bbed.txt
Process 55922 attached - interrupt to quit
^CProcess 55922 detached
$ egrep "lseek.9" /tmp/bbed.txt
55922 lseek(9, 1064960, SEEK_SET) = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, SEEK_SET) = 1065984
55922 lseek(9, 1067008, SEEK_SET) = 1067008
55922 lseek(9, 1068032, SEEK_SET) = 1068032
55922 lseek(9, 1069056, SEEK_SET) = 1069056
55922 lseek(9, 1070080, SEEK_SET) = 1070080
55922 lseek(9, 1071104, SEEK_SET) = 1071104
55922 lseek(9, 1072128, SEEK_SET) = 1072128
55922 lseek(9, 1064960, SEEK_SET) = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, SEEK_SET) = 1065984
55922 lseek(9, 1067008, SEEK_SET) = 1067008
55922 lseek(9, 1068032, SEEK_SET) = 1068032
55922 lseek(9, 1069056, SEEK_SET) = 1069056
55922 lseek(9, 1070080, SEEK_SET) = 1070080
55922 lseek(9, 1071104, SEEK_SET) = 1071104
55922 lseek(9, 1072128, SEEK_SET) = 1072128
55922 lseek(9, 1064960, SEEK_SET) = 1064960
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
55922 lseek(9, 1065984, SEEK_SET) = 1065984
55922 lseek(9, 1067008, SEEK_SET) = 1067008
55922 lseek(9, 1068032, SEEK_SET) = 1068032
55922 lseek(9, 1069056, SEEK_SET) = 1069056
55922 lseek(9, 1070080, SEEK_SET) = 1070080
55922 lseek(9, 1071104, SEEK_SET) = 1071104
55922 lseek(9, 1072128, SEEK_SET) = 1072128
--//130*8192 = 1064960,扫描dba=7,130块3次,并没有扫描dba=7,128,oracle是如何检查的呢?
--//检查N久发现仅仅可能ktbbh.ktbbhflg,ktbbh.ktbbhfsl
BBED> p dba 7,130 ktbbh
struct ktbbh, 96 bytes @20
ub1 ktbbhtyp @20 0x01 (KDDBTDATA)
union ktbbhsid, 4 bytes @24
ub4 ktbbhsg1 @24 0x00016328
ub4 ktbbhod1 @24 0x00016328
struct ktbbhcsc, 8 bytes @28
ub4 kscnbas @28 0x177d9bae
ub2 kscnwrp @32 0x0003
sb2 ktbbhict @36 3
ub1 ktbbhflg @38 0x03 (KTBFONFL) -->0 = ON THE FREELIST
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ub1 ktbbhfsl @39 0x03 --ITL TX FREELIST SLOT
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
ub4 ktbbhfnx @40 0x00000000
BBED> assign dba 7,130 ktbbh.ktbbhfsl=0x0
ub1 ktbbhfsl @39 0x00
BBED> assign dba 7,130 ktbbh.ktbbhflg=0x02
ub1 ktbbhflg @38 0x02 (NONE)
BBED> sum apply
Check value for File 7, Block 130:
current = 0xf9e5, required = 0xf9e5
BBED> verify
DBVERIFY - Verification starting
FILE = /mnt/ramdisk/book/mssm01.dbf
BLOCK = 130
--//当然这样修改没必要,仅仅知道还有1个事务在这里.还可以做一个验证:
BBED> assign dba 7,130 ktbbh.ktbbhitl[1].ktbitflg=0x2002
ub2 ktbitflg @84 0x2002 (KTBFUPB)
BBED> assign dba 7,130 ktbbh.ktbbhflg=0x03
ub1 ktbbhflg @38 0x03 (KTBFONFL)
BBED> assign dba 7,130 ktbbh.ktbbhfsl=0x02
ub1 ktbbhfsl @39 0x02
--//指向对应的ITL槽(从1开始计数)。
BBED> sum apply
Check value for File 7, Block 130:
current = 0xfbe7, required = 0xfbe7
BBED> verify
DBVERIFY - Verification starting
FILE = /mnt/ramdisk/book/mssm01.dbf
BLOCK = 130
7. 总结:
--//MSSM表空间块ITL的LCK 出现dml不一致的情况,是触发检查pctused引起的,修改了ktbbh.ktbbhfsl,ktbbh.ktbbhflg标识。
--//这与mssm表空间的有pctused属性有关,实际上昨天出现ORA-04000时,脑子第一个反应并不是这个问题如何解决,而是以前
--//遇到的这个问题。换一句话将如果当时能仔细思考mssm与assm有什么不同,或许这个问题早知道答案了。
--//itpub 上太缺乏相关人讨论这样的问题,我检索一下我在itpub上发过这个问题,链接:
--//http://www.itpub.net/thread-2108112-1-1.html
--//我们团队内部更加不可能........
--//又浪费一个上午的时间....从上午8:30-11:30坐的屁股都疼,真心想站起来走一走。