原文参考:
http://blog.itpub.net/8494287/viewspace-1411709/ 作者:dbhelper
####incremental statistics collection是如何实现的
---本测试中选择名为P20140329的partition进行测试
select table_name,partition_name,num_rows,blocks,last_analyzed from dba_tab_partitions where table_name='IMS_RES_MONITOR_2' and partition_name='P20140329';
TABLE_NAME PARTITION_NAME NUM_ROWS BLOCKS LAST_ANALYZED
------------------------------ ------------------------------ ----------------------- ----------------------- -----------------
IMS_RES_MONITOR_2 P20140329 17 110 20141217 11:00:55
---关于Synopis,可以理解为oracle为每个partition中的每一个列值生成的一张hash表
synopsis信息存放在SYSAUX里的两张表:
sys.wri$_optstat_synopsis_head$和sys.wri$_optstat_synopsis$
表结构如下:
desc sys.wri$_optstat_synopsis_head$
Name Null? Type
----------------------------------------------------------------------------------------------------------------- -------- ----------------------------------------------------------------------------
BO# NOT NULL NUMBER
GROUP# NOT NULL NUMBER
INTCOL# NOT NULL NUMBER
SYNOPSIS# NOT NULL NUMBER
SPLIT NUMBER
ANALYZETIME DATE
SPARE1 NUMBER
SPARE2 CLOB
desc sys.wri$_optstat_synopsis$
Name Null? Type
----------------------------------------------------------------------------------------------------------------- -------- ----------------------------------------------------------------------------
BO# NOT NULL NUMBER
GROUP# NOT NULL NUMBER
INTCOL# NOT NULL NUMBER
HASHVALUE NOT NULL NUMBER
BO#代表分区表的object_id,本例中object_id=3064698
select object_name,object_id from dba_objects where object_name='IMS_RES_MONITOR_2' and object_type not like '%PARTITION%'
OBJECT_NAME OBJECT_ID
-------------------------------------------------------------------------------------------------------------------------------- ----------
IMS_RES_MONITOR_2 3064698
group#至除以2得到subobject_name的object_id,也就是partition的id,本例中名为P20140329的分区,其object_id=4646416,那么group#=9292832就代表了这个分区
select object_name,subobject_name,object_type,object_id from dba_objects where object_name='IMS_RES_MONITOR_2' and subobject_name='P20140329';
OBJECT_NAME SUBOBJECT_NAME OBJECT_TYPE OBJECT_ID
------------------------------ ------------------------------ ------------------- ----------
IMS_RES_MONITOR_2 P20140329 TABLE PARTITION 4646416
INTCOL#:列的序号,本例中intcol#=1表示resource_id这个字段
select table_name,column_name,column_id from dba_tab_columns where table_name='IMS_RES_MONITOR_2' and column_id=1;
TABLE_NAME COLUMN_NAME COLUMN_ID
------------------------------ ------------------------------ ----------
IMS_RES_MONITOR_2 RESOURCE_ID 1
hashvalue:针对每一个列值会生成一个hash value,这个hash value才是实现incremental statistics collect的关键,oracle会根据这个hash值是否变化来决定是否要对这个partition重新收集统计
----我们看一下IMS_RES_MONITOR_2表里名为P20140329的分区resource_id列当前的hash值,共有17个
select a.bo#,a.group#,a.intcol#,a.analyzetime,b.hashValue from sys.wri$_optstat_synopsis_head$ a,sys.wri$_optstat_synopsis$ b where a.bo#=b.bo# and a.bo#=3064698 and a.group#=b.group# and a.intcol#=b.intcol# and a.intcol#=1 and a.group#=9292832;
BO# GROUP# INTCOL# ANALYZETIME HASHVALUE
---------- ---------- ---------- ----------------- ----------
3064698 9292832 1 20141217 11:00:55 4446974364460335394
3064698 9292832 1 20141217 11:00:55 9361946326222687396
3064698 9292832 1 20141217 11:00:55 3217208375921250414
3064698 9292832 1 20141217 11:00:55 13942627879913658026
3064698 9292832 1 20141217 11:00:55 261920373660441827
3064698 9292832 1 20141217 11:00:55 5170298820085325499
3064698 9292832 1 20141217 11:00:55 11206406075889999811
3064698 9292832 1 20141217 11:00:55 2561514943054471972
3064698 9292832 1 20141217 11:00:55 14068223268773651280
3064698 9292832 1 20141217 11:00:55 8292967130511617500
3064698 9292832 1 20141217 11:00:55 13810791139834474882
3064698 9292832 1 20141217 11:00:55 10918063814993801858
3064698 9292832 1 20141217 11:00:55 3598794041972912951
3064698 9292832 1 20141217 11:00:55 17019049280771226092
3064698 9292832 1 20141217 11:00:55 4338555575609523184
3064698 9292832 1 20141217 11:00:55 3974316234907155776
3064698 9292832 1 20141217 11:00:55 4934883602423086651
17 rows selected.
----p20140329分区的值也有17行
select resource_id from jd.ims_res_monitor_2 partition (p20140329);
RESOURCE_ID
-----------------
40100014633293
1076127377
1037223472
40100010153870
40100012648869
1103182014
40100014934637
40100009770277
1098949940
40100011540132
40100002960909
40100013734049
40100013416379
40100010149652
1046540285
40100015074922
40100010900764
17 rows selected.
---我们对p20140329分区的数据做一下修改,看一下是否这个分区会被重新分析
select last_analyzed from dba_tab_partitions where table_name='IMS_RES_MONITOR_2' and partition_name='P20140329';
LAST_ANALYZED
-----------------
20141217 11:00:55
update jd.ims_res_monitor_2 partition (p20140329) set resource_id=1 where resource_id=40100014633293;
1 row updated.
commit;
Commit complete.
select resource_id from jd.ims_res_monitor_2 partition (p20140329)
RESOURCE_ID
---------------------
1
1076127377
1037223472
40100010153870
40100012648869
1103182014
40100014934637
40100009770277
1098949940
40100011540132
40100002960909
40100013734049
40100013416379
40100010149652
1046540285
40100015074922
40100010900764
exec dbms_stats.gather_table_stats(ownname=>'JD',tabname=>'IMS_RES_MONITOR_2');
select last_analyzed from dba_tab_partitions where table_name='IMS_RES_MONITOR_2' and partition_name='P20140329';
LAST_ANALYZED
-----------------
20141218 21:04:17
---再次检查column列的hash值发现和上一次输出相比唯一的区别在于用1049436110058863352替换了8292967130511617500,其它值均不变
select a.bo#,a.group#,a.intcol#,a.analyzetime,b.hashValue from sys.wri$_optstat_synopsis_head$ a,sys.wri$_optstat_synopsis$ b where a.bo#=b.bo# and a.bo#=3064698 and a.group#=b.group# and a.intcol#=b.intcol# and a.intcol#=1 and a.group#=9292832;
BO# GROUP# INTCOL# ANALYZETIME HASHVALUE
---------- ---------- ---------- ----------------- ----------
3064698 9292832 1 20141218 21:04:15 4446974364460335394
3064698 9292832 1 20141218 21:04:15 9361946326222687396
3064698 9292832 1 20141218 21:04:15 3217208375921250414
3064698 9292832 1 20141218 21:04:15 13942627879913658026
3064698 9292832 1 20141218 21:04:15 261920373660441827
3064698 9292832 1 20141218 21:04:15 5170298820085325499
3064698 9292832 1 20141218 21:04:15 11206406075889999811
3064698 9292832 1 20141218 21:04:15 2561514943054471972
3064698 9292832 1 20141218 21:04:15 14068223268773651280
3064698 9292832 1 20141218 21:04:15 13810791139834474882
3064698 9292832 1 20141218 21:04:15 10918063814993801858
3064698 9292832 1 20141218 21:04:15 3598794041972912951
3064698 9292832 1 20141218 21:04:15 17019049280771226092
3064698 9292832 1 20141218 21:04:15 1049436110058863352
3064698 9292832 1 20141218 21:04:15 4338555575609523184
3064698 9292832 1 20141218 21:04:15 3974316234907155776
3064698 9292832 1 20141218 21:04:15 4934883602423086651
###为了验证oracle是根据hash值判断是否需要重新收集统计,下面人工将修改的那一行值恢复成原值,然后人工update表a,sys.wri$_optstat_synopsis$里的hash值
---修改前先记录一下P20140329分区前一次的统计信息
select a.bo#,a.group#,a.intcol#,a.analyzetime,b.hashValue from sys.wri$_optstat_synopsis_head$ a,sys.wri$_optstat_synopsis$ b where a.bo#=b.bo# and a.bo#=3064698 and a.group#=b.group# and a.intcol#=b.intcol# and a.intcol#=1 and a.group#=9292832
BO# GROUP# INTCOL# ANALYZETIME HASHVALUE
----------------------- ----------------------- ----------------------- ----------------- -----------------------
3064698 9292832 1 20141219 08:49:46 4446974364460335394
3064698 9292832 1 20141219 08:49:46 9361946326222687396
3064698 9292832 1 20141219 08:49:46 3217208375921250414
3064698 9292832 1 20141219 08:49:46 13942627879913658026
3064698 9292832 1 20141219 08:49:46 261920373660441827
3064698 9292832 1 20141219 08:49:46 5170298820085325499
3064698 9292832 1 20141219 08:49:46 11206406075889999811
3064698 9292832 1 20141219 08:49:46 2561514943054471972
3064698 9292832 1 20141219 08:49:46 14068223268773651280
3064698 9292832 1 20141219 08:49:46 13810791139834474882
3064698 9292832 1 20141219 08:49:46 10918063814993801858
3064698 9292832 1 20141219 08:49:46 3598794041972912951
3064698 9292832 1 20141219 08:49:46 17019049280771226092
3064698 9292832 1 20141219 08:49:46 1049436110058863352
3064698 9292832 1 20141219 08:49:46 4338555575609523184
3064698 9292832 1 20141219 08:49:46 3974316234907155776
3064698 9292832 1 20141219 08:49:46 4934883602423086651
---修改分区表resource_id字段改回原值
update jd.ims_res_monitor_2 partition (p20140329) set resource_id=40100014633293 where resource_id=1;
commit;
select resource_id from jd.ims_res_monitor_2 partition (p20140329) ;
RESOURCE_ID
-----------------------
40100014633293
1076127377
1037223472
40100010153870
40100012648869
1103182014
40100014934637
40100009770277
1098949940
40100011540132
40100002960909
40100013734049
40100013416379
40100010149652
1046540285
40100015074922
40100010900764
---更新sys.wri$_optstat_synopsis$表里的hash值
update sys.wri$_optstat_synopsis$ set hashvalue=8292967130511617500 where bo#=3064698 and intcol#=1 and group#=9292832 and hashvalue=1049436110058863352;
select a.bo#,a.group#,a.intcol#,a.analyzetime,b.hashValue from sys.wri$_optstat_synopsis_head$ a,sys.wri$_optstat_synopsis$ b where a.bo#=b.bo# and a.bo#=3064698 and a.group#=b.group# and a.intcol#=b.intcol# and a.intcol#=1 and a.group#=9292832
BO# GROUP# INTCOL# ANALYZETIME HASHVALUE
----------------------- ----------------------- ----------------------- ----------------- -----------------------
3064698 9292832 1 20141219 08:49:46 4446974364460335394
3064698 9292832 1 20141219 08:49:46 9361946326222687396
3064698 9292832 1 20141219 08:49:46 3217208375921250414
3064698 9292832 1 20141219 08:49:46 13942627879913658026
3064698 9292832 1 20141219 08:49:46 261920373660441827
3064698 9292832 1 20141219 08:49:46 5170298820085325499
3064698 9292832 1 20141219 08:49:46 11206406075889999811
3064698 9292832 1 20141219 08:49:46 2561514943054471972
3064698 9292832 1 20141219 08:49:46 14068223268773651280
3064698 9292832 1 20141219 08:49:46 13810791139834474882
3064698 9292832 1 20141219 08:49:46 10918063814993801858
3064698 9292832 1 20141219 08:49:46 3598794041972912951
3064698 9292832 1 20141219 08:49:46 17019049280771226092
3064698 9292832 1 20141219 08:49:46 8292967130511617500
3064698 9292832 1 20141219 08:49:46 4338555575609523184
3064698 9292832 1 20141219 08:49:46 3974316234907155776
3064698 9292832 1 20141219 08:49:46 4934883602423086651
17 rows selected.
---又一次刷新统计信息,观察P20140329分区的统计是否会被刷新
exec dbms_stats.gather_table_stats(ownname=>'JD',tabname=>'IMS_RES_MONITOR_2');
---虽然我们提前更新了hash值,但oracle还是重新收集了一遍统计,看来内部的控制逻辑比我们想象的要复杂
select a.bo#,a.group#,a.intcol#,a.analyzetime,b.hashValue from sys.wri$_optstat_synopsis_head$ a,sys.wri$_optstat_synopsis$ b where a.bo#=b.bo# and a.bo#=3064698 and a.group#=b.group# and a.intcol#=b.intcol# and a.intcol#=1 and a.group#=9292832;
BO# GROUP# INTCOL# ANALYZETIME HASHVALUE
----------------------- ----------------------- ----------------------- ----------------- -----------------------
3064698 9292832 1 20141219 09:13:06 4446974364460335394
3064698 9292832 1 20141219 09:13:06 9361946326222687396
3064698 9292832 1 20141219 09:13:06 3217208375921250414
3064698 9292832 1 20141219 09:13:06 13942627879913658026
3064698 9292832 1 20141219 09:13:06 261920373660441827
3064698 9292832 1 20141219 09:13:06 5170298820085325499
3064698 9292832 1 20141219 09:13:06 11206406075889999811
3064698 9292832 1 20141219 09:13:06 2561514943054471972
3064698 9292832 1 20141219 09:13:06 14068223268773651280
3064698 9292832 1 20141219 09:13:06 8292967130511617500
3064698 9292832 1 20141219 09:13:06 13810791139834474882
3064698 9292832 1 20141219 09:13:06 10918063814993801858
3064698 9292832 1 20141219 09:13:06 3598794041972912951
3064698 9292832 1 20141219 09:13:06 17019049280771226092
3064698 9292832 1 20141219 09:13:06 4338555575609523184
3064698 9292832 1 20141219 09:13:06 3974316234907155776
3064698 9292832 1 20141219 09:13:06 4934883602423086651
由于要维护这两套synopsis表,所以不可避免的会额外占用一定的存储空间,空间的大小和分区表里分区的数量,每个分区的列数多少均有关系,就拿测试所用的表ims_res_monitor_2来举例,一共有2333行,13个列,实际使用空间为160977bytes,WRI$_OPTSTAT_SYNOPSIS_HEAD$,WRI$_OPTSTAT_SYNOPSIS$这两张表合计使用的空间为595185bytes,所以这笔空间开销还是很大的,空间换时间的又一典型。