test
2020-11-05 16:56 AllegroCantabile 阅读(406) 评论(0) 编辑 收藏 举报
5、节点之间数据库的patchlevel要一致
crsctl query crs releasepatch
crsctl query crs softwarepatch -all
crsctl query crs softwarepatch -l
crsctl query crs activeversion -f
6、确保CDB和PDB的组件均为VALID
sqlplus / as sysdba
在所有pdb open的情况下可以使用以下命令检查:
col comp_id for a15
col version for a15
col comp_name for a40
select con_id, comp_id, comp_name, version, status from containers(dba_registry) order by 1;
单独检查某个CDB或PDB:
select comp_id, comp_name, version, status from dba_registry;
7、确保CDB和PDB的sqlpatch版本和软件版本一致
Set line 333 pages 9999
Col patch_id for 999999999
Col PATCH_UID for 99999999
Col PATCH_TYPE for a10
Col ACTION for a10
Col STATUS for a10
Col ACTION_TIME for a20
Col DESCRIPTION for a55
Col SOURCE_VERSION for a15
Col TARGET_VERSION for a15
select PATCH_ID,PATCH_UID,PATCH_TYPE,ACTION,STATUS,ACTION_TIME,DESCRIPTION,
SOURCE_VERSION,TARGET_VERSION
from dba_registry_sqlpatch
order by ACTION_TIME;
8、检查节点间的互信
ssh 10.220.0.20 date;ssh 10.220.0.21 date
9、检查PREPATCH和POSTPATCH的状态
检查ROOTCRS_PREPATCH(ROOTCRS_PREPATCH的状态要为START或者FIAL)
su - grid
$ /u01/app/19.0.0.0/grid/bin/cluutil -ckpt -oraclebase /u01/app/grid -chkckpt -name ROOTCRS_PREPATCH -status
START
如果为SUCCESS需要手动改成anaSTART:
$ /u01/app/19.0.0.0/grid/bin/cluutil -ckpt -oraclebase /u01/app/grid -writeckpt -name ROOTCRS_PREPATCH -state START
检查ROOTCRS_POSTPATCH(ROOTCRS_POSTPATCH的状态要为SUCCESS)
su - grid
$ /u01/app/19.0.0.0/grid/bin/cluutil -ckpt -oraclebase /u01/app/grid -chkckpt -name ROOTCRS_POSTPATCH -status
SUCCESS
如果为START/FAIL需要手动改成SUCCESS:
su - grid
$ /u01/app/19.0.0.0/grid/bin/cluutil -ckpt -oraclebase /u01/app/grid -writeckpt -name ROOTCRS_POSTPATCH -state SUCCESS
10、对用户下的ORACLE_HOME进行检查
19C版本数据库
# su - grid
$ORACLE_HOME/OPatch/opatch lsinventory -detail -oh /u01/app/19.0.0.0/grid
# su - oracle
$ORACLE_HOME/OPatch/opatch lsinventory -detail -oh /u01/app/oracle/product/19.0.0.0/dbhome_1
12C版本数据库
# su - oracle12c
$ORACLE_HOME/OPatch/opatch lsinventory -detail -oh /u01/app/oracle/product/12.1.0.2/dbhome_1
11、对补丁冲突检查
19C版本数据库
su - grid
$ORACLE_HOME/OPatch/opatch prereq CheckConflictAgainstOHWithDetail -phBaseDir /software/psu_19c/32545008/32545013
$ORACLE_HOME/OPatch/opatch prereq CheckConflictAgainstOHWithDetail -phBaseDir /software/psu_19c/32545008/32579761
$ORACLE_HOME/OPatch/opatch prereq CheckConflictAgainstOHWithDetail -phBaseDir /software/psu_19c/32545008/32576499
$ORACLE_HOME/OPatch/opatch prereq CheckConflictAgainstOHWithDetail -phBaseDir /software/psu_19c/32545008/32584670
su - oracle
$ORACLE_HOME/OPatch/opatch prereq CheckConflictAgainstOHWithDetail -phBaseDir /software/psu_19c/32545008/32545013
$ORACLE_HOME/OPatch/opatch prereq CheckConflictAgainstOHWithDetail -phBaseDir /software/psu_19c/32545008/32579761
12C版本数据库
su - oracle12c
cd /software/psu_12c/32507738
$ORACLE_HOME/OPatch/opatch prereq CheckConflictAgainstOHWithDetail -ph ./
12、补丁空间需求检查
19C版本数据库
GI补丁空间需求检查
# su - grid
cat > /tmp/patch_list_gihome.txt<<EOF
/software/psu_19c/32545008/32545013
/software/psu_19c/32545008/32579761
/software/psu_19c/32545008/32576499
/software/psu_19c/32545008/32584670
EOF
$ cat /tmp/patch_list_gihome.txt
/software/psu_19c/32545008/32545013
/software/psu_19c/32545008/32579761
/software/psu_19c/32545008/32576499
/software/psu_19c/32545008/32584670
$ORACLE_HOME/OPatch/opatch prereq CheckSystemSpace -phBaseFile /tmp/patch_list_gihome.txt
DB补丁空间需求检查
# su - oracle
cat > /tmp/patch_list_dbhome.txt<<EOF
/software/psu_19c/32545008/32545013
/software/psu_19c/32545008/32579761
EOF
$ cat /tmp/patch_list_dbhome.txt
/software/psu_19c/32545008/32545013
/software/psu_19c/32545008/32579761
$ORACLE_HOME/OPatch/opatch prereq CheckSystemSpace -phBaseFile /tmp/patch_list_dbhome.txt
13、检查oui-patch.xml文件
oui-patch.xml文件不存在可能导致升级过程因为自动创建的该文件权限不对将导致升级失败。
ls -l /u01/app/oraInventory/ContentsXML/oui-patch.xml
如果不存编辑一个:
su - grid
cat > /u01/app/oraInventory/ContentsXML/oui-patch.xml
<?xml version="1.0" encoding="UTF-8"?>
<!-- Copyright (c) 2019 Oracle Corporation. All Rights Reserved.
Do not modify the contents of this file by hand.
-->
<ONEOFF_LIST/>
chown grid:oinstall /u01/app/oraInventory/ContentsXML/oui-patch.xml
chmod 660 /u01/app/oraInventory/ContentsXML/oui-patch.xml
14、检查oraInventory/locks目录
在没有安装和升级程序正在运行的时候,oraInventory目录下不存在locks目录及相关lock文件,在某些条件下有过补丁升级失败修复的情况,可以导致locks目录没有被自动删除。需要在升级前删除locks目录。
ls -l /u01/app/oraInventory/locks
cd /u01/app/oraInventory/
rm -rf locks
#"TX,TM,DX" 锁应急处理
以下语句列出是谁造成了阻塞:
column event format a30
column sess format a20
set linesize 200 pagesize 1000
break on id1 skip 1
select decode(request,0,'Holder:','Waiter:')||s.inst_id||':'||s.sid||','||s.serial# sess, s.BLOCKING_SESSION,s.status,
id1,id2,lmode,request,l.type,ctime,s.username,s.sql_id,s.PREV_SQL_ID,s.event,s.SQL_EXEC_START
--,s.service_name
from gv$lock l,gv$session s
where (id1,id2,l.type) in
(select id1,id2,type from gv$lock where request>0)
and l.sid=s.sid
and l.inst_id=s.inst_id
order by id1,ctime desc,request
/
#oracle的 procedure、package被锁,处理方法
select oc.type obj_type,oc.name obj_name,spid from v$db_object_cache oc, v$object_dependency od, dba_kgllock w, v$session s, v$process p
where od.to_owner = oc.owner and od.to_name = oc.name and od.to_address = w.kgllkhdl and w.kgllkuse = s.saddr and p.addr = s.paddr --and oc.name = 'upper(123)'
order by s.logon_time asc
#查询表是否被锁
set pagesize 999 lines 999;
col object_name for a20;
col machine for a20;
col program for a30;
col killid for a10;
col os_pid for a20;
col sql_address for a10;
col username for a20;
select object_name,machine,s.program,s.sid,
s.sid||','||s.serial# as killid,
p.spid as os_pid,
s.sql_address,
decode(l.locked_mode, 0, 'None',
1, 'Null',
2, 'Row-S',
3, 'Row-X',
4, 'Share',
5, 'S/Row-X',
6, 'Exclusive', 'Unknown') locked_mode,
s.username,
s.process,
s.sql_id
from v$locked_object l,
dba_objects o,v$session s,v$process p
where l.object_id=o.object_id and l.session_id=s.sid
and s.paddr=p.addr
and object_name=upper('&tablename') and o.owner=upper('&owner') order by 1;
#收缩数据文件
1) 条件:如果没有分配的空间不足100M,则不考虑收缩。
2) 收缩目标:当前数据文件大小 - (没分配空间- 100M)×0.8 即收缩后表空间使用不超过80%
set line 300 pagesize 1000
col cmd for a100
select /*+ ordered use_hash(a,c) */ 'alter database datafile '''||a.file_name||''' resize ' ||round(a.filesize - (a.filesize - c.hwmsize-100) *0.8)||'M;' cmd, a.filesize, c.hwmsize, a.filesize-round(a.filesize - (a.filesize - c.hwmsize-100) *0.8) shrink_size
from (select file_id,file_name,round(bytes/1024/1024) filesize from dba_data_files) a,
(select file_id,round(max(block_id)*8/1024) HWMsize from dba_extents group by file_id) c
where a.file_id = c.file_id and a.filesize - c.hwmsize > 100 order by shrink_size;
===================================================== 数据库异常处理 ======================================================================
一、当前情况
1、监控当前TOPAS
topas
2、监控当前等待事件
set pagesize 999
set linesize 300
col event format a66
col wait_class format a30
select event,count(*),wait_class from gv$session group by event,wait_class order by 2;
3、通过等待事件检查会话
col machine format a32
col event format a32
col program format a32
set linesize 555
select sid,machine,program,sql_id,event,p1,p2 from v$session where event='&event_list' order by sql_id;
4、监控当前的数据库会话信息
set pagesize 999
set lines 666
col username format a13
col prog format a10 trunc
col sql_text format a41 trunc
col sid format 999999
col child for 999999
col avg_etime for 999,999,.99
col EXECS for 99999999999
select sid,
serial#,
status,
username,
substr(program,1,19) prog,
address,
hash_value,
b.sql_id,
child_number child,
plan_hash_value,
executions execs,
(elapsed_time / decode(nvl(executions,0),0,1,executions)) / 1000000 avg_etime,
sql_text
from v$session a,v$sql b
where status='ACTIVE'
and username is not null
and a.sql_id=b.sql_id
and a.sql_child_number=b.child_number
and sql_text not like
'%select sid,serial#,username,substr(program,1,19) prog,%' --don't show this query
order by sql_id,sql_child_number
/
5、根据以上查询的检查异常SQL_ID,主要检查 PLAN_HASH_VALUE
set lines 155
col execs for 999,999,999
col avg_etime for 999,999.999
col avg_lio for 999,999,999.9
col begin_interval_time for a30
col node for 99999
break on plan_hash_value on startup_time skip 1
select ss.snap_id,
ss.instance_number node,
begin_interval_time,
sql_id,
plan_hash_value,
nvl(executions_delta,0) execs,
(elapsed_time_delta /
decode(nvl(executions_delta,0),0,1,executions_delta))/1000000 avg_etime,
(buffer_gets_delta /
decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_lio,
(disk_reads_delta/
decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_pio
from DBA_HIST_SQLSTAT S,DBA_HIST_SNAPSHOT SS
where sql_id='&sql_id'
and ss.snap_id=S.snap_id
and ss.instance_number=S.instance_number
and s.instance_number like nvl('&instance_number',s.instance_number)
and executions_delta>0
order by 1,2,3
/
6、通过上面的返回查看执行计划是否改变,检查执行计划
set linesize 300
select * from table(DBMS_XPLAN.DISPLAY_CURSOR(to_char('&sql_id'),NULL));
二、非当前情况
1、检查快照信息
select INSTANCE_NUMBER,SNAP_ID,BEGIN_INTERVAL_TIME from DBA_HIST_SNAPSHOT where BEGIN_INTERVAL_TIME>sysdate-1 order by SNAP_ID;
##语句在时间段内的消耗情况(snap_id)
select /*+parallel(t,4)*/
plan_hash_value,
buffer_gets_delta/executions_delta get_exec,
disk_reads_delta/executions_delta read_exec,
cpu_time_delta/executions_delta/1000 cpu_exec_ms ,
elapsed_time_delta/executions_delta/1000 elaps_exec_ms ,
parsing_schema_id,
ROWS_PROCESSED_delta/executions_delta rows_exec,
executions_delta
from dba_hist_sqlstat t
where sql_id = '1mgfnuxggrfj0'
and instance_number = 1
and SNAP_ID between 8978 and 8988
and EXECUTIONS_DELTA > 0;
回放故障期间数据库层等待事件的情况:
col event for a35
set lines 222
set pages 9999
col p1 for 999999999999999999999999999999
select instance_number,sample_id,sample_time,event,count(*) from dba_hist_active_sess_history
where sample_time >
to_timestamp('2018-08-02 11:00:00','yyyy-mm-dd hh24:mi:ss')
and sample_time <
to_timestamp('2018-08-02 11:55:00','yyyy-mm-dd hh24:mi:ss')
--and event ='change tracking file synchronous read'
group by instance_number,sample_id,sample_time,event
order by 1,3,5;
col event for a35
set lines 222
set pages 9999
col p1 for 999999999999999999999999999999
select sample_id,sample_time,event,count(*) from gv$active_session_history;
where INST_ID=2 and event is not null
and sample_id between 43715229 and 43715911
group by sample_id,sample_time,event
order by 1,4;
2、按照快照检查SQL执行信息
set lines 155
col execs for 999,999,999
col min_etime for 999,999.99
col max_etime for 999,999.99
col avg_etime for 999,999.999
col avg_lio for 999,999,999.9
col norm_stddev for 999,999.9999
col begin_interval_time for a30
col node for 99999
break on plan_hash_value on startup_time skip 1
select * from
(select sql_id,
sum(execs),
min(avg_etime) min_etime,
max(avg_etime) max_etime,
stddev_etime / min(avg_etime) norm_stddev
from (select sql_id,
plan_hash_value,
execs,
avg_etime,
stddev(avg_etime) over(partition by sql_id) stddev_etime
from (select sql_id,
plan_hash_value,
sum(nvl(executions_delta,0)) execs,
(sum(executions_delta)/
decode(sum(nvl(executions_delta,0)),
0,
1,
sum(executions_delta)) /1000000) avg_etime
--,sum((buffer_gets_delta/decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_lio
from DBA_HIST_SQLSTAT S,DBA_HIST_SNAPSHOT SS
where ss.snap_id=s.snap_id
and ss.instance_number=s.instance_number
and executions_delta>0
and elapsed_time_delta>0
and s.snap_id > nvl('&start_snap_id',0)
and s.snap_id < nvl('&last_snap_id',0)
group by sql_id,plan_hash_value))
group by sql_id,stddev_etime)
where max_etime>0.01
order by 4
/
3、根据时间检查SQL的信息 (可以修改default的值 当前为0.5天)
alter session set nls_date_format='yyyy-mm-dd hh24:mi:ss';
accept days_ago -
prompt 'Enter Days ago: ' -
default '1'
set lines 155
col execs for 999,999,999
col before_etime for 999,990.99
col after_etime for 999,990.99
col before_avg_etime for 999,990.99 head AVG_ETIME_BEFORE
col after_avg_etime for 999,990.99 head AVG_ETIME_AFTER
col min_etime for 999,990.99
col max_etime for 999,990.99
col avg_etime for 999,990.999
col avg_lio for 999,999,990.9
col norm_stddev for 999,990.9999
col begin_interval_time for a30
col node for 99999
break on plan_hash_value on startup_time skip 1
select * from (
select sql_id, execs, before_avg_etime, after_avg_etime, norm_stddev,
case when to_number(before_avg_etime) < to_number(after_avg_etime) then 'Slower' else 'Faster' end result
-- select *
from (
select sql_id, sum(execs) execs, sum(before_execs) before_execs, sum(after_execs) after_execs,
sum(before_avg_etime) before_avg_etime, sum(after_avg_etime) after_avg_etime,
min(avg_etime) min_etime, max(avg_etime) max_etime, stddev_etime/min(avg_etime) norm_stddev,
case when sum(before_avg_etime) > sum(after_avg_etime) then 'Slower' else 'Faster' end better_or_worse
from (
select sql_id,
period_flag,
execs,
avg_etime,
stddev_etime,
case when period_flag = 'Before' then execs else 0 end before_execs,
case when period_flag = 'Before' then avg_etime else 0 end before_avg_etime,
case when period_flag = 'After' then execs else 0 end after_execs,
case when period_flag = 'After' then avg_etime else 0 end after_avg_etime
from (
select sql_id, period_flag, execs, avg_etime,
stddev(avg_etime) over (partition by sql_id) stddev_etime
from (
select sql_id, period_flag, sum(execs) execs, sum(etime)/sum(decode(execs,0,1,execs)) avg_etime from (
select sql_id, 'Before' period_flag,
nvl(executions_delta,0) execs,
(elapsed_time_delta)/1000000 etime
-- sum((buffer_gets_delta/decode(nvl(buffer_gets_delta,0),0,1,executions_delta))) avg_lio
from DBA_HIST_SQLSTAT S, DBA_HIST_SNAPSHOT SS
where ss.snap_id = S.snap_id
and ss.instance_number = S.instance_number
and executions_delta > 0
and elapsed_time_delta > 0
and ss.begin_interval_time <= sysdate-&&days_ago
union
select sql_id, 'After' period_flag,
nvl(executions_delta,0) execs,
(elapsed_time_delta)/1000000 etime
-- (elapsed_time_delta)/decode(nvl(executions_delta,0),0,1,executions_delta)/1000000 avg_etime
-- sum((buffer_gets_delta/decode(nvl(buffer_gets_delta,0),0,1,executions_delta))) avg_lio
from DBA_HIST_SQLSTAT S, DBA_HIST_SNAPSHOT SS
where ss.snap_id = S.snap_id
and ss.instance_number = S.instance_number
and executions_delta > 0
and elapsed_time_delta > 0
and ss.begin_interval_time > sysdate-&&days_ago
-- and s.snap_id > 7113
)
group by sql_id, period_flag
)
)
)
group by sql_id, stddev_etime
)
)
where after_avg_etime>0.01
order by norm_stddev
/
4、根据以上查询的检查异常SQL_ID,主要检查PLAN_HASH_VALUE
set lines 155
col execs for 999,999,999
col avg_etime for 999,999.999
col avg_lio for 999,999,999.9
col begin_interval_time for a30
col node for 99999
break on plan_hash_value on startup_time skip 1
select ss.snap_id,
ss.instance_number node,
begin_interval_time,
sql_id,
plan_hash_value,
nvl(executions_delta,0) execs,
(elapsed_time_delta/
decode(nvl(executions_delta,0),0,1,executions_delta))/1000000 avg_etime,
(buffer_get_delta/
decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_lio,
(disk_reads_delta/
decode(nvl(buffer_gets_delta,0),0,1,executions_delta)) avg_pio
from DBA_HIST_SQLSTAT S,DBA_HIST_SNAPSHOT SS
where sql_id-'&sql_id'
and ss.snap_id=s.snap_id
and ss.instance_number=s.instance_number
and s.instance_number like '&instance_number'
and executions_delta > 0
order by 1,2,3
/
5、通过上面的返回查看执行计划是否改变,检查执行计划
set pagesize 999
set linesize 380
select * from table(dbms_xplan.display_cursor(to_char('&sql_id'),NULL);
6、根据快照ID查询SQL_ID的来源信息(查所有的视图太大)
set linesize 555
set pagesize 999
col event format a40
col program format a30
col wait_class format a20
select snap_id,instance_number,session_id,sql_id,event,wait_class,p1,p2,p3,blocking_session,blocking_inst_id,program
from dba_hist_active_sess_history
where snap_id > &start_snap_id
and snap_id < &last_snap_id
and sql_id = &sql_id
order by snap_id,program;
===================================================== 等待事件 ======================================================================
##利用触发器记录每个会话的等待事件
在数据库上部署了这个脚本,就能够及时回顾以发现每一个前台进程在数据库中所作的工作。
create table system.session_event_history
tablespace <name>
storage (freelist groups <value>)
initrans <value>
as
select b.sid,
b.serial#,
b.username,
b.osuser,
b.paddr,
b.process,
b.logon_time,
b.type,
a.event,
a.total_waits,
a.total_timeouts,
a.time_waited,
a.average_wait,
a.max_wait,
sysdate as logoff_timestamp
from v$session_event a, v$session b
where 1 = 2;
create table system.sesstat_history
tablespace < name >
storage (freelist groups <value>)
initrans <value>
as
select c.username,
c.osuser,
a.sid,
c.serial#,
c.paddr,
c.process,
c.logon_time,
a.statistic#,
b.name,
a.value,
sysdate as logoff_timestamp
from v$sesstat a, v$statname b, v$session c
where 1 = 2;
create or replace trigger sys.logoff_trig
before logoff on database
declare
logoff_sid pls_integer;
logoff_time date := sysdate;
begin
select sid into logoff_sid from v$mystat where rownum < 2;
insert into system.session_event_history
(sid,
serial#,
username,
osuser,
paddr,
process,
logon_time,
type,
event,
total_waits,
total_timeouts,
time_waited,
average_wait,
max_wait,
logoff_timestamp)
select a.sid,
b.serial#,
b.username,
b.osuser,
b.paddr,
b.process,
b.logon_time,
b.type,
a.event,
a.total_waits,
a.total_timeouts,
a.time_waited,
a.average_wait,
a.max_wait,
logoff_time
from v$session_event a, v$session b
where a.sid = b.sid
and b.username = login_user
and b.sid = logoff_sid;
insert into system.sesstat_history
(username,
osuser,
sid,
serial#,
paddr,
process,
logon_time,
statistic#,
name,
value,
logoff_timestamp)
select c.username,
c.osuser,
a.sid,
c.serial#,
c.paddr,
c.process,
c.logon_time,
a.statistic#,
b.name,
a.value,
logoff_time
from v$sesstat a, v$statname b, v$session c
where a.statistic# = b.statistic#
and a.sid = c.sid
and b.name in ('CPU used when call started', --调用开始时使用的CPU时间。
'CPU used by this session', --会话自用户调用起至结束期间使用CPU时间的总量。
'recursive cpu usage', --非用户调用(递归调用)使用的CPU时间总量。
'parse time cpu') --(硬和软)解析使用的CPU时间的总量。
and c.sid = logoff_sid
and c.username = login_user;
end;
/
##某段时间等待事件情况
select /*+ parallel(d,16) */sql_id,event,count(*) from dba_hist_active_sess_history d
where d.event in ('db file sequential read','gc buffer busy acquire','read by other session','latch: cache buffers chains') and d.dbid=1714394454
and d.sample_time between to_date('2017-03-01 07:00:00','yyyy-mm-dd hh24:mi:ss') and to_date('2017-03-01 09:00:00','yyyy-mm-dd hh24:mi:ss')
and d.instance_number=1 group by sql_id,event order by count(*),sql_id,event;
##等待事件对应的p1,p2,p3含义
col name for a25;
col p1 for a10;
col p2 for a10;
col p3 for a10;
SELECT NAME, PARAMETER1 P1, PARAMETER2 P2, PARAMETER3 P3
FROM V$EVENT_NAME
WHERE NAME = '&event_name';
--sql执行历史情况查询
--v$active_session_history是动态的,ASH保存在内存中,每1秒从v$session_wait中取一次,ASH通常用来分析当前的性能问题,而dba_hist_active_sess_history则是ASH以及AWR采集数据的一个总览表,通常可以通过该视图进行历史数据的访问
--BLOCKING_SESSION是指引起等待的session_id
select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,SESSION_STATE,USER_ID,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from dba_hist_active_sess_history where
SAMPLE_TIME>to_date('20130825 08:00','yyyymmdd hh24:mi:ss') and event ='enq: TX - row lock contention';
select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,USER_ID,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from dba_hist_active_sess_history where
SESSION_ID=1598;
select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,USER_ID,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from v$active_session_history where
SESSION_ID=1598;
select to_char(b.end_interval_time,'yyyy-mm-dd hh24:mi:ss') sample_time, a.*
from dba_hist_sqlstat a,dba_hist_snapshot b
where a.snap_id=b.snap_id and a.sql_id in ('dfafaf','dkafja');
select to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME,SESSION_ID,SESSION_STATE,USER_ID,event,SQL_ID,BLOCKING_SESSION,CURRENT_OBJ# from dba_hist_active_sess_history where
SAMPLE_TIME>to_date('20130825 08:00','yyyymmdd hh24:mi:ss') ;
##dba_hist_active_sess_history分析
create table tmp_ash_20180920 as select * from dba_hist_active_sess_history
where sample_time between to_date('20180919 02:00','yyyymmdd hh24:mi') and to_date('20180920 05:00','yyyymmdd hh24:mi');
create table tmp_sql_20180920 as select * from WRH$_SQLTEXT where sql_id in (select sql_id from tmp_ash_20180920);
exp "'/ as sysdba'" file=tmp_ash_20180920.dmp tables=tmp_ash_20180920,tmp_sql_20180920
imp "'/ as sysdba'" file=tmp_ash_20170517.dmp full=y
select instance_number,sample_id,sample_time,event,count(*) from tmp_ash_20170517 group by instance_number,sample_id,sample_time,event order by 1,3,5;
col SQL_ID for a20
col EVENT for a20
col SQL_OPNAME for a10
col SAMPLE_TIME for a30
col PROGRAM for a40
select sample_time, session_id, user_id, sql_id, sql_opname,event,blocking_session,program from tmp_ash_20170517 where session_id=4338 order by 1;
select session_id, sample_id, user_id, sql_id, is_sqlid_current,sql_opname,event,blocking_session,top_level_call_name,program from tmp_ash_20170517 where sample_id in (24810796) order by session_id;
##ash中等待事件的变化
SELECT to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss') SAMPLE_TIME, event, COUNT(*) FROM dba_hist_active_sess_history
WHERE sample_time between to_date('20170510 00:00','yyyymmdd hh24:mi') and to_date('20170510 00:30','yyyymmdd hh24:mi')
GROUP BY to_char(SAMPLE_TIME,'yyyy-mm-dd hh24:mi:ss'),event ORDER BY 1;
--ASH中每个采样点的top n event --http://feed.askmaclean.com/archives/dba_hist_active_sess_history.html
select t.dbid,
t.sample_id,
t.sample_time,
t.instance_number,
t.event,
t.session_state,
t.c session_count
from (select t.*,
rank() over(partition by dbid, instance_number, sample_time order by c desc) r
from (select /*+ parallel 8 */
t.*,
count(*) over(partition by dbid, instance_number, sample_time, event) c,
row_number() over(partition by dbid, instance_number, sample_time, event order by 1) r1
from dba_hist_active_sess_history t
where sample_time >
to_timestamp('&begin_sample_time',
'yyyy-mm-dd hh24:mi:ss')
and sample_time <
to_timestamp('&end_sample_time',
'yyyy-mm-dd hh24:mi:ss')
) t
where r1 = 1) t
where r < 3
order by dbid, instance_number, sample_time, r;
--观察每个采样点的等待链:
select
level lv,
connect_by_isleaf isleaf,
connect_by_iscycle iscycle,
t.dbid,
t.sample_id,
t.sample_time,
t.instance_number,
t.session_id,
t.sql_id,
t.session_type,
t.event,
t.session_state,
t.blocking_inst_id,
t.blocking_session,
t.blocking_session_status
from m_ash20180322 t
where sample_time >
to_timestamp('2018-03-22 09:59:00',
'yyyy-mm-dd hh24:mi:ss')
and sample_time <
to_timestamp('2018-03-22 10:00:00',
'yyyy-mm-dd hh24:mi:ss')
start with blocking_session is not null
connect by nocycle
prior dbid = dbid
and prior sample_time = sample_time
/*and ((prior sample_time) - sample_time between interval '-1'
second and interval '1' second)*/
and prior blocking_inst_id = instance_number
and prior blocking_session = session_id
and prior blocking_session_serial# = session_serial#
order siblings by dbid, sample_time;
进一步筛选,将isleaf=1的叶(top holder)找出来
--基于上一步的原理来找出每个采样点的最终top holder:
select t.lv,
t.iscycle,
t.dbid,
t.sample_id,
t.sample_time,
t.instance_number,
t.session_id,
t.sql_id,
t.session_type,
t.event,
t.seq#,
t.session_state,
t.blocking_inst_id,
t.blocking_session,
t.blocking_session_status,
t.c blocking_session_count
from (select t.*,
row_number() over(partition by dbid, instance_number, sample_time order by c desc) r
from (select t.*,
count(*) over(partition by dbid, instance_number, sample_time, session_id) c,
row_number() over(partition by dbid, instance_number, sample_time, session_id order by 1) r1
from (select /*+ parallel 8 */
level lv,
connect_by_isleaf isleaf,
connect_by_iscycle iscycle,
t.*
from m_ash20180322 t
where sample_time >
to_timestamp('2018-03-22 09:59:00',
'yyyy-mm-dd hh24:mi:ss')
and sample_time <
to_timestamp('2018-03-22 10:00:00',
'yyyy-mm-dd hh24:mi:ss')
start with blocking_session is not null
connect by nocycle
prior dbid = dbid
and prior sample_time = sample_time
/*and ((prior sample_time) - sample_time between interval '-1'
second and interval '1' second)*/
and prior blocking_inst_id = instance_number
and prior blocking_session = session_id
and prior
blocking_session_serial# = session_serial#) t
where t.isleaf = 1) t
where r1 = 1) t
where r < 3
order by dbid, sample_time, r;
##找出历史坏SQL
使用如下脚本可以监控历史会话经历过哪些等待事件,并且按照等待次数降序排列
select session_id,event,count(*),sum(time_waited) from v$active_session_history where session_state='WAITING'
and time_waited>0 and sample_time>=(sysdate-&howlongago_min/(24*60))
group by session_id,event order by 3 desc;
然后通过如下语句查询出引起该等待事件的sql_id
select event, session_id,sql_id, p1,p2,p3 from v$active_session_history where sample_time>=(sysdate-&howlongago/(24*60)) and session_id=&sid;
再通过如下语句找到引起该等待事件的SQL
select sql_text from v$sqlarea where sql_id='&sql_id';
##找出sql执行消耗
在Oracle 11g中,当SQL满足以下条件之一就会被sql monitor捕获到,监控数据被记录在v$sql_monitor视图中。
■ 当SQL并行执行时,会立即被实时监控到
■ 当SQL单进程运行时,如果消耗超过5秒的CPU或I/O时间,它也会被监控到。(通过修改隐含参数_sqlmon_threshold可以控制这一行为,默认为5秒,如果设置为0将关闭SQL MONITORING功能)
■ 使用/*+ monitor */提示的sql语句:如select /*+ monitor */ * from dba_tables;
set trimspool on
set arraysize 512
set trim on
set pagesize 0
set linesize 1000
set long 1000000
set longchunksize 1000000
spool sqlmon.html
select /*+ noparallel */ dbms_sqltune.report_sql_monitor (
sql_id=>'&sql_id',
report_level=>'ALL',
type=>'ACTIVE')
from dual;
spool off
cat sqlmon.html
##direct path read
direct path read 的原因有以下:
1.大量的磁盘排序操作,无法在排序区中完成排序,需要利用temp表空间进行排序.
2.大量的Hash Join操作,利用temp表空间保存hash区。
3.大表的全表扫描,在Oracle11g中,全表扫描的算法有新的变化,根据表的大小、高速缓存的大小等信息,决定是否绕过SGA直接从磁盘读取数据。而10g则是全部通过高速缓存读取数据,称为table scan(large)。
11g认为大表全表时使用直接路径读,可能比10g中的数据文件散列读(db file scattered reads)速度更快,使用的latch也更少。
##一次ash分析案例
1、获得故障期间按时间分布的活动会话个数的趋势。
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss'), COUNT(1) FROM dba_hist_active_sess_history
WHERE instance_number=1
GROUP BY to_char(sample_time, 'yyyymmdd hh24:mi:ss')
ORDER BY 1;
2、活动会话高峰期的等待事件
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event, COUNT(*) FROM dba_hist_active_sess_history
WHERE to_char(sample_time, 'yyyymmdd hh24:mi:ss')='20161120 20:44'
AND instance_number=1
GROUP BY to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event
HAVING COUNT(*)>=10;
--和RAC global cache相关的等待
3、找到某一瞬间,正在等gc buffer busy acquire的上百个会话,分别在申请哪些BLOCK(p1/p2),被谁阻塞了(blocking_session)。
SELECT p1,p2,blocking_inst_id||'_'||blocking_session block_sid, COUNT(1)
FROM dba_hist_active_sess_history
WHERE to_char(sample_time, 'yyyymmdd hh24:mi:ss')='20161120 20:44:04'
AND instance_number=1
AND event='gc buffer busy acquire'
GROUP BY p1,p2, blocking_inst_id||'_'||blocking_session
ORDER BY COUNT(1);
P1 P2 BLOCK_SID COUNT(1)
--- ------- ----------- --------
47 1540116 1_3200 82
......
可以看到,在开始出现问题的时刻,即20:44:04
节点1有82个会话申请同一个BLOCK在等gc bufferbusy acquire,被节点1 SID为3200的会话阻塞了,即节点1 SID为3200的会话先于82个进程向节点2请求。
4、查看节点1 SID为3200的会话在等什么
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
FROM dba_hist_active_sess_history
WHERE instance_number||'_'||session_id='1_3200'
AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
ORDER BY 1;
SAMPLE_TIME SID EVENT STATE SEQ# P1 P2 BLOCK_SID
------------------- ------- ----------------------- --------- ------- ----- --------- -----------
20161120 20:44:04 1_3200 gc current blcok busy WAITING 7667 47 1540116 _
20161120 20:44:14 1_3200 gc current blcok busy WAITING 7667 47 1540116 _
20161120 20:44:24 1_3200 gc current blcok busy WAITING 7667 47 1540116 _
节点1 SID为3200的会话在请求收到文件号47 ,BLOCK号1540116的时候,无法快速获取,
等待事件是gc current blockbusy,并且这次申请从20:44:04到20:44:24,持续了20秒以上,但是崩溃的是,没有办法知道谁是我的阻塞者了
5、从sqlid出发
既然客户已经提到,有一笔插入报文表的联机交易2016-11-20 20:44:04 ~ 20:44:34,时间达到了30秒。那么我们就从这里开始好了
SELECT sql_exec_id, COUNT(1), to_char(MIN(sample_time), 'yyyymmdd hh24:mi:ss') min_sample_time, to_char(MAX(sample_time), 'yyyymmdd hh24:mi:ss') max_sample_time
FROM dba_hist_active_sess_history
WHERE sql_id='15vru8xqgdkjf'
GROUP BY sql_exec_id
ORDER BY COUNT(1) DESC;
SQL_EXEC_ID COUNT(1) MIN_SAMPLE_TIME MAX_SAMPLE_TIME
------------- --------- ------------------- -------------------
22616995 4 20161120 20:44:04 20161120 20:44:34
......
--可以看到:SQL_ID为'15vru8xqgdkjf'在ASH中被连续采样到了4次,说明执行时间大于30秒.与客户描述完全一致。
6、我们通过sql_exec_id即SQL的唯一执行标记,来看看这笔超长交易的情况
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, event, session_state, p1, p2,
blocking_inst_id||'_'||blocking_session block_sid FROM dba_hist_active_sess_history
WHERE sql_exec_id=22616995
AND sql_id='15vru8xqgdkjf'
ORDER BY sample_time;
SAMPLE_TIME SID EVENT STATE P1 P2 BLOCK_SID
------------------- ------- ------------------- --------- ----- --------- ------------
20161120 20:44:04 1_2724 gc current retry WAITING 43 1152178 _
20161120 20:44:14 1_2724 gc current retry WAITING 43 1152178 _
20161120 20:44:24 1_2724 gc current retry WAITING 43 1152178 _
20161120 20:44:34 1_2724 gc currentretry WAITING 39 1770308 1_2449
确实最长的一笔交易,在04秒到24秒期间,一直在等”gc current retry”。“gc currentretry”,从字面上,看是请求当前BLOCK,但是无法获取,在retry.
既然是向节点2请求一个BLOCK,无法获取,需要retry,那么我们有必要查下节点2是谁负责供应这个BLOCK 呢?没错!就是节点2的LMS进程!
接下来,我们于情于理,都应该去查下问题期间,LMS进程在做什么
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, PROGRAM, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
FROM dba_hist_active_sess_history
WHERE instance_number=2
AND upper(PROGRAM) LIKE '%LMS%'
AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
ORDER BY sid, sample_time;
SAMPLE_TIME SID PROGRAM EVENT STATE P1 P2 BLOCK_SID
------------------- ------- ----------------------- ------------------- --------- ----- --------- ------------
20161120 20:44:06 2_1035 oracle@corerac2 (LMS9) gcs log flush sysnc WAITING 30 0 2_2633
20161120 20:44:16 2_1035 oracle@corerac2 (LMS9) gcs log flush sysnc WAITING 30 0 2_2633
20161120 20:44:06 2_1082 oracle@corerac2 (LMSA) gcs log flush sysnc WAITING 30 0 2_2633
20161120 20:44:16 2_1082 oracle@corerac2 (LMSA) gcs log flush sysnc WAITING 30 0 2_2633
20161120 20:44:06 2_1129 oracle@corerac2 (LMSB) gcs log flush sysnc WAITING 30 0 2_2633
20161120 20:44:16 2_1129 oracle@corerac2 (LMSB) gcs log flush sysnc WAITING 30 0 2_2633
.....
可以看到,这些LMS进程在等gcs log flush sync,即LMS进程在把一个BLOCK传给其他节点前,需要把这个BLOCK在log buffer中的改变,刷到磁盘中的在线日志文件,
但是很可惜,刷了很长时间都没有完成。所以没有办法快速将BLOCK传输给节点1,因此节点1在不断retry.这里” gcs log flush sync”的阻塞者2_2633显然就是我们所熟悉的LGWR进程,即log buffer 的写进程。
看看节点2的LGWR进程是不是在忙其他事,无暇顾及LMS进程的刷日志请求呢?
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, PROGRAM, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
FROM dba_hist_active_sess_history
WHERE instance_number||'_'||session_id='2_2633'
AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
ORDER BY sample_time;
SAMPLE_TIME SID PROGRAM EVENT STATE SEQ# P1 P2 BLOCK_SID
------------------- ------- ----------------------- ----------------------- --------- ----- ----------- --------- ------------
20161120 20:44:06 2_2633 oracle@corerac2 (LGWR) enq: CF-contention WAITING 30 1128660997 0 _
20161120 20:44:16 2_2633 oracle@corerac2 (LGWR) enq: CF-contention WAITING 30 1128660997 0 1_1369
20161120 20:44:26 2_2633 oracle@corerac2 (LGWR) enq: CF-contention WAITING 30 1128660997 0 1_1369
20161120 20:44:36 2_2633 oracle@corerac2 (LGWR) LOG file parallel write WAITING 30 1 118 _
可以看到:节点2的LGWR进程,在等enq: CF-contention,即想要去写控制文件(例如切换日志时需要写控制文件),但是被其他人领先一步,这个人是节点1的1369会话。
期间,LGWR进程傻傻的继续等控制文件的队列锁,但等了几十秒都无法获取。
乘胜追击,发出下列查询,看看节点1的1369会话为什么长时间持有控制文件的队列锁。
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss') sample_time, instance_number||'_'||session_id sid, PROGRAM, event, session_state,state, seq#, p1,p2,blocking_inst_id||'_'||blocking_session block_sid
FROM dba_hist_active_sess_history
WHERE instance_number||'_'||session_id='1_1369'
AND to_char(sample_time, 'yyyymmdd hh24:mi:ss') BETWEEN '20161120 20:44:04' AND '20161120 20:44:24'
ORDER BY sample_time;
SAMPLE_TIME SID PROGRAM EVENT STATE SEQ# P1 P2 BLOCK_SID
------------------- ------- ----------------------- ---------------------------- --------- ----- ----------- --------- ------------
20161120 20:44:04 1_1369 oracle@corerac2 (CKPT) control file sequential read WAITING 16094 0 39 _
可以看到,sid=1369是节点1的ckpt检查点进程. ckpt正在进行控制文件的读写,因此持有控制文件的锁。
期间等待事件是控制文件的顺序度和并行写,并且没有阻塞,而是一直在变化。正常来说,如果IO比较快的话,那么持有控制文件的锁的时间是非常快的。
从oswatcher数据可以看到,在问题时段,很多磁盘busy 100%,平均响应时间200多毫秒,IO基本上主要是写.
检查ASH中的等待事件
SELECT to_char(sample_time, 'yyyymmdd hh24:mi:ss'), event, COUNT(1) FROM dba_hist_active_sess_history
WHERE instance_number=1
AND event LIKE '%write%'
GROUP BY to_char(sample_time, 'yyyymmdd hh24:mi:ss')
ORDER BY 1,3;
可以看到是问题时刻,在等db file parallel write的进程有24个,显然是DBWR进程在等该数据库总计24个DBWR进程,全部处于活动状态,说明有什么动作触发了大量刷脏块的操作。
检查alert日志,可以看到alter system archive log all(current)的命令,该命令必然会触发DBWR大量刷脏块,导致磁盘在短时间内极忙,导致HBA卡带宽也可能被占满。
答案就是我们大家所熟悉的RMAN备份脚本!
要说明的是,真相和你想的不一样!不是备份产生的IO影响了磁盘繁忙!
因为之前我们看到了,磁盘繁忙100%来自于写,而不是来自于读!RMAN主要是读!而且如果是RMAN的读IO影响磁盘繁忙,那么不可能只影响了30秒左右!
因为RMAN脚本开始执行时的alter system archive log all(current)的命令触发了DBWR大量刷脏块,导致磁盘很忙,而控制文件和数据文件在一起,导致ckpt进程在拿到控制文件队列锁后,
操作很慢,继而阻塞了LGWR进程,LGWR进程于是不响应LMS进程,最后导致了GC等待数十秒,也就是联机交易大量超时。全量检查点完成后,交易恢复正常!
故障原因总结:
- 故障的原因在于批量期间,产生大量脏块
- ckpt进程持有控制文件的队列锁后,开始进行IO操作
- 但期间rman备份调用alter system archive log的命令,从而触发dbwr进程往数据文件大量的写,导致磁盘100% busy.导致控制文件的IO读写无法快速完成。
- 由于日志切换,因此lgwr进程需要获得控制文件的队列锁,而ckpt进程阻塞了lgwr进程,lgwr进程处于enq: CF – contention状态,无法响应LMS进程的日志写情况。LMS进程负责节点间的GC。因此,GC出现问题,INSERT语句出现超时
数据中心运维建议:
Ø 将控制文件和数据文件从底层RAID组就分开,放到其他磁盘组,避免被DBWR写IO和RMAN读IO影响。
Ø Rman备份脚本中的alter system archive log all命令会导致数据库做全量检查点,触发DBWR的大量IO操作,期间拖慢整体性能
Ø 重新评估 alter system archive log all的替代方式
Ø 将RMAN备份和批量时间错开
##gc相关的等待事件
关于gc的简单描述,就是当实例1 发起请求时发现需要请求的块已经在另一个实例,这个时候就需要通过GCS(LMS)通过私网把相应的块传递到实例1,这个过程就是集群的cache fusion。请求的块又分为当前块和CR。
1、gc buffer busy acquire
当session#1(节点1)尝试请求访问远程实例(remote instance) buffer,但是在session#1之前已经有相同实例上(节点1)另外一个session#2请求访问了相同的buffer,并且没有完成,那么session#1等待gc buffer busy acquire。
出现gc buffer busy acquire的原因一般有以下几种原因:
■ 热点块(hot block)
在AWR中Segments by Global Cache Buffer Busy 记录了访问频繁的gc buffer(对比正常时段),解决方法可以根据热点块的类型采取不同的解决方法,比如采取分区表,分区索引,反向index等等。这点与单机数据库中的buffer busy waits类似。
■ 低效SQL语句
低效SQL语句会导致不必要的buffer被请求访问,增加了buffer busy的机会。在AWR中可以找到TOP SQL。解决方法可以优化SQL语句减少buffer访问。这点与单机数据库中的buffer busy waits类似。
■ 数据交叉访问
RAC数据库,同一数据在不同数据库实例上被请求访问。如果应用程序可以实现,那么我们建议不同的应用功能/模块数据分布在不同的数据库实例上被访问,避免同一数据被多个实例交叉访问,可以减少buffer的争用,避免gc等待。
■ Oracle bug
2、gc cr block busy
指实例1和实例2的buffer cache都含有某个block,某个时刻实例1修改了这个block并且没有提交;这个时刻实例2上的会话1读取这个block需要通过undo record 来构造CR块。且构造的过程中必须将改变记入到实例1的redo,这个时候发生的等待就是gc cr block busy等待。
===================================================== 等待事件 ======================================================================
===================================================== 回收高水位 ======================================================================
如何查询高水位:
ANALYZE TABLE test1 COMPUTE STATISTICS;
select
a.owner,
a.table_name,
a.blocks*to_number(p.value)/1024/1024 size_mb,
a.avg_row_len*a.num_rows/(a.blocks*to_number(p.value))*100 pct,
to_char(a.last_analyzed,'yyyy-mm-dd hh24:mi:ss') last_analyzed
from dba_tables a,v$parameter p
where p.name = 'db_block_size'
and a.blocks > 0
and a.avg_row_len*a.num_rows/(a.blocks*to_number(p.value))*100 < 0.7
and a.blocks*to_number(p.value)/1024/1024 >= 10
and a.owner in ('ULTRANMS');
ALTER TABLE SHRINK command took 20 hours for a 150GB table.
The ALTER TABLE MOVE took 10 hours for a 150GB table.
##采用move(move需要两倍的空间)
1. 回收高水位
针对表
alter table scott.emp move;
针对分区
alter table scott.emp move partition P201209;
2. 重建索引
查看本地索引的状态
select index_name,index_owner,partition_name,status from dba_ind_partitions where index_name in ('IDX_WORKLIST_SUBSID','PK_OM_WL_WORKLIST');
重建本地索引(本地索引肯定是分区索引),可以按整表来重建
alter index NGCRM_JM.IDX_WORKLIST_SUBSID rebuild parallel 8;
重建本地索引(本地索引肯定是分区索引),也可以按分区来重建
alter index NGCRM_JM.IDX_WORKLIST_SUBSID rebuild partition P201209 parallel 8;
查看全局索引的状态(主键肯定是全局索引)
select index_name,index_owner,status from dba_partitions where index_name in ('IDX_WORKLIST_SUBSID','PK_OM_WL_WORKLIST');
重建全局索引
alter index NGCRM_JM.IDX_WORKLIST_SUBSID rebuild parallel 8;
3. 统计信息收集
以下是一些move时候的注意点:
1、move前最好逻辑备份待整理的表;
2、对于大表,建议开启并行和nologging
alter table test move nologging parallel 2;
3、整理完毕后重建相关的索引
alter index ind_test rebuild nologging parallel 2;
4、恢复表和索引的并行度、logging
alter table test logging parallel 1;
5、当我们对table进行move操作时,查询v$locked_objects视图可以发现,table上加了exclusive lock。需要在非业务时段使用
##采用shrink 对整表进行回收高水位,若出现资源繁忙,则考虑采用shrink (shrink不需要额外的空间)
oracle在进行shrink的时候会对相应的索引进行维护,以保证index在shrink结束的时候index仍然有效。
alter table ICDWF.WF_TBL_NODEINFO enable row movement;
alter table ICDWF.WF_TBL_NODEINFO shrink space cascade; --加cascade选项会同时整理索引空间
alter table ICDWF.WF_TBL_NODEINFO disable row movement;
segment shrink分为两个阶段:
1、数据重组(compact):通过一系列insert、delete操作,将数据尽量排列在段的前面。在这个过程中需要在表上加RX锁,即只在需要移动的行上加锁。
在这个过程中会产生大量的undo和redo信息
由于涉及到rowid的改变,需要enable row movement.同时要disable基于rowid的trigger.这一过程对业务影响比较小。
2、HWM调整:第二阶段是调整HWM位置,释放空闲数据块。此过程需要在表上加X锁,会造成表上的所有DML语句阻塞。在业务特别繁忙的系统上可能造成比较大的影响。
注意:shrink space语句两个阶段都执行。
shrink space compact只执行第一个阶段。
如果系统业务比较繁忙,可以先执行shrink space compact重组数据,然后在业务不忙的时候再执行shrink space降低HWM释放空闲数据块。
alter table images shrink space compact; -- do not lock table, run online
alter table images shrink space ; -- lock table, run when users aren't
alter table images modify lob (image) (shrink space);
--耗时案例:
OWNER TABLE_NAME COLUMN_NAME SEGMENT_NAME SEGMENT_TYPE ROUND(B.BYTES/1024/1024)
--------------- -------------------- ------------------------------ ------------------------------ ------------------ ------------------------
HR65_APP SM_BUSILOG_DEFAULT TABLE 8297 --表大小
HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_LOB0000085979C00006$$ LOBSEGMENT 299871 --LOB字段大小
HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_IL0000085979C00006$$ LOBINDEX 593 --LOB索引大小
TABLE_NAME NUM_ROWS ACTUAL_MB HIGH_WATER_MB NEED_RECOVERY_MB
------------------------------ ---------- ---------- ------------- ----------------
SM_BUSILOG_DEFAULT 1838803 2700.57356 8291.44531 5590.87175
alter table HR65_APP.SM_BUSILOG_DEFAULT shrink space compact;
测试库耗时Elapsed: 02:24:12.95
执行完之后表大小,LOB字段大小都没有变化
ALTER TABLE HR65_APP.SM_BUSILOG_DEFAULT MODIFY LOB (LOGMSG) (SHRINK SPACE compact);
测试库耗时Elapsed: 08:02:45.11
执行完之后LOB字段大小没有变化
ALTER TABLE HR65_APP.SM_BUSILOG_DEFAULT SHRINK SPACE CASCADE;
Elapsed: 01:52:45.64
ALTER TABLE HR65_APP.SM_BUSILOG_DEFAULT MODIFY LOB (LOGMSG) (SHRINK SPACE);
Elapsed: 00:16:43.50
OWNER TABLE_NAME COLUMN_NAME SEGMENT_NAME SEGMENT_TYPE ROUND(B.BYTES/1024/1024)
--------------- -------------------- ------------------------------ ------------------------------ ------------------ ------------------------
HR65_APP SM_BUSILOG_DEFAULT TABLE 3430
HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_LOB0000085979C00006$$ LOBSEGMENT 30695
HR65_APP SM_BUSILOG_DEFAULT LOGMSG SYS_IL0000085979C00006$$ LOBINDEX 593
--alter table move和alter table shrink都可以把表收缩和整理碎片,还有一个很大的区别是:
alter table move后表在表空间中的位置肯定会变,可能前移也可能后移,一般来说如果该表前面的表空间中有足够空间容纳该表,则前移,否则后移。
alter table shrink后表在表空间中的位置肯定不变,也就是表的段头位置不会发生变化。
基于这个区别,把一个表空间中的所有表move后,表会都集中到表空间的前半部分了,此时表空间可以收缩。把一个表空间中的所有表shrink后,表空间中表的位置没有发生变化,此时表空间跟shrink之前一样,依然没有多少收缩空间。
#查看高水位下块使用情况
o Allocated, but currently unformatted and unused
o Formatted and contain data
o Formatted and empty because the data was deleted
对于ASSM:
对于ASSM的segment来说,考察HWM下的blocks的空间使用状况相对要简单一些。在这里,我们可以使用这样一个procedure来得到table的blocks使用情况:
create or replace procedure show_space_assm(
p_segname in varchar2,
p_owner in varchar2 default user,
p_type in varchar2 default 'TABLE' )
as
l_fs1_bytes number;
l_fs2_bytes number;
l_fs3_bytes number;
l_fs4_bytes number;
l_fs1_blocks number;
l_fs2_blocks number;
l_fs3_blocks number;
l_fs4_blocks number;
l_full_bytes number;
l_full_blocks number;
l_unformatted_bytes number;
l_unformatted_blocks number;
procedure p( p_label in varchar2, p_num in number )
is
begin
dbms_output.put_line( rpad(p_label,40,'.') ||p_num );
end;
begin
dbms_space.space_usage(
segment_owner => p_owner,
segment_name => p_segname,
segment_type => p_type,
fs1_bytes => l_fs1_bytes,
fs1_blocks => l_fs1_blocks,
fs2_bytes => l_fs2_bytes,
fs2_blocks => l_fs2_blocks,
fs3_bytes => l_fs3_bytes,
fs3_blocks => l_fs3_blocks,
fs4_bytes => l_fs4_bytes,
fs4_blocks => l_fs4_blocks,
full_bytes => l_full_bytes,
full_blocks => l_full_blocks,
unformatted_blocks => l_unformatted_blocks,
unformatted_bytes => l_unformatted_bytes);
p('free space 0-25% Blocks:',l_fs1_blocks);
p('free space 25-50% Blocks:',l_fs2_blocks);
p('free space 50-75% Blocks:',l_fs3_blocks);
p('free space 75-100% Blocks:',l_fs4_blocks);
p('Full Blocks:',l_full_blocks);
p('Unformatted blocks:',l_unformatted_blocks);
end;
/
我们知道,在ASSM下,block的空间使用分为free space: 0-25%,25-50%,50-75%,70-100%,full 这样5中情况,show_space_assm会对需要统计的table汇总这5中类型的block的数量。
我们来看table HWM1的空间使用情况:
SQL> set serveroutput on;
SQL> exec show_space_assm('HWM1','DLINGER');
free space 0-25% Blocks:.................0
free space 25-50% Blocks:...............1
free space 50-75% Blocks:...............0
free space 75-100% Blocks:..............8
Full Blocks:.....................................417
Unformatted blocks:.........................0
这个结果显示,table HWM1,full的block有417个,free space 为75-100% Block有8个,free space 25-50% Block有1个。当table HWM下的blocks的状态大多为free space
较高的值时,我们考虑来合并HWM下的blocks,将空闲的block释放,降低table的HWM。
SQL> set serveroutput on;
SQL> exec show_space_assm('SM_BUSILOG_DEFAULT','HR65_APP');
SQL> exec show_space_assm('SYS_LOB0000085979C00006$$','HR65_APP','LOB');
#查看高水位存储过程
create or replace procedure show_space
( p_segname in varchar2,
p_owner in varchar2 default user,
p_type in varchar2 default 'TABLE',
p_partition in varchar2 default NULL )
as
l_total_blocks number;
l_total_bytes number;
l_unused_blocks number;
l_unused_bytes number;
l_LastUsedExtFileId number;
l_LastUsedExtBlockId number;
l_last_used_block number;
procedure p( p_label in varchar2, p_num in number )
is
begin
dbms_output.put_line( rpad(p_label,40,'.') ||
p_num );
end;
begin
dbms_space.unused_space
( segment_owner => p_owner,
segment_name => p_segname,
segment_type => p_type,
partition_name => p_partition,
total_blocks => l_total_blocks,
total_bytes => l_total_bytes,
unused_blocks => l_unused_blocks,
unused_bytes => l_unused_bytes,
last_used_extent_file_id => l_LastUsedExtFileId,
last_used_extent_block_id => l_LastUsedExtBlockId,
last_used_block => l_last_used_block );
p( 'Total Blocks', l_total_blocks );
p( 'Total Bytes', l_total_bytes );
p( 'Unused Blocks', l_unused_blocks );
p( 'Unused Bytes', l_unused_bytes );
p( 'Last Used Ext FileId', l_LastUsedExtFileId );
p( 'Last Used Ext BlockId', l_LastUsedExtBlockId );
p( 'Last Used Block', l_last_used_block );
end;
===================================================== 回收高水位 ====================================================================
##表碎片(高水位)
SELECT --表
TABLE_NAME,
OWNER,
PARTITIONED,
tablespace_name,
LAST_ANALYZED,
NUM_ROWS,
BLOCKS*8/1024 "SIZE_M(true)", --表的总大小
(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024 "USED_M(need)", --行平均长度*行数 + 事物槽 (实际使用大小)
(BLOCKS*8/1024 - ((AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024)) "FREE_MB(release)", --可以释放大小
(1-(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/8096/BLOCKS)*100 "PCT(shrink)" --释放百分比
FROM DBA_TABLES
where BLOCKS*8/1024 >5 and blocks != 0
and (1-(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/8096/BLOCKS)*100 > 20 --释放百分比大于20%以上
order by "FREE_MB(release)", "SIZE_M(true)", "PCT(shrink)" asc;
SELECT --有lob字段的表
t.TABLE_NAME,
t.OWNER,
LAST_ANALYZED,
b.segment_name,
b.lob_size_m,
NUM_ROWS,
BLOCKS*8/1024 "SIZE_M(true)",
(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024 "USED_M(need)",
(BLOCKS*8/1024 - ((AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/1024/1024)) "FREE_MB(release)",
(1-(AVG_ROW_LEN*NUM_ROWS+24*INI_TRANS)/8096/BLOCKS)*100 "PCT(shrink)"
FROM DBA_TABLES t,
(select * from (
select s.owner, l.table_name, s.segment_name , l.COLUMN_NAME, s.bytes/1024/1024 lob_size_m from dba_segments s, dba_lobs l
where s.segment_name=l.segment_name and segment_type='LOBSEGMENT' order by 5 desc )
where rownum<10) b
where t.OWNER=b.OWNER and t.TABLE_NAME=b.TABLE_NAME and t.BLOCKS*8/1024 >5 and blocks != 0
order by "FREE_MB(release)", "SIZE_M(true)", "PCT(shrink)" asc;
##监控表空间的增长
--每天增长量
select a.days, a.tsname, cur_size_mb,used_size_mb,usage_pert, used_size_mb - lag (used_size_mb,1) over ( partition by a.tsname order by a.tsname,a.days) inc_used_size_mb
from (
select to_char(sp.begin_interval_time,'mm-dd-yyyy') days, ts.tsname,
max(round((tsu.tablespace_size* dt.block_size )/(1024*1024),2) ) cur_size_mb,
max(round((tsu.tablespace_usedsize* dt.block_size )/(1024*1024),2)) used_size_mb,
round(max(tsu.tablespace_usedsize)/max(tsu.tablespace_size)*100, 1) usage_pert
from dba_hist_tbspc_space_usage tsu --依赖AWR SNAPSHOT数据
inner join dba_hist_tablespace_stat ts on tsu.tablespace_id=ts.ts#
inner join dba_hist_snapshot sp on tsu.snap_id =sp.snap_id
inner join dba_tablespaces dt on ts.tsname=dt.tablespace_name
where
--(sp.begin_interval_time > sysdate-2) and
dt.tablespace_name not like '%UNDO%'
group by to_char(sp.begin_interval_time,'mm-dd-yyyy'), ts.tsname
order by ts.tsname, days
) a ;
--平均增长量/天
break on report
compute sum of cur_used_size_mb on report
compute sum of avg_increas_mb on report
select b.tsname tablespace_name, max(b.used_size_mb) cur_used_size_mb, round(avg(inc_used_size_mb),2) avg_increas_mb
from (
select a.days, a.tsname, used_size_mb, used_size_mb - lag (used_size_mb,1) over ( partition by a.tsname order by a.tsname,a.days) inc_used_size_mb
from (
select to_char(sp.begin_interval_time,'mm-dd-yyyy') days, ts.tsname,
max(round((tsu.tablespace_usedsize* dt.block_size )/(1024*1024),2)) used_size_mb
from dba_hist_tbspc_space_usage tsu
inner join dba_hist_tablespace_stat ts on tsu.tablespace_id=ts.ts#
inner join dba_hist_snapshot sp on tsu.snap_id =sp.snap_id
inner join dba_tablespaces dt on ts.tsname=dt.tablespace_name
where
--(sp.begin_interval_time > sysdate-2) and
dt.tablespace_name not like '%UNDO%'
group by to_char(sp.begin_interval_time,'mm-dd-yyyy'), ts.tsname
order by ts.tsname, days
) a
) b
group by b.tsname order by b.tsname;
select sum(space_used_total)/1024/1024/1024 "last 7 days db increase - G"
from
dba_hist_seg_stat s,
dba_hist_seg_stat_obj o,
dba_hist_snapshot sn
where
s.obj# = o.obj#
and
sn.snap_id = s.snap_id
and begin_interval_time > sysdate-8
order by
begin_interval_time
/
#按小时查看归档日志个数 --官方推荐15-20分钟redo切换频率
set linesize 300 pagesize 1000
col Date for a10
col Day for a5
col Total for 99999
col "h00" for a5
col "h01" for a5
col "h02" for a5
col "h03" for a5
col "h04" for a5
col "h05" for a5
col "h06" for a5
col "h07" for a5
col "h08" for a5
col "h09" for a5
col "h10" for a5
col "h11" for a5
col "h12" for a5
col "h13" for a5
col "h14" for a5
col "h15" for a5
col "h16" for a5
col "h17" for a5
col "h18" for a5
col "h19" for a5
col "h20" for a5
col "h21" for a5
col "h22" for a5
col "h23" for a5
select trunc(first_time) "Date",to_char(first_time, 'Dy') "Day",count(1) as "Total",
substr(SUM(decode(to_char(first_time, 'hh24'),'00',1,0)),1,3) as "h00",
substr(SUM(decode(to_char(first_time, 'hh24'),'01',1,0)),1,3) as "h01",
substr(SUM(decode(to_char(first_time, 'hh24'),'02',1,0)),1,3) as "h02",
substr(SUM(decode(to_char(first_time, 'hh24'),'03',1,0)),1,3) as "h03",
substr(SUM(decode(to_char(first_time, 'hh24'),'04',1,0)),1,3) as "h04",
substr(SUM(decode(to_char(first_time, 'hh24'),'05',1,0)),1,3) as "h05",
substr(SUM(decode(to_char(first_time, 'hh24'),'06',1,0)),1,3) as "h06",
substr(SUM(decode(to_char(first_time, 'hh24'),'07',1,0)),1,3) as "h07",
substr(SUM(decode(to_char(first_time, 'hh24'),'08',1,0)),1,3) as "h08",
substr(SUM(decode(to_char(first_time, 'hh24'),'09',1,0)),1,3) as "h09",
substr(SUM(decode(to_char(first_time, 'hh24'),'10',1,0)),1,3) as "h10",
substr(SUM(decode(to_char(first_time, 'hh24'),'11',1,0)),1,3) as "h11",
substr(SUM(decode(to_char(first_time, 'hh24'),'12',1,0)),1,3) as "h12",
substr(SUM(decode(to_char(first_time, 'hh24'),'13',1,0)),1,3) as "h13",
substr(SUM(decode(to_char(first_time, 'hh24'),'14',1,0)),1,3) as "h14",
substr(SUM(decode(to_char(first_time, 'hh24'),'15',1,0)),1,3) as "h15",
substr(SUM(decode(to_char(first_time, 'hh24'),'16',1,0)),1,3) as "h16",
substr(SUM(decode(to_char(first_time, 'hh24'),'17',1,0)),1,3) as "h17",
substr(SUM(decode(to_char(first_time, 'hh24'),'18',1,0)),1,3) as "h18",
substr(SUM(decode(to_char(first_time, 'hh24'),'19',1,0)),1,3) as "h19",
substr(SUM(decode(to_char(first_time, 'hh24'),'20',1,0)),1,3) as "h20",
substr(SUM(decode(to_char(first_time, 'hh24'),'21',1,0)),1,3) as "h21",
substr(SUM(decode(to_char(first_time, 'hh24'),'22',1,0)),1,3) as "h22",
substr(SUM(decode(to_char(first_time, 'hh24'),'23',1,0)),1,3) as "h23"
from v$log_history
group by trunc(first_time), to_char(first_time, 'Dy')
order by 1;
##每天产生的归档量
alter session set NLS_DATE_FORMAT='YYYY-MON-DD';
select trunc(completion_time) rundate
,count(*) logswitch
,round((sum(blocks*block_size)/1024/1024)) "REDO PER DAY (MB)"
from v$archived_log
group by trunc(completion_time)
order by 1;
每小时切换次数
select trunc(first_time, 'HH') , count(*)
from v$loghist
group by trunc(first_time, 'HH')
order by trunc(first_time, 'HH');
set line 300 pagesize 1000
col cmd for a100
select /*+ ordered use_hash(a,c) */ 'alter database datafile '''||a.file_name||''' resize ' ||round(a.filesize - (a.filesize - c.hwmsize-100) *0.8)||'M;' cmd, a.filesize, c.hwmsize, a.filesize-round(a.filesize - (a.filesize - c.hwmsize-100) *0.8) shrink_size
from (select file_id,file_name,round(bytes/1024/1024) filesize from dba_data_files) a,
(select file_id,round(max(block_id)*8/1024) HWMsize from dba_extents group by file_id) c
where a.file_id = c.file_id and a.filesize - c.hwmsize > 100 order by shrink_size;
#临时表空间的使用情况(高水位 rac)
SELECT d.tablespace_name "Tablespace_name",
TO_CHAR(NVL(a.bytes / 1024 / 1024, 0),'99,999,990.900') "Size (M)",
TO_CHAR(NVL(t.hwm, 0)/1024/1024,'99999999.999') "HWM (M)",
TO_CHAR(NVL(t.hwm / a.bytes * 100, 0), '990.00') "HWM % " ,
TO_CHAR(NVL(t.bytes/1024/1024, 0),'99999999.999') "Using (M)",
TO_CHAR(NVL(t.bytes / a.bytes * 100, 0), '990.00') "Using %"
FROM sys.dba_tablespaces d,
(select tablespace_name, sum(bytes) bytes from dba_temp_files group by tablespace_name) a,
(select tablespace_name, sum(bytes_cached) hwm, sum(bytes_used) bytes from gv$temp_extent_pool group by tablespace_name) t
WHERE d.tablespace_name = a.tablespace_name(+)
AND d.tablespace_name = t.tablespace_name(+)
AND d.extent_management like 'LOCAL'
AND d.contents like 'TEMPORARY';
--收缩临时表空间
alter tablespace temp shrink space keep 400M;
alter tablespace temp shrink space;
alter tablespace temp shrink tempfile '/u01/app/oracle/oradata/anqing/temp01.dbf' keep 300M;
alter tablespace temp shrink tempfile '/u01/app/oracle/oradata/anqing/temp01.dbf';
##数据文件碎片
select /*+ ordered use_hash(a,b,c) */ a.file_id,a.file_name,a.filesize, b.freesize,
(a.filesize-b.freesize) usedsize, c.hwmsize, c.hwmsize - (a.filesize-b.freesize) freesize_belowhwm, a.filesize - c.hwmsize canshrinksize
from (select file_id,file_name,round(bytes/1024/1024) filesize from dba_data_files) a,
(select file_id,round(sum(dfs.bytes)/1024/1024) freesize from dba_free_space dfs group by file_id) b,
(select file_id,round(max(block_id)*8/1024) HWMsize from dba_extents group by file_id) c
where a.file_id = b.file_id and a.file_id = c.file_id
order by freesize_belowhwm desc;
#表空间碎片
select a.tablespace_name,
trunc(sqrt(max(blocks)/sum(blocks))* (100/sqrt(sqrt(count(blocks)))),2) fsfi
from dba_free_space a,dba_tablespaces b
where a.tablespace_name=b.tablespace_name
and b.contents not in('TEMPORARY','UNDO')
group by A.tablespace_name
order by fsfi;
#列出使用频率最高的5个查询:
select sql_text,executions
from (select sql_text,executions,
rank() over
(order by executions desc) exec_rank
from v$sql)
where exec_rank <=5;
#消耗磁盘读取最多的sql top5:
select disk_reads,sql_text
from (select sql_text,disk_reads,
dense_rank() over
(order by disk_reads desc) disk_reads_rank
from v$sql)
where disk_reads_rank <=5;
#找出需要大量缓冲读取(逻辑读)操作的查询:
select buffer_gets,sql_text
from (select sql_text,buffer_gets,
dense_rank() over
(order by buffer_gets desc) buffer_gets_rank
from v$sql)
where buffer_gets_rank<=5;
#查看消耗资源最多的SQL
select
b.username username,
a.disk_reads reads,
a.executions exec,
a.disk_reads/decode(a.executions,0,1,a.executions) rds_exec_ratio,
a.sql_text Statement
from v$sqlarea a,dba_users b
where a.parsing_user_id=b.user_id and a.disk_reads > 100000
order by a.disk_reads desc;
#高消耗的会话
col "program" format a38
col "event" format a30
col "username" format a15
select ta.*,round(ta.cpu_time/tb.total_cpu * 100,1) cpu_usage from
(select s.username,s.program,s.event,s.sql_id,sum(trunc(m.CPU)) CPU_TIME,count(*) sum
from v$sessmetric m ,v$session s
where ( m.PHYSICAL_READS >100
or m.CPU>100
or m.LOGICAL_READS >100)
and m.SESSION_ID = s.SID
and m.SESSION_SERIAL_NUM = s.SERIAL#
and s.status = 'ACTIVE'
and username is not null
group by s.username,s.program,s.event,s.sql_id
order by 5 desc) ta,(select sum(cpu) total_cpu from v\$sessmetric) tb
where rownum < 11;
--用buffer_gets列来替换disk_reads列可以得到占用最多内存的sql语句的相关信息
#最消耗CPU
SELECT a.CPU_TIME, --CPU时间 百万分之一(微秒)
a.OPTIMIZER_MODE,--优化方式
a.EXECUTIONS,--执行次数
a.DISK_READS,--读盘次数
a.SHARABLE_MEM,--占用shared pool的内存多少
a.BUFFER_GETS,--读取缓冲区的次数
a.COMMAND_TYPE,--命令类型(3:select,2:insert;6:update;7delete; 47l/sql程序单元)
a.SQL_TEXT,--Sql语句
a.SHARABLE_MEM,
a.PERSISTENT_MEM,
a.RUNTIME_MEM,
a.PARSE_CALLS,
a.DISK_READS,
a.DIRECT_WRITES,
a.CONCURRENCY_WAIT_TIME,
a.USER_IO_WAIT_TIME
from V$SQLAREA a
WHERE PARSING_SCHEMA_NAME = 'DB_USERS'--Schema
order by a.CPU_TIME desc
#查找前10条性能差的sql语句
SELECT * FROM (select PARSING_USER_ID,sql_id,EXECUTIONS,SORTS,COMMAND_TYPE,DISK_READS,sql_text FROM v$sqlarea
order BY disk_reads DESC )where ROWNUM<10 ;
--EXECUTIONS表示同一条SQL语句一共执行了多少次,SORTS表示排序的次数,DISK_READS表示物理读的数量。
#分析性能差的sql
SELECT EXECUTIONS , DISK_READS, BUFFER_GETS,
ROUND((BUFFER_GETS-DISK_READS)/BUFFER_GETS,2) Hit_radio,
ROUND(DISK_READS/EXECUTIONS,2) Reads_per_run,
SQL_TEXT
FROM V$SQLAREA
WHERE EXECUTIONS>0
AND BUFFER_GETS >0
AND (BUFFER_GETS-DISK_READS)/BUFFER_GETS < 0.8
--查询共享池中已经解析过的SQL语句及其相关信息
--EXECUTIONS 所有子游标的执行这条语句次数
--DISK_READS 所有子游标运行这条语句导致的读磁盘次数
--BUFFER_GETS 所有子游标运行这条语句导致的读内存次数
--Hit_radio 命中率
--Reads_per_run 每次执行读写磁盘数
--笼统的说EXECUTIONS,BUFFER_GETS,Hit_radio越高表示读内存多,磁盘少是比较理想的状态,因此越高越好.另外两个越高读磁盘次数越多,因此低点好
##整体资源负载变化(过去500分钟)
SELECT to_char(TIME,'hh24:mi') , S.*
FROM (SELECT NVL(WAIT_CLASS, 'CPU') ACTIVITY,
TRUNC(SAMPLE_TIME, 'MI') TIME
FROM GV$ACTIVE_SESSION_HISTORY) V PIVOT(COUNT(*) FOR ACTIVITY IN ('CPU' AS "CPU", 'Concurrency' AS "Concurrency", 'System I/O' AS "System I/O", 'User I/O' AS "User I/O", 'Administrative' AS "Administrative", 'Configuration' AS "Configuration", 'Application' AS "Application", 'Network' AS "Network", 'Commit' AS "Commit", 'Scheduler' AS "Scheduler", 'Cluster' AS "Cluster", 'Queueing' AS "Queueing", 'Other' AS "Other")) S
WHERE TIME > SYSDATE - INTERVAL '500' MINUTE
ORDER BY TIME;
##内存命中率
--Library Hit %
SELECT round(100 * (SUM(e.PINHITS) - sum(b.pinhits)) /
(SUM(e.PINS) - sum(b.pins)),
2)
FROM DBA_HIST_LIBRARYCACHE b, DBA_HIST_LIBRARYCACHE e
WHERE e.SNAP_ID = &end_SNAP
AND e.DBID = &L_DBID
AND e.INSTANCE_NUMBER = &L_INST_NUM
and b.SNAP_ID = &beg_SNAP
AND b.DBID = &L_DBID
AND b.INSTANCE_NUMBER = &L_INST_NUM
--Parse CPU to Parse Elapsd %:
select round(100 * ((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = &end_snap
AND e.DBID = &L_DBID
AND e.INSTANCE_NUMBER = &L_INST_NUM
AND e.STAT_NAME = 'parse time cpu') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = &beg_snap
AND b.DBID = &L_DBID
AND b.INSTANCE_NUMBER = &L_INST_NUM
AND b.STAT_NAME = 'parse time cpu')) /
((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = &end_snap
AND e.DBID = &L_DBID
AND e.INSTANCE_NUMBER = &L_INST_NUM
AND e.STAT_NAME = 'parse time elapsed') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = &beg_snap
AND b.DBID = &L_DBID
AND b.INSTANCE_NUMBER = &L_INST_NUM
AND b.STAT_NAME = 'parse time elapsed')),
2)
from dual;
-- Soft Parse %:
select round(100 * (1 -
((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = &end_snap
AND e.DBID = &L_DBID
AND e.INSTANCE_NUMBER = &L_INST_NUM
AND e.STAT_NAME = 'parse count (hard)') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = &beg_snap
AND b.DBID = &L_DBID
AND b.INSTANCE_NUMBER = &L_INST_NUM
AND b.STAT_NAME = 'parse count (hard)')) /
((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = &end_snap
AND e.DBID = &L_DBID
AND e.INSTANCE_NUMBER = &L_INST_NUM
AND e.STAT_NAME = 'parse count (total)') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = &beg_snap
AND b.DBID = &L_DBID
AND b.INSTANCE_NUMBER = &L_INST_NUM
AND b.STAT_NAME = 'parse count (total)'))),
2)
from dual;
SELECT round(100 * (SUM(e.PINHITS) - sum(b.pinhits)) /
(SUM(e.PINS) - sum(b.pins)),
2)
FROM DBA_HIST_LIBRARYCACHE b, DBA_HIST_LIBRARYCACHE e
WHERE e.SNAP_ID = 13408
AND e.DBID = b.DBID
AND e.INSTANCE_NUMBER = 1
and b.SNAP_ID = 13409
AND b.INSTANCE_NUMBER = 1
select round(100 * ((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = 13409
AND e.DBID = 3142346740
AND e.INSTANCE_NUMBER = 1
AND e.STAT_NAME = 'parse time cpu') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = 13408
AND b.DBID = 3142346740
AND b.INSTANCE_NUMBER = 1
AND b.STAT_NAME = 'parse time cpu')) /
((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = 13409
AND e.DBID = 3142346740
AND e.INSTANCE_NUMBER = 1
AND e.STAT_NAME = 'parse time elapsed') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = 13408
AND b.DBID = 3142346740
AND b.INSTANCE_NUMBER = 1
AND b.STAT_NAME = 'parse time elapsed')),
2)
from dual;
select round(100 * (1 -
((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = 13409
AND e.DBID = 3142346740
AND e.INSTANCE_NUMBER = 1
AND e.STAT_NAME = 'parse count (hard)') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = 13408
AND b.DBID = 3142346740
AND b.INSTANCE_NUMBER = 1
AND b.STAT_NAME = 'parse count (hard)')) /
((SELECT sum(value)
FROM DBA_HIST_SYSSTAT e
WHERE e.SNAP_ID = 13409
AND e.DBID = 3142346740
AND e.INSTANCE_NUMBER = 1
AND e.STAT_NAME = 'parse count (total)') -
(SELECT sum(value)
FROM DBA_HIST_SYSSTAT b
WHERE b.SNAP_ID = 13408
AND b.DBID = 3142346740
AND b.INSTANCE_NUMBER = 1
AND b.STAT_NAME = 'parse count (total)'))),
2)
from dual;
--检查备份
SELECT start_time, end_time, elapsed_seconds,TRIM (time_taken_display) TIME, status, input_type,
compression_ratio, TRIM (input_bytes_display) inputb,
TRIM (output_bytes_display) outputb,
TRIM (input_bytes_per_sec_display) input,
TRIM (output_bytes_per_sec_display) output,
status
FROM v$rman_backup_job_details
WHERE start_time > TRUNC (SYSDATE) - 22
order by start_time desc ;