如何使用Profiler跟踪阻塞信息

Blocked Process Report
Profiler提供一个被阻塞进程报告的事件Errors and Warnings->Blocked Process Report

指示某个任务已被阻塞,导致超过指定的时间。此事件类不包括系统任务和正在等待未发现死锁的资源的任务。若要配置报告的频率,请使用 sp_configure 命令配置阻塞的进程阈值选项。

默认阈值为0,此时出现阻塞不会触发此事件,可以使用下面脚本设置该阈值为5秒

exec sp_configure 'blocked process threshold',5;
reconfigure;
select * from sys.configurations 
--where name='blocked process threshold (s)'
order by name
View Code

我们运行下面语句,创建一个阻塞的查询

--创建测试数据表
IF (SELECT  OBJECT_ID('dbo.t1')
   ) IS NOT NULL 
    DROP TABLE dbo.t1
GO
CREATE TABLE dbo.t1
    (c1 INT
    ,c2 INT
    ,c3 DATETIME)
INSERT  INTO dbo.t1
VALUES  (11, 12, GETDATE())
INSERT  INTO dbo.t1
VALUES  (21, 22, GETDATE())

--会话1执行
BEGIN TRAN user1
    UPDATE dbo.t1 SET c3=GETDATE()
--ROLLBACK
    
--会话2执行    
BEGIN TRAN user2
    SELECT c2 FROM dbo.t1 WHERE c1=11
--ROLLBACK
View Code

在超过被阻塞进程阈值之后,可以在Profiler中看到Blocked Process Report事件被触发

如果保持代码一直运行,每5秒它将被触发一次,所生成的是一个XML文件

<blocked-process-report>
 <blocked-process>
  <process id="process14f4c8" taskpriority="0" logused="0" waitresource="RID: 6:1:90:0" waittime="36021" ownerId="85965" transactionname="user2" lasttranstarted="2016-12-11T22:44:17.640" XDES="0xcd51730" lockMode="S" schedulerid="1" kpid="1872" status="suspended" spid="56" sbid="0" ecid="0" priority="0" trancount="1" lastbatchstarted="2016-12-11T22:44:17.640" lastbatchcompleted="2016-12-11T22:44:12.160" lastattention="2016-12-11T21:18:25.980" clientapp="Microsoft SQL Server Management Studio - 查询" hostname="VUEST" hostpid="1700" loginname="VUEST\Vuest" isolationlevel="read committed (2)" xactid="85965" currentdb="6" lockTimeout="4294967295" clientoption1="671090784" clientoption2="390200">
   <executionStack>
    <frame line="2" stmtstart="38" sqlhandle="0x020000001be0f42a9b3a6fd8eae5b4ae648e87cc70b1f6ba"/>
    <frame line="2" stmtstart="38" sqlhandle="0x0200000008b74f04b02b5eed91a834b9706e0fc33245acd6"/>
   </executionStack>
   <inputbuf>
begin tran user2
    select c2 from dbo.t1 where c1=11   </inputbuf>
  </process>
 </blocked-process>
 <blocking-process>
  <process status="sleeping" spid="52" sbid="0" ecid="0" priority="0" trancount="1" lastbatchstarted="2016-12-11T22:44:09.750" lastbatchcompleted="2016-12-11T22:44:09.790" clientapp="Microsoft SQL Server Management Studio - 查询" hostname="VUEST" hostpid="1700" loginname="VUEST\Vuest" isolationlevel="read committed (2)" xactid="85956" currentdb="6" lockTimeout="4294967295" clientoption1="671088672" clientoption2="128056">
   <executionStack/>
   <inputbuf>
begin tran user1
    update dbo.t1 set c3=GETDATE()   </inputbuf>
  </process>
 </blocking-process>
</blocked-process-report>
View Code

此时用语句获取阻塞信息

        --得到阻塞对应关系        
        SELECT distinct
               wt.session_id                            AS BlockedSessionId,
               DB_NAME(tl.resource_database_id)            AS DatabaseName,
               wt.wait_type                                AS WaitType,
               tl.request_mode+':'+tl.resource_type+':'+tl.resource_description        AS WaitResource,
               se1.last_request_start_time                AS BlockedTime,ec1.last_read,
               wt.wait_duration_ms                        AS WaitDuration_ms,
               ec1.client_net_address                    AS BlockedClientAddress,
               --h1.text AS BlockedStmt,
               SUBSTRING(
                   h1.text,
                   (er1.statement_start_offset / 2) + 1,
                   (
                       (
                           CASE 
                                WHEN er1.statement_end_offset = -1 THEN LEN(CONVERT(NVARCHAR(MAX), h1.text)) 
                                     * 2
                                ELSE er1.statement_end_offset
                           END - er1.statement_start_offset
                       ) / 2
                   ) + 1
               )                                        AS BlockedStmt,
               wt.blocking_session_id                    AS BlockingSessionId,
               se2.transaction_isolation_level            AS BlockingIsolationLevel,
               wt.resource_description                    AS BlockingHoldResource,
               --h2.text AS BlockingStmt,
               SUBSTRING(
                   h2.text,
                   (ISNULL(er2.statement_start_offset, 0) / 2) + 1,
                   (
                       (
                           CASE 
                                WHEN ISNULL(er2.statement_end_offset, 0) <= 0 THEN 
                                     LEN(CONVERT(NVARCHAR(MAX), h2.text)) 
                                     * 2
                                ELSE er2.statement_end_offset
                           END - ISNULL(er2.statement_start_offset, 0)
                       ) / 2
                   ) + 1
               )                                    AS BlockingStmt,
               ec2.client_net_address                AS BlockingClientAddress,
               se2.program_name                        AS BlockingProgramName,
               se2.login_name                        AS BlockingLoginame,
               GETDATE()                            AS InsertTime
        FROM   sys.dm_tran_locks AS tl WITH(NOLOCK) --会过滤并行问题CXPACKET
        INNER JOIN sys.dm_os_waiting_tasks AS wt WITH(NOLOCK)
             ON  wt.resource_address = tl.lock_owner_address
        INNER JOIN sys.dm_exec_connections ec1 WITH(NOLOCK)
             ON  ec1.session_id = wt.session_id
        INNER JOIN sys.dm_exec_connections ec2 WITH(NOLOCK)
             ON  ec2.session_id = wt.blocking_session_id
        INNER JOIN sys.dm_exec_sessions AS se1 WITH(NOLOCK)
             ON  se1.session_id = ec1.session_id             
        INNER JOIN sys.dm_exec_sessions AS se2 WITH(NOLOCK)
             ON  se2.session_id = ec2.session_id
        INNER JOIN sys.dm_exec_requests er1 WITH(NOLOCK)
             ON  er1.session_id = ec1.session_id
        LEFT JOIN sys.dm_exec_requests er2 WITH(NOLOCK)
             ON  er2.session_id = ec2.session_id
        CROSS APPLY sys.dm_exec_sql_text(ec1.most_recent_sql_handle) AS h1
        CROSS APPLY sys.dm_exec_sql_text(ec2.most_recent_sql_handle) AS h2
View Code


自动化侦测和收集阻塞信息
《SQLServer2008查询性能优化》第355页有说使用[SQL Server警报+作业]来自动侦测和收集阻塞信息。下面参考原文进行测试


[常规]页面计数器Average Wait Time (ms)(我使用计数器Lock Wait Time (ms)一直收不到邮件)对应的是累计值,并且只有当阻塞解除后才会进行累加。例如我重启数据库服务,然后执行前面的创建阻塞语句,只要会话1不回滚/提交,阻塞将一直持续,计数器Average Wait Time (ms)的值不会变动。一旦会话1回滚/提交,计数器Average Wait Time (ms)的值将更新(原值+阻塞时长)。当计数器Average Wait Time (ms)的值满足设定的警报条件时,它将触发警报并响应动作(此例中:执行作业+通知操作员)。之后哪怕没有阻塞,警报还将一直被触发(计数器Average Wait Time (ms)的值一直满足触发警报条件)。警报相关操作可参考:SQL Server代理警报
警报的[响应]页面勾选的执行作业Blocking Analysis,作业执行的语句实际是[将Profiler中定义Blocked Process Report事件导出生成跟踪定义的脚本]

-- Create a Queue
declare @rc int
declare @TraceID int
declare @maxfilesize BIGINT
declare @DateTime datetime
declare @TraceFile nvarchar(256)

set @DateTime = DATEADD(MI,5,GETDATE())
set @maxfilesize = 5
select @TraceFile='F:\TroubleShooting\'+REPLACE(@@SERVERNAME,'\','_')+'_Blocked'

exec @rc = sp_trace_create @TraceID output, 2, @TraceFile, @maxfilesize, @Datetime
if (@rc != 0) goto error

-- Client side File and Table cannot be scripted

-- Set the events
declare @on bit
set @on = 1
exec sp_trace_setevent @TraceID, 137, 1, @on
exec sp_trace_setevent @TraceID, 137, 3, @on
exec sp_trace_setevent @TraceID, 137, 4, @on
exec sp_trace_setevent @TraceID, 137, 12, @on
exec sp_trace_setevent @TraceID, 137, 13, @on
exec sp_trace_setevent @TraceID, 137, 14, @on
exec sp_trace_setevent @TraceID, 137, 15, @on
exec sp_trace_setevent @TraceID, 137, 24, @on
exec sp_trace_setevent @TraceID, 137, 32, @on
exec sp_trace_setevent @TraceID, 137, 41, @on
exec sp_trace_setevent @TraceID, 137, 51, @on

-- Set the Filters
declare @intfilter int
declare @bigintfilter bigint

-- Set the trace status to start
exec sp_trace_setstatus @TraceID, 1

-- display trace id for future references
select TraceID=@TraceID
goto finish

error: 
select ErrorCode=@rc

finish: 
go
View Code

将上面脚本保存到.sql文件,作业步骤类型使用cmdExec,命令中键入:sqlcmd -E -S127.0.0.1,7777 -iF:\TroubleShooting\TraceBlocked.sql
语句查询计数器的取值

select * from sys.dm_os_performance_counters
WHERE [object_name]='MSSQL$SQL08R2:Locks'
AND instance_name='_Total'
ORDER BY counter_name
View Code


收到的邮件

个人对这种自动化侦测和收集不太满意,原因就是警报触发条件所使用的计数器是累计值。更多的时候我们希望遇到阻塞,并且阻塞时长持续>某个值的时候能有提醒。因此倾向于定期从sys.dm_os_waiting_tasks中获取阻塞信息。

posted @ 2016-12-13 20:53  Uest  阅读(755)  评论(2编辑  收藏  举报