SQL Server 等待统计信息基线收集
背景
我们随时监控每个服务器不同时间段的wait statistics ,可以根据监控信息大概判断什么时候开始出现异常,相当于一个wait statistics基线收集,还可以具体分析占比高的等待类型可以大概推断出当前服务器存在一些什么性能瓶颈。当然如需分析到具体的语句也可以分析数据库上收集的Trace文件或其他手段。
测试环境
Microsoft SQL Server 2012 - 11.0.2100.60 (X64)
Feb 10 2012 19:39:15
Copyright (c) Microsoft Corporation
Enterprise Edition (64-bit) on Windows NT 6.2 <X64> (Build 9200: )
实现过程
a.新建库
1 USE [master]; 2 CREATE DATABASE [BaselineData] ON PRIMARY ( NAME = N'BaselineData', FILENAME = N'D:\data\BaselineData.mdf', SIZE = 512MB, FILEGROWTH = 512MB ) 3 LOG ON ( NAME = N'BaselineData_log', FILENAME = N'E:\log\BaselineData_log.ldf', SIZE = 128MB, FILEGROWTH = 512MB ); 4 ALTER DATABASE [BaselineData] SET RECOVERY SIMPLE;
b.新建表
1 USE [BaselineData]; 2 IF NOT EXISTS ( SELECT * 3 FROM [sys].[tables] 4 WHERE [name] = N'WaitStats' 5 AND [type] = N'U' ) 6 CREATE TABLE [dbo].[WaitStats] 7 ( 8 [RowNum] [BIGINT] IDENTITY(1, 1) , 9 [CaptureDate] [DATETIME] , 10 [WaitType] [NVARCHAR](120) , 11 [Wait_S] [DECIMAL](14, 2) , 12 [Resource_S] [DECIMAL](14, 2) , 13 [Signal_S] [DECIMAL](14, 2) , 14 [WaitCount] [BIGINT] , 15 [Percentage] [DECIMAL](4, 2) , 16 [AvgWait_S] [DECIMAL](14, 2) , 17 [AvgRes_S] [DECIMAL](14, 2) , 18 [AvgSig_S] [DECIMAL](14, 2) 19 ); 20 CREATE CLUSTERED INDEX CI_WaitStats ON [dbo].[WaitStats] ([RowNum], [CaptureDate]);
c.查询等待统计信息
1 USE [BaselineData]; 2 INSERT INTO dbo.WaitStats 3 ( [capturedate] , 4 [WaitType] 5 ) 6 VALUES ( GETDATE() , 7 'Wait Statistics for ' + CAST(GETDATE() AS NVARCHAR(19)) 8 ); 9 INSERT INTO dbo.WaitStats 10 ( [CaptureDate] , 11 [WaitType] , 12 [Wait_S] , 13 [Resource_S] , 14 [Signal_S] , 15 [WaitCount] , 16 [Percentage] , 17 [AvgWait_S] , 18 [AvgRes_S] , 19 [AvgSig_S] 20 ) 21 EXEC 22 ( ';WITH [Waits] AS 23 (SELECT 24 [wait_type], 25 [wait_time_ms] / 1000.0 AS [WaitS], 26 ([wait_time_ms] - [signal_wait_time_ms]) / 1000.0 AS [ResourceS], 27 [signal_wait_time_ms] / 1000.0 AS [SignalS], 28 [waiting_tasks_count] AS [WaitCount], 29 100.0 * [wait_time_ms] / SUM ([wait_time_ms]) OVER() AS [Percentage], 30 ROW_NUMBER() OVER(ORDER BY [wait_time_ms] DESC) AS [RowNum] 31 FROM sys.dm_os_wait_stats 32 WHERE [wait_type] NOT IN ( 33 N''BROKER_EVENTHANDLER'', N''BROKER_RECEIVE_WAITFOR'', 34 N''BROKER_TASK_STOP'', N''BROKER_TO_FLUSH'', 35 N''BROKER_TRANSMITTER'', N''CHECKPOINT_QUEUE'', 36 N''CHKPT'', N''CLR_AUTO_EVENT'', 37 N''CLR_MANUAL_EVENT'', N''CLR_SEMAPHORE'', 38 39 -- Maybe uncomment these four if you have mirroring issues 40 N''DBMIRROR_DBM_EVENT'', N''DBMIRROR_EVENTS_QUEUE'', 41 N''DBMIRROR_WORKER_QUEUE'', N''DBMIRRORING_CMD'', 42 43 N''DIRTY_PAGE_POLL'', N''DISPATCHER_QUEUE_SEMAPHORE'', 44 N''EXECSYNC'', N''FSAGENT'', 45 N''FT_IFTS_SCHEDULER_IDLE_WAIT'', N''FT_IFTSHC_MUTEX'', 46 47 -- Maybe uncomment these six if you have AG issues 48 N''HADR_CLUSAPI_CALL'', N''HADR_FILESTREAM_IOMGR_IOCOMPLETION'', 49 N''HADR_LOGCAPTURE_WAIT'', N''HADR_NOTIFICATION_DEQUEUE'', 50 N''HADR_TIMER_TASK'', N''HADR_WORK_QUEUE'', 51 52 N''KSOURCE_WAKEUP'', N''LAZYWRITER_SLEEP'', 53 N''LOGMGR_QUEUE'', N''MEMORY_ALLOCATION_EXT'', 54 N''ONDEMAND_TASK_QUEUE'', 55 N''PREEMPTIVE_XE_GETTARGETSTATE'', 56 N''PWAIT_ALL_COMPONENTS_INITIALIZED'', 57 N''PWAIT_DIRECTLOGCONSUMER_GETNEXT'', 58 N''QDS_PERSIST_TASK_MAIN_LOOP_SLEEP'', N''QDS_ASYNC_QUEUE'', 59 N''QDS_CLEANUP_STALE_QUERIES_TASK_MAIN_LOOP_SLEEP'', 60 N''QDS_SHUTDOWN_QUEUE'', N''REDO_THREAD_PENDING_WORK'', 61 N''REQUEST_FOR_DEADLOCK_SEARCH'', N''RESOURCE_QUEUE'', 62 N''SERVER_IDLE_CHECK'', N''SLEEP_BPOOL_FLUSH'', 63 N''SLEEP_DBSTARTUP'', N''SLEEP_DCOMSTARTUP'', 64 N''SLEEP_MASTERDBREADY'', N''SLEEP_MASTERMDREADY'', 65 N''SLEEP_MASTERUPGRADED'', N''SLEEP_MSDBSTARTUP'', 66 N''SLEEP_SYSTEMTASK'', N''SLEEP_TASK'', 67 N''SLEEP_TEMPDBSTARTUP'', N''SNI_HTTP_ACCEPT'', 68 N''SP_SERVER_DIAGNOSTICS_SLEEP'', N''SQLTRACE_BUFFER_FLUSH'', 69 N''SQLTRACE_INCREMENTAL_FLUSH_SLEEP'', 70 N''SQLTRACE_WAIT_ENTRIES'', N''WAIT_FOR_RESULTS'', 71 N''WAITFOR'', N''WAITFOR_TASKSHUTDOWN'', 72 N''WAIT_XTP_RECOVERY'', 73 N''WAIT_XTP_HOST_WAIT'', N''WAIT_XTP_OFFLINE_CKPT_NEW_LOG'', 74 N''WAIT_XTP_CKPT_CLOSE'', N''XE_DISPATCHER_JOIN'', 75 N''XE_DISPATCHER_WAIT'', N''XE_TIMER_EVENT'') 76 AND [waiting_tasks_count] > 0 77 ) 78 SELECT 79 getdate(), 80 MAX ([W1].[wait_type]) AS [WaitType], 81 CAST (MAX ([W1].[WaitS]) AS DECIMAL (16,2)) AS [Wait_S], 82 CAST (MAX ([W1].[ResourceS]) AS DECIMAL (16,2)) AS [Resource_S], 83 CAST (MAX ([W1].[SignalS]) AS DECIMAL (16,2)) AS [Signal_S], 84 MAX ([W1].[WaitCount]) AS [WaitCount], 85 CAST (MAX ([W1].[Percentage]) AS DECIMAL (5,2)) AS [Percentage], 86 CAST ((MAX ([W1].[WaitS]) / MAX ([W1].[WaitCount])) AS DECIMAL (16,4)) AS [AvgWait_S], 87 CAST ((MAX ([W1].[ResourceS]) / MAX ([W1].[WaitCount])) AS DECIMAL (16,4)) AS [AvgRes_S], 88 CAST ((MAX ([W1].[SignalS]) / MAX ([W1].[WaitCount])) AS DECIMAL (16,4)) AS [AvgSig_S] 89 FROM [Waits] AS [W1] 90 INNER JOIN [Waits] AS [W2] 91 ON [W2].[RowNum] <= [W1].[RowNum] 92 GROUP BY [W1].[RowNum] 93 HAVING SUM ([W2].[Percentage]) - MAX( [W1].[Percentage] ) < 95; -- percentage threshold 94 ' 95 );
d.在Agent 中配置Job调用c中的脚本
详情省略,可以参考数据库出现阻塞及时邮件预警提醒(下)
e.效果如下图,本地测试环境4月5日与5月4日wait statistics前后差异非常明显,4月5日存在比较明显关于CPU方面的问题。
参考资料
http://www.sqlskills.com/blogs/paul/wait-statistics-or-please-tell-me-where-it-hurts/