代码改变世界

找出SQLServer数据库I/O高的原因

2023-04-27 22:00  abce  阅读(495)  评论(0编辑  收藏  举报

找出SQLServer数据库I/O高的原因

影响SQLServer性能的因素有很多,比如CPU、I/O、内存、错误的执行计划、不恰当的索引或缺少索引等。

当查询变慢时,我发现最常见的一件事是由于查询执行的I/O太大。当一个查询因为I/O而变慢时,可能是因为糟糕的硬件、糟糕的执行计划,但通常是糟糕的数据库设计或缺乏适当的索引。

当你想测量或改进I/O,或者寻找那些使用大量I/O的查询时,你会去哪里?在本文中,我将探索测量和识别数据库实例的I/O相关性能信息的方法。

 

哪些数据库的执行了I/O

首先是要找出哪些数据库指定的I/O;量比较大。知道数据库的I/O性能有助于知道该具体关注哪些数据库。

有很多方法来查看,这里只是介绍使用Dynamic Management Function(DMF)和Dynamic Management Views(DMVs) 。他们是在SQLServer 2005中引入,在2008中得到加强。

DMVs和DMFs收集自sqlserver启动后直至关闭期间的统计信息。收集的信息不会被持久化。可以从sys.dm_io_virtual_file_stats来分析数据库的I/O。

select
	name as 'database name',
	sum ( num_of_reads ) as '读的次数',
	sum ( num_of_bytes_read/1024/1024/1024 ) as 'read(GB)', 
	sum ( num_of_writes ) as '写的次数', 
	sum ( num_of_bytes_written/1024/1024/1024 ) as 'writes(GB)' 
from
	sys.dm_io_virtual_file_stats ( null, null ) i
	inner join sys.databases d on i.database_id = d.database_id 
group by
	name
order by 3 desc, 5 desc;

结果展示:这是自启动后的I/O统计,不能显示当前状态的I/O

 

查看每个数据库最近5分钟的I/O

(执行会有点耗时)

declare @sample table (
  dbname varchar(128) 
 ,numberofreads bigint
 ,numberofwrites bigint
 ,sizeofreads int
 ,sizeofwrites int
 )

insert into @sample 
select name as 'dbname'
      ,sum(num_of_reads) as 'numberofread'
      ,sum(num_of_writes) as 'numberofwrites' 
      ,sum(num_of_bytes_read/1024/1024/1024) as 'sizeofreads'
      ,sum(num_of_bytes_written/1024/1024/1024) as 'sizeofwrites' 
from sys.dm_io_virtual_file_stats(null, null) i
  inner join sys.databases d  
      on i.database_id = d.database_id
group by name 

waitfor delay '00:05:00.000';

select firstsample.dbname
      ,(secondsample.numberofreads - firstsample.numberofreads) as 'number of reads'
      ,(secondsample.numberofwrites - firstsample.numberofwrites) as 'number of writes'
      ,(secondsample.sizeofreads - firstsample.sizeofreads) as 'reads(GB)'
      ,(secondsample.sizeofwrites - firstsample.sizeofwrites) as 'writes(GB)'
from 
(select * from @sample) firstsample
inner join
(select name as 'dbname'
      ,sum(num_of_reads) as 'numberofreads'
      ,sum(num_of_writes) as 'numberofwrites' 
      ,sum(num_of_bytes_read/1024/1024/1024) as 'sizeofreads'
      ,sum(num_of_bytes_written/1024/1024/1024) as 'sizeofwrites' 
from sys.dm_io_virtual_file_stats(null, null) i
  inner join sys.databases d  
      on i.database_id = d.database_id
group by name) as secondsample
on firstsample.dbname = secondsample.dbname
;

 

查看物理磁盘分区的I/O

select left(f.physical_name, 1) as driveletter, 
	dateadd(ms,sample_ms * -1, getdate()) as [start date],
	sum(v.num_of_writes) as total_num_of_writes, 
	sum(v.num_of_bytes_written) as total_num_of_bytes_written, 
	sum(v.num_of_reads) as total_num_of_reads, 
	sum(v.num_of_bytes_read) as total_num_of_bytes_read, 
	sum(v.size_on_disk_bytes) as total_size_on_disk_bytes
from sys.master_files f
inner join sys.dm_io_virtual_file_stats(null, null) v
on f.database_id=v.database_id and f.file_id=v.file_id
group by left(f.physical_name, 1),dateadd(ms,sample_ms * -1, getdate());

 

 

查看磁盘的延迟

使用io_stall*列来统计磁盘延迟

select  left(physical_name, 1) as drive,
        cast(sum(io_stall_read_ms) / 
            (1.0 + sum(num_of_reads)) as numeric(10,1)) 
                          as 'avg_read_disk_latency_ms',
        cast(sum(io_stall_write_ms) / 
            (1.0 + sum(num_of_writes) ) as numeric(10,1)) 
                          as 'avg_write_disk_latency_ms',
        cast((sum(io_stall)) / 
            (1.0 + sum(num_of_reads + num_of_writes)) as numeric(10,1)) 
                          as 'avg_disk_latency_ms'
from    sys.dm_io_virtual_file_stats(null, null) as divfs
        join sys.master_files as mf on mf.database_id = divfs.database_id
                                       and mf.file_id = divfs.file_id
group by left(physical_name, 1)
order by avg_disk_latency_ms desc;

 

查看文件的延迟

SELECT
    [ReadLatency] =
        CASE WHEN [num_of_reads] = 0
            THEN 0 ELSE ([io_stall_read_ms] / [num_of_reads]) END,
    [WriteLatency] =
        CASE WHEN [num_of_writes] = 0
            THEN 0 ELSE ([io_stall_write_ms] / [num_of_writes]) END,
    [Latency] =
        CASE WHEN ([num_of_reads] = 0 AND [num_of_writes] = 0)
            THEN 0 ELSE ([io_stall] / ([num_of_reads] + [num_of_writes])) END,
    [AvgBPerRead] =
        CASE WHEN [num_of_reads] = 0
            THEN 0 ELSE ([num_of_bytes_read] / [num_of_reads]) END,
    [AvgBPerWrite] =
        CASE WHEN [num_of_writes] = 0
            THEN 0 ELSE ([num_of_bytes_written] / [num_of_writes]) END,
    [AvgBPerTransfer] =
        CASE WHEN ([num_of_reads] = 0 AND [num_of_writes] = 0)
            THEN 0 ELSE
                (([num_of_bytes_read] + [num_of_bytes_written]) /
                ([num_of_reads] + [num_of_writes])) END,
    LEFT ([mf].[physical_name], 2) AS [Drive],
    DB_NAME ([vfs].[database_id]) AS [DB],
    [mf].[physical_name]
FROM
    sys.dm_io_virtual_file_stats (NULL,NULL) AS [vfs]
JOIN sys.master_files AS [mf]
    ON [vfs].[database_id] = [mf].[database_id]
    AND [vfs].[file_id] = [mf].[file_id]
-- WHERE [vfs].[file_id] = 2 -- log files
-- ORDER BY [Latency] DESC
-- ORDER BY [ReadLatency] DESC
ORDER BY [WriteLatency] DESC;
GO

建议延迟应该在30ms以下。

 

找出消耗I/O的top SQL

select top 25 DB_NAME( st.[dbid] ) AS [Database]
      ,cp.usecounts as [execution_count]
      ,qs.total_worker_time as cpu
      ,qs.total_elapsed_time as elapsed_time
      ,qs.total_logical_reads as logical_reads
      ,qs.total_logical_writes as logical_writes
      ,qs.total_physical_reads as physical_reads 
      ,qs.creation_time AS [CreationTime]
      ,substring(text, 
                   case when statement_start_offset = 0 
                          or statement_start_offset is null  
                           then 1  
                           else statement_start_offset/2 + 1 end, 
                   case when statement_end_offset = 0 
                          or statement_end_offset = -1  
                          or statement_end_offset is null  
                           then len(text)  
                           else statement_end_offset/2 end - 
                     case when statement_start_offset = 0 
                            or statement_start_offset is null 
                             then 1  
                             else statement_start_offset/2  end + 1 
                  )  as [statement]
      ,st.text AS [Complete Query Text]				  
from sys.dm_exec_query_stats qs  
   join sys.dm_exec_cached_plans cp on qs.plan_handle = cp.plan_handle 
   cross apply sys.dm_exec_sql_text(cp.plan_handle) st
order by qs.total_logical_reads desc;

SELECT TOP
        ( 25 ) DB_NAME( t.[dbid] ) AS [Database],
        REPLACE( REPLACE( LEFT ( t.[text], 255 ), CHAR ( 10 ), '' ), CHAR ( 13 ), '' ) AS [ShortQueryTXT],
        qs.total_logical_reads AS [TotalLogicalReads],
        qs.min_logical_reads AS [MinLogicalReads],
        qs.total_logical_reads/ qs.execution_count AS [AvgLogicalReads],
        qs.max_logical_reads AS [MaxLogicalReads],
    qs.total_physical_reads AS [TotalPhysicalReads],
    qs.min_physical_reads AS [MinPhysicalReads],
    qs.total_physical_reads/ qs.execution_count AS [AvgPhysicalReads],
    qs.max_physical_reads AS [MaxPhysicalReads],
        qs.min_worker_time AS [MinWorkerTime],
        qs.total_worker_time/ qs.execution_count AS [AvgWorkerTime],
        qs.max_worker_time AS [MaxWorkerTime],
        qs.min_elapsed_time AS [MinElapsedTime],
        qs.total_elapsed_time/ qs.execution_count AS [AvgElapsedTime],
        qs.max_elapsed_time AS [MaxElapsedTime],
        qs.execution_count AS [ExecutionCount],
CASE
                WHEN CONVERT ( nvarchar ( MAX ), qp.query_plan ) LIKE N'%%' THEN
                1 ELSE 0
        END AS [HasMissingIX],
        qs.creation_time AS [CreationTime],
        t.[text] AS [Complete Query Text],
        qp.query_plan AS [QueryPlan]
FROM
        sys.dm_exec_query_stats AS qs WITH ( NOLOCK ) 
		CROSS APPLY sys.dm_exec_sql_text ( plan_handle ) AS t CROSS APPLY sys.dm_exec_query_plan ( plan_handle ) AS qp
ORDER BY
        (total_logical_reads + total_logical_writes) / Execution_count DESC OPTION ( RECOMPILE )

sys.dm_exec_query_plan包含的执行计划可以是缓存的、也可以是正在执行的。