distribution库数据文件过大

 

需求背景:

distribution 数据文件相当大,每次运行清理job ( Distribution clean up: distribution ) 都会消耗很大的io

主要体现在表distribution.dbo.MSrepl_commands 包含的记录数量非常大

常见原因

1)清理job ( Distribution clean up: distribution ) 对distribution.dbo.MSrepl_commands 的清理速度跟不上数据进入的速度,

2)在过期时间内有大量的同步分发

3)或者其让他原因

 


 

方法一


修改相关sp加快清理速度,调用相关sp的job是 
Distribution clean up: distribution 

sp的层次结构如下:

sp_MSdistribution_cleanup
  sp_MSdistribution_delete
    sp_MSdelete_publisherdb_trans
      sp_MSdelete_dodelete

住要修改最后2层的sp( sp_MSdelete_dodelete,sp_MSdelete_publisherdb_trans )

分别将DELETE TOP(2000) MSrepl_commands  和 delete TOP(5000) MSrepl_transactions 的top数量调大到合适的数量级

note:注意同时修改判断条件

以下是修改后的sp

use distribution
GO
-- New delete stored procedure WITH RECOMPILE  
-- Note: this function is currently called from sp_MSdelete_publisherdb_trans only  
--   and due to the removal of "set rowcount", the TOP(5000) has been added here also,  
--   if a change needs to be made, check that proc also  
alter PROCEDURE sp_MSdelete_dodelete  
 @publisher_database_id int,  
 @max_xact_seqno varbinary(16),  
 @last_xact_seqno varbinary(16),  
 @last_log_xact_seqno varbinary(16),  
 @has_immediate_sync bit = 1  
WITH RECOMPILE  
as  
begin  
  declare @second_largest_log_xact_seqno varbinary(16)  
  set @second_largest_log_xact_seqno = 0x0  
  
  if @last_log_xact_seqno is not NULL  
  begin  
   --get the second largest xact_seqno among log entries  
   select @second_largest_log_xact_seqno = max(xact_seqno)  
   from MSrepl_transactions  
   where publisher_database_id = @publisher_database_id  
    and xact_id <> 0x0  
    and xact_seqno < @last_log_xact_seqno  
  
   if @second_largest_log_xact_seqno is NULL or substring(@second_largest_log_xact_seqno, 1, 10) <> substring(@last_log_xact_seqno, 1, 10)  
   begin  
    set @second_largest_log_xact_seqno = 0x0  
   end  
  end  
  
    
  if @has_immediate_sync = 0  
   delete TOP(50000) MSrepl_transactions WITH (PAGLOCK) from MSrepl_transactions with (INDEX(ucMSrepl_transactions)) where  
    publisher_database_id = @publisher_database_id and  
    xact_seqno <= @max_xact_seqno and  
    xact_seqno <> @last_xact_seqno and  
    xact_seqno <> @last_log_xact_seqno and  
    xact_seqno <> @second_largest_log_xact_seqno --ensure at least two log entries are left, when there existed more than two log entries  
    OPTION (MAXDOP 1)  
  else  
   delete TOP(50000) MSrepl_transactions WITH (PAGLOCK) from MSrepl_transactions with (INDEX(ucMSrepl_transactions)) where  
    publisher_database_id = @publisher_database_id and  
    xact_seqno <= @max_xact_seqno and  
    xact_seqno <> @last_xact_seqno and  
    xact_seqno <> @last_log_xact_seqno and    
    xact_seqno <> @second_largest_log_xact_seqno and --ensure at least two log entries are left, when there existed more than two log entries  
    -- use nolock to avoid deadlock  
    not exists (select * from MSrepl_commands c with (nolock) where  
     c.publisher_database_id = @publisher_database_id and  
     c.xact_seqno = MSrepl_transactions.xact_seqno and   
                    c.xact_seqno <= @max_xact_seqno)  
   OPTION (MAXDOP 1)  
end  
View Code

 

use distribution
GO
alter PROCEDURE sp_MSdelete_publisherdb_trans    
    @publisher_database_id int,    
    @max_xact_seqno varbinary(16),    
 @max_cutoff_time datetime,    
    @num_transactions int OUTPUT,    
    @num_commands int OUTPUT    
    as    
    
 set nocount on    
        
 declare @snapshot_bit int    
 declare @replpost_bit int    
    declare @directory_type int    
    declare @alt_directory_type int    
    declare @scriptexec_type int    
    declare @last_xact_seqno varbinary(16)    
    declare @last_log_xact_seqno varbinary(16)    
    declare @max_immediate_sync_seqno varbinary(16)    
    declare @dir nvarchar(512)    
    declare @row_count int    
    declare @batchsize int    
    declare @retcode int            /* Return value of xp_cmdshell */    
 declare @has_immediate_sync bit    
 declare @xact_seqno varbinary(16)    
 declare @command_id int    
 declare @type int    
 declare @directory nvarchar(1024)    
 declare @syncinit int    
 declare @syncdone int    
    
    select @snapshot_bit = 0x80000000    
 select @replpost_bit = 0x40000000    
    select @directory_type = 7    
    select @alt_directory_type = 25    
 select @scriptexec_type = 46    
 select @syncinit = 37    
 select @syncdone = 38    
    select @num_transactions = 0    
    select @num_commands = 0    
    
       -- Being as this is a cleanup process it is our prefered victim    
       SET DEADLOCK_PRIORITY LOW    
    
 -- If transactions for immediate_sync publications will not be cleanup up until    
 -- they are older than max retention, except for snapshot transactions.    
 -- Snapshot transactions for immediate_sync publication will be cleanup up if it is    
 -- not used by any subscriptions (including virtual and virtual immediate_syncymous    
 -- subscriptions. Both will be reset by snapshot agent every time if the     
 -- publication is snapshot type.) The special logic for snapshot transactions    
 -- is mostly for snapshot publications. It is to cleaup up the snapshot files     
 -- ASAP and not to wait for max retention.    
 -- We don't need to do this for non-immediate_syncymous publications since the snapshot    
 -- trans for them will be removed as soon as they are distributed and min '    
 -- retention is reached.    
    
 -- Detect if there are immediate_syncymous publications in this publishing db.    
 if exists (select * from MSsubscriptions where    
   publisher_database_id = @publisher_database_id and    
   subscriber_id < 0)    
  select @has_immediate_sync = 1    
 else    
  select @has_immediate_sync = 0    
    
 if @has_immediate_sync = 1    
 begin    
  -- if @max_immediate_sync_seqno is null, no row will be deleted based on that.    
  select @max_immediate_sync_seqno = max(xact_seqno)     
   from MSrepl_transactions with (nolock)    
   where publisher_database_id = @publisher_database_id     
    and entry_time <= @max_cutoff_time    
 end    
    
 -- table to store all of the snapshot command seqno that will     
 -- need to be deleted from MSrepl_commands after dir removal    
 declare @snapshot_xact_seqno table (snap_xact_seqno varbinary(16))    
    
    -- Note delete commands first since transaction table will be used for    
    -- geting @max_xact_seqno (see sp_MSmaximum_cleanup_seqno).    
    -- Delete all directories stored in directory command.    
 if @has_immediate_sync = 0    
  declare  hCdirs  CURSOR LOCAL FAST_FORWARD FOR select CONVERT(nvarchar(512), command),    
   xact_seqno, command_id, type    
   from MSrepl_commands with (nolock) where    
   publisher_database_id = @publisher_database_id and    
   xact_seqno <= @max_xact_seqno and       
   ((type & ~@snapshot_bit) = @directory_type or    
            (type & ~@snapshot_bit) = @alt_directory_type or    
            (type & ~@replpost_bit) = @scriptexec_type)    
  for read only    
 else    
  declare  hCdirs  CURSOR LOCAL FAST_FORWARD FOR select CONVERT(nvarchar(512), command),    
   xact_seqno, command_id, type    
   from MSrepl_commands c with (nolock) where    
   publisher_database_id = @publisher_database_id and    
   xact_seqno <= @max_xact_seqno and      
   (    
    -- In this section we skip over script exec because they should only be    
    -- removed when they are out of retention (no subscriptions will ever    
    -- point to the script exec commands so if we didn't exclude them here they    
    -- would always be removed... even when they are needed by subscribers).    
    (    
     (type & ~@snapshot_bit) in (@directory_type, @alt_directory_type)    
      and (    
      -- Select the row if it is older than max retention.    
      xact_seqno <= @max_immediate_sync_seqno or     
      -- Select the row if it is not used by any subscriptions.    
      not exists (select * from MSsubscriptions s where     
         s.publisher_database_id = @publisher_database_id and     
         s.subscription_seqno = c.xact_seqno) OR    
      -- Select the row if it is not for immediate_sync publications    
      -- Note: directory command have article id 0 so it is not useful    
      not exists (select * from MSpublications p where    
        p.publication_id = (select top 1 s.publication_id     
         from MSsubscriptions s where    
         s.publisher_database_id = @publisher_database_id and    
         s.subscription_seqno = c.xact_seqno) and    
        p.immediate_sync = 1)    
     )    
    )    
    -- For script exec only select the row if it is out of retention    
    or ((type & ~@replpost_bit) = @scriptexec_type    
      and xact_seqno <= @max_immediate_sync_seqno)    
   )    
    
  for read only    
    
    open hCdirs    
    fetch hCdirs into @dir, @xact_seqno, @command_id, @type    
    while (@@fetch_status <> -1)    
    begin    
     -- script exec command, need to map to the directory path and remove leading 0 or 1    
  if((@type & ~@replpost_bit) = @scriptexec_type)    
  begin    
   select @dir = left(@dir,len(@dir) - charindex(N'\', reverse(@dir)))    
       
   if left(@dir, 1) in (N'0', N'1')    
   begin    
    select @dir = right(@dir, len(@dir) - 1)    
   end    
  end    
      
  -- Need to map unc to local drive for access problem    
        exec @retcode = sys.sp_MSreplremoveuncdir @dir    
        /* Abort the operation if the delete fails */    
        if (@retcode <> 0 or @@error <> 0)    
            return (1)    
    
  -- build up a list of snapshot commands that will be deleted below    
  -- this list is built because we must cleanup scripts, alt snap paths    
  -- and regular snapshots prior to removing the commands for them...    
  insert into @snapshot_xact_seqno(snap_xact_seqno) values (@xact_seqno)    
    
     fetch hCdirs into @dir, @xact_seqno, @command_id, @type    
    end    
    close hCdirs    
    deallocate hCdirs    
    
 -- delete all of the snapshot commands related to directories that were     
 -- cleaned up. SYNCINIT and SYNCDONE tokens for concurrent snapshot will     
 -- be cleaned up by retention period in the next section below... We do    
 -- not attempt to remove the SYNCINIT or SYNCDONE tokens earlier because    
 -- we have no safe way of associating them with a particular snapshot.    
 -- Also, we can't tell if the tokens are needed by an existing snapshot.    
 WHILE 1 = 1    
    BEGIN    
  DELETE TOP(20000) MSrepl_commands WITH (PAGLOCK) from MSrepl_commands with (INDEX(ucMSrepl_commands))    
   WHERE publisher_database_id = @publisher_database_id     
    AND xact_seqno IN (SELECT DISTINCT snap_xact_seqno     
         FROM @snapshot_xact_seqno)    
   OPTION (MAXDOP 1)    
    
  SELECT @row_count = @@rowcount    
    
  -- Update output parameter    
     SELECT @num_commands = @num_commands + @row_count    
        
        IF @row_count < 20000 -- passed the result set.  We're done    
            BREAK    
 END    
    
    -- Since we're cleaning up, we set the lock timeout to immediate    
    --  this way we shouldn't interfere with the other agents using the table.    
    
    -- Holding off for some testing on this    
    --SET LOCK_TIMEOUT 1    
    
    -- Delete all commans less than or equal to the @max_xact_seqno    
    -- Delete in batch to reduce the transaction size    
    
    WHILE 1 = 1    
    BEGIN    
  if @has_immediate_sync = 0    
   DELETE TOP(20000) MSrepl_commands WITH (PAGLOCK) from MSrepl_commands with (INDEX(ucMSrepl_commands)) where    
    publisher_database_id = @publisher_database_id and    
    xact_seqno <= @max_xact_seqno and    
    (type & ~@snapshot_bit) not in (@directory_type, @alt_directory_type) and    
    (type & ~@replpost_bit) <> @scriptexec_type    
    OPTION (MAXDOP 1)    
  else    
   -- Use nolock hint on subscription table to avoid deadlock    
   -- with snapshot agent.    
   DELETE TOP(20000) MSrepl_commands WITH (PAGLOCK) from MSrepl_commands with (INDEX(ucMSrepl_commands)) where    
    publisher_database_id = @publisher_database_id and    
    xact_seqno <= @max_xact_seqno and    
    -- do not delete directory, alt directory or script exec commands. they are deleted     
    -- above. We have to do this because we use a (nolock) hint and we have to make sure we     
    -- don't delete dir commands when the file has not been cleaned up in the code above. It's    
    -- ok to delete snap commands that are out of retention and perform lazy delete of dir    
    (type & ~@snapshot_bit) not in (@directory_type, @alt_directory_type) and    
    (type & ~@replpost_bit) <> @scriptexec_type and    
    (    
     -- Select the row if it is older than max retention.    
     xact_seqno <= @max_immediate_sync_seqno or     
     -- Select the snap cmd if it is not for immediate_sync article    
     -- We know the command is for immediate_sync publication if    
     -- the snapshot tran include articles that has virtual    
     -- subscritptions. (use subscritpion table to avoid join with    
     -- article and publication table). We skip sync tokens because     
     -- they are never pointed to by subscriptions...    
     (    
      (type & @snapshot_bit) <> 0 and    
      (type & ~@snapshot_bit) not in (@syncinit, @syncdone) and    
      not exists (select * from MSsubscriptions s with (nolock) where    
       s.publisher_database_id = @publisher_database_id and    
       s.article_id = MSrepl_commands.article_id and    
       s.subscriber_id < 0)    
     )    
    )    
    OPTION (MAXDOP 1)    
    
  select @row_count = @@rowcount    
        -- Update output parameter    
        select @num_commands = @num_commands + @row_count    
        
        IF @row_count < 20000 -- passed the result set.  We're done    
            BREAK    
    END    
        
    -- get the max transaction row    
    select @last_log_xact_seqno = max(xact_seqno) from MSrepl_transactions    
  where publisher_database_id = @publisher_database_id     
         and xact_id <> 0x0  -- not initial sync transaction    
    
    select @last_xact_seqno = max(xact_seqno) from MSrepl_transactions    
  where publisher_database_id = @publisher_database_id    
    
    -- Remove all transactions less than or equal to the @max_xact_seqno and leave the     
    -- last transaction row    
    -- Note @max_xact_seqno might be null, in this case don't do any thing.    
    -- Delete in batchs to reduce the transaction size    
    
    -- Delete all commans less than or equal to the @max_xact_seqno    
    -- Delete  rows to reduce the transaction size    
    WHILE 1 = 1    
    BEGIN    
  exec dbo.sp_MSdelete_dodelete @publisher_database_id,     
          @max_xact_seqno,     
          @last_xact_seqno,     
          @last_log_xact_seqno,    
          @has_immediate_sync    
    
    
        select @row_count = @@rowcount    
    
        -- Update output parameter    
        select @num_transactions = @num_transactions + @row_count    
        if @row_count < 50000    
            BREAK    
    END    
View Code

 


 

方法二

如果不是必要请不要使用该方法,有次处理突发情况有使用过

1停止每个db的log reader

  该方法停止将事务读取到 distribution.dbo.MSrepl_commands

  以下语句获取log reader

--jobs.name就是sql server agent job的名称
use [distribution]
go
select  
    jobs.name, publisher_db,s.data_source as publisher,
    stop_job='EXEC msdb.dbo.sp_stop_job @job_name = N'''+jobs.name+'''',
    start_job='EXEC msdb.dbo.sp_start_job @job_name = N'''+jobs.name+'''',
    disabled_job='EXEC msdb.dbo.sp_update_job @job_name = N'''+jobs.name+''',@enabled='+cast(0 as varchar(10)),
    enabled_job='EXEC msdb.dbo.sp_update_job @job_name = N'''+jobs.name+''',@enabled='+cast(1 as varchar(10))
From MSlogreader_agents a inner join sys.servers s on a.publisher_id=s.server_id
inner join msdb..sysjobs jobs on a.job_id=jobs.job_id

2 等待已经读取到distribution.dbo.MSrepl_commands的事务分发到目的端,可以使用以下查询为分发的命令数

 With MaxXact (ServerName, PublisherDBID, XactSeqNo)
 As (Select S.name, DA.publisher_database_id, max(H.xact_seqno)
 From distribution.dbo.MSdistribution_history H with(nolock)
 Inner Join distribution.dbo.MSdistribution_agents DA with(nolock)
 On DA.id = H.agent_id
 Inner Join master.sys.servers S with(nolock)
 On S.server_id = DA.subscriber_id
 Group By S.name, DA.publisher_database_id)
 Select MX.ServerName, MX.PublisherDBID, COUNT(*)
 As CommandsNotReplicated
 From distribution.dbo.MSrepl_commands C with(nolock)
 Right Join MaxXact MX On
 MX.XactSeqNo < C.xact_seqno And
 MX.PublisherDBID = C.publisher_database_id
 Group By MX.ServerName, MX.PublisherDBID;

3数据都分发下去后,可以truncate distribution.dbo.MSrepl_commands 的数据

  note:不推荐使用步骤3 全部truncate,本人跟喜欢使用下面的方式删除部分数据

    delete  a
    from distribution.dbo.MSrepl_commands a with(nolock)
    inner join distribution.dbo.MSrepl_transactions b with(nolock)
        on a.publisher_database_id=b.publisher_database_id  
        and a.xact_seqno=b.xact_seqno
        where entry_time<'2015-12-3'

 

posted @ 2016-08-16 18:06  simplelg17  阅读(1960)  评论(0编辑  收藏  举报