mariadb gtid故障处理记录

mariadb gtid故障处理记录

复制代码
set sql_log_bin = 0;

create user 'repl'@'%' identified by 'Oi8qcQXMnBvh2vrU';
grant replication slave on *.* to 'repl'@'%';


set sql_log_bin = 1;


set sql_log_bin = 0;
CREATE DATABASE `dblww` /*!40100 DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci */;
set sql_log_bin = 1;

set sql_log_bin = 0;
use dblww;
source /root/dblww.dump.sql ;
set sql_log_bin = 1;


SET GLOBAL gtid_slave_pos='0-330623-215';
CHANGE MASTER TO
 MASTER_HOST='192.168.142.236',
 MASTER_USER='repl',
 MASTER_PASSWORD='Oi8qcQXMnBvh2vrU',
 MASTER_PORT=3306,
 MASTER_USE_GTID=slave_pos,
 MASTER_CONNECT_RETRY=60;
 
START SLAVE;
show slave status\G;

# 模拟1032故障
set sql_log_bin = 0;
use dblww;
delete from mi where phone='13821320635';
set sql_log_bin = 1;

# 出现错误
root@MariaDB0 15:05:  [dblww]> show slave status\G;
*************************** 1. row ***************************
                Slave_IO_State: Waiting for master to send event
                   Master_Host: 192.168.142.236
                   Master_User: repl
                   Master_Port: 3306
                 Connect_Retry: 60
               Master_Log_File: mybinlog.000003
           Read_Master_Log_Pos: 64758416
                Relay_Log_File: mariadb-relay-bin.000002
                 Relay_Log_Pos: 64758461
         Relay_Master_Log_File: mybinlog.000003
              Slave_IO_Running: Yes
             Slave_SQL_Running: No
               Replicate_Do_DB: 
           Replicate_Ignore_DB: 
            Replicate_Do_Table: 
        Replicate_Ignore_Table: 
       Replicate_Wild_Do_Table: 
   Replicate_Wild_Ignore_Table: 
                    Last_Errno: 1032
                    Last_Error: Could not execute Delete_rows_v1 event on table dblww.mi; Can't find record in 'mi', Error_code: 1032; handler error HA_ERR_KEY_NOT_FOUND; the event's master log mybinlog.000003, end_log_pos 64758385
                  Skip_Counter: 0
           Exec_Master_Log_Pos: 64758163
               Relay_Log_Space: 64759025
               Until_Condition: None
                Until_Log_File: 
                 Until_Log_Pos: 0
            Master_SSL_Allowed: No
            Master_SSL_CA_File: 
            Master_SSL_CA_Path: 
               Master_SSL_Cert: 
             Master_SSL_Cipher: 
                Master_SSL_Key: 
         Seconds_Behind_Master: NULL
 Master_SSL_Verify_Server_Cert: No
                 Last_IO_Errno: 0
                 Last_IO_Error: 
                Last_SQL_Errno: 1032
                Last_SQL_Error: Could not execute Delete_rows_v1 event on table dblww.mi; Can't find record in 'mi', Error_code: 1032; handler error HA_ERR_KEY_NOT_FOUND; the event's master log mybinlog.000003, end_log_pos 64758385
   Replicate_Ignore_Server_Ids: 
              Master_Server_Id: 330623
                Master_SSL_Crl: 
            Master_SSL_Crlpath: 
                    Using_Gtid: Slave_Pos
                   Gtid_IO_Pos: 0-330623-390
       Replicate_Do_Domain_Ids: 
   Replicate_Ignore_Domain_Ids: 
                 Parallel_Mode: optimistic
                     SQL_Delay: 0
           SQL_Remaining_Delay: NULL
       Slave_SQL_Running_State: 
              Slave_DDL_Groups: 116
Slave_Non_Transactional_Groups: 0
    Slave_Transactional_Groups: 59
1 row in set (0.000 sec)

ERROR: No query specified

root@MariaDB0 15:06:  [dblww]> 

#########  具体解决步骤 ########
# 1、在主库上解析日志
[root@db236 logs]# mysqlbinlog --no-defaults --base64-output=decode-rows -vvv --stop-position=64758385 mybinlog.000003 >03.binlog

# 搜索出错的信息:end_log_pos 64758385
# at 64758163
#200309 15:06:25 server id 330623  end_log_pos 64758205 CRC32 0xb7d683a4        GTID 0-330623-390 trans
/*!100001 SET @@session.gtid_seq_no=390*//*!*/;  ######  这个非常重要  ######
BEGIN
/*!*/;
# at 64758205
# at 64758268
#200309 15:06:25 server id 330623  end_log_pos 64758268 CRC32 0x6b5f75de        Annotate_rows:
#Q> delete from mi where phone='13821320635'
#200309 15:06:25 server id 330623  end_log_pos 64758323 CRC32 0x7f9e38b9        Table_map: `dblww`.`mi` mapped to number 7
9
# at 64758323
#200309 15:06:25 server id 330623  end_log_pos 64758385 CRC32 0xc74193b7        Delete_rows: table id 79 flags: STMT_END_F
### DELETE FROM `dblww`.`mi`
### WHERE
###   @1=1 /* INT meta=0 nullable=0 is_null=0 */
###   @2=0 /* INT meta=0 nullable=0 is_null=0 */
###   @3='13821320635' /* STRING(44) meta=65068 nullable=0 is_null=0 */
###   @4='app' /* VARSTRING(80) meta=80 nullable=0 is_null=0 */
###   @5=1463038206 /* TIMESTAMP(0) meta=0 nullable=1 is_null=0 */
# Number of rows: 1
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;
/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;
(END)


#### 开始操作 ####
# 在主库上查看:select @@global.gtid_current_pos;
# 在从库上查看:select @@global.gtid_slave_pos;
stop slave
set global gtid_slave_pos="0-330623-390";
start slave ;
show slave status\G;

####################################### 模拟1062 错误 #####################################

# 模拟1062故障
set sql_log_bin = 0;
use dblww;
insert into mi(uid,phone,channel,createTime) values ('88888','13801180053','app','2020-03-08 15:45:30');
set sql_log_bin = 1;

### 出现故障 ### 
root@MariaDB0 15:46:  [dblww]> show slave status\G;
*************************** 1. row ***************************
                Slave_IO_State: Waiting for master to send event
                   Master_Host: 192.168.142.236
                   Master_User: repl
                   Master_Port: 3306
                 Connect_Retry: 60
               Master_Log_File: mybinlog.000003
           Read_Master_Log_Pos: 64759015
                Relay_Log_File: mariadb-relay-bin.000002
                 Relay_Log_Pos: 965
         Relay_Master_Log_File: mybinlog.000003
              Slave_IO_Running: Yes
             Slave_SQL_Running: No
               Replicate_Do_DB: 
           Replicate_Ignore_DB: 
            Replicate_Do_Table: 
        Replicate_Ignore_Table: 
       Replicate_Wild_Do_Table: 
   Replicate_Wild_Ignore_Table: 
                    Last_Errno: 1062
                    Last_Error: Could not execute Write_rows_v1 event on table dblww.mi; Duplicate entry '11' for key 'PRIMARY', Error_code: 1062; handler error HA_ERR_FOUND_DUPP_KEY; the event's master log mybinlog.000003, end_log_pos 64758984
                  Skip_Counter: 0
           Exec_Master_Log_Pos: 64758699
               Relay_Log_Space: 1592
               Until_Condition: None
                Until_Log_File: 
                 Until_Log_Pos: 0
            Master_SSL_Allowed: No
            Master_SSL_CA_File: 
            Master_SSL_CA_Path: 
               Master_SSL_Cert: 
             Master_SSL_Cipher: 
                Master_SSL_Key: 
         Seconds_Behind_Master: NULL
 Master_SSL_Verify_Server_Cert: No
                 Last_IO_Errno: 0
                 Last_IO_Error: 
                Last_SQL_Errno: 1062
                Last_SQL_Error: Could not execute Write_rows_v1 event on table dblww.mi; Duplicate entry '11' for key 'PRIMARY', Error_code: 1062; handler error HA_ERR_FOUND_DUPP_KEY; the event's master log mybinlog.000003, end_log_pos 64758984
   Replicate_Ignore_Server_Ids: 
              Master_Server_Id: 330623
                Master_SSL_Crl: 
            Master_SSL_Crlpath: 
                    Using_Gtid: Slave_Pos
                   Gtid_IO_Pos: 0-330623-392
       Replicate_Do_Domain_Ids: 
   Replicate_Ignore_Domain_Ids: 
                 Parallel_Mode: optimistic
                     SQL_Delay: 0
           SQL_Remaining_Delay: NULL
       Slave_SQL_Running_State: 
              Slave_DDL_Groups: 116
Slave_Non_Transactional_Groups: 0
    Slave_Transactional_Groups: 64
1 row in set (0.000 sec)

ERROR: No query specified

root@MariaDB0 15:46:  [dblww]> 

### 具体步骤 ###
#1、解析binlog文件
[root@db236 logs]# mysqlbinlog --no-defaults --base64-output=decode-rows -vvv --stop-position=64758984 mybinlog.000003 >/tmp/03.binlog
#2、搜索内容:end_log_pos 64758984
/*!100001 SET @@session.gtid_seq_no=392*//*!*/;
BEGIN
/*!*/;
# at 64758741
# at 64758867
#200309 15:45:49 server id 330623  end_log_pos 64758867 CRC32 0x6936cf80        Annotate_rows:
#Q> insert into mi(uid,phone,channel,createTime) values ('88888','13801180053','app','2020-03-08 15:45:30')
#200309 15:45:49 server id 330623  end_log_pos 64758922 CRC32 0xb7caa27f        Table_map: `dblww`.`mi` mapped to number 7
9
# at 64758922
#200309 15:45:49 server id 330623  end_log_pos 64758984 CRC32 0x486f5dcc        Write_rows: table id 79 flags: STMT_END_F
### INSERT INTO `dblww`.`mi`
### SET
###   @1=11 /* INT meta=0 nullable=0 is_null=0 */
###   @2=88888 /* INT meta=0 nullable=0 is_null=0 */
###   @3='13801180053' /* STRING(44) meta=65068 nullable=0 is_null=0 */
###   @4='app' /* VARSTRING(80) meta=80 nullable=0 is_null=0 */
###   @5=1583653530 /* TIMESTAMP(0) meta=0 nullable=1 is_null=0 */
# Number of rows: 1
DELIMITER ;
# End of log file
ROLLBACK /* added by mysqlbinlog */;
/*!50003 SET COMPLETION_TYPE=@OLD_COMPLETION_TYPE*/;
/*!50530 SET @@SESSION.PSEUDO_SLAVE_MODE=0*/;

# 对于1062错误,我们可以直接在从库中,删除掉该条记录即可;
set sql_log_bin = 0;
use dblww;
delete from mi where id=11;
set sql_log_bin = 1;
stop slave ;
start slave;
show slave status\G;


# 解决gtid故障
首先通过查看master的current_pos和当前slave的slave_pos确定需要从哪个gtid_slave_pos继续主从同步
select @@global.gtid_current_pos;
select @@global.gtid_slave_pos;

然后slave上执行
stop slave; //如果有多个slave复制通道(多主复制),需要全部都停
set global gtid_slave_pos="[new_slave_pos]";
start slave;

mysql> select @@global.gtid_slave_pos;
+-------------------------+
| @@global.gtid_slave_pos |
+-------------------------+
| 0-4-4                  |
+-------------------------+
1 row in set (0.00 sec)

mysql> stop slave;
Query OK, 0 rows affected (0.01 sec)

mysql> set global gtid_slave_pos="0-4-5";
Query OK, 0 rows affected (0.32 sec)

mysql> start slave;
Query OK, 0 rows affected (0.11 sec)

mysql> show slave status\G;

###########  percona mysql gtid的故障处理 ############

mysql> stop slave;
#通过show slave status/G;找到Retrieved_Gtid_Set:7800a22c-95ae-11e4-983d-080027de205a:12
mysql> set GTID_NEXT='7800a22c-95ae-11e4-983d-080027de205a:12'
mysql> begin;commit;
mysql> set GTID_NEXT='AUTOMATIC';
mysql> start slave;
复制代码

 

posted @   davie2020  阅读(433)  评论(0编辑  收藏  举报
(评论功能已被禁用)
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· Manus爆火,是硬核还是营销?
· 一文读懂知识蒸馏
· 终于写完轮子一部分:tcp代理 了,记录一下
点击右上角即可分享
微信分享提示