MySQL高可用方案 MHA之三 master_ip_online_change
主从架构
master:
10.150.20.90 ed3jrdba90
slave:
10.150.20.97 ed3jrdba97
10.150.20.132 ed3jrdba132
manager:
10.150.20.95 ed3jrdba95
vip:
10.150.20.200
master_ip_online_change参数
mha manager 节点上
# vi /etc/mysql_mha/app1.cnf
#手动switchover时候的切换脚本
master_ip_online_change_script= /usr/local/bin/master_ip_online_change
master_ip_online_change_script 指的是手动执行mysql master switchover时执行的切换脚本。
# cat /etc/mysql_mha/app1.cnf
[server default]
manager_log=/data/mysql_mha/app1-manager.log
manager_workdir=/data/mysql_mha/app1
master_binlog_dir=/data/mysql_33061/logs
master_ip_online_change_script= /usr/local/bin/master_ip_online_change
password=mha_monitor
ping_interval=5
remote_workdir=/data/mysql_mha/app1
repl_password=replicator
repl_user=replicator
shutdown_script=""
ssh_user=root
user=mha_monitor
[server1]
hostname=10.150.20.90
port=33061
[server2]
hostname=10.150.20.97
port=33061
[server3]
hostname=10.150.20.132
port=33061
编辑 master_ip_online_change 脚本。这里没有使用 keepalived,而是通过脚本的方式管理 VIP。
# cat /usr/local/bin/master_ip_online_change
#!/usr/bin/env perl

# master_ip_online_change: hook script that MHA runs during a manual (online)
# master switch. MHA calls it with --command=stop to block writes on the
# current master and with --command=start to allow writes on the new master.
# In addition to toggling read_only, this variant moves a virtual IP (VIP)
# between the two hosts over ssh instead of relying on keepalived.

use strict;
use warnings FATAL => 'all';

use Getopt::Long;
use MHA::DBHelper;
use MHA::NodeUtil;
use Time::HiRes qw( sleep gettimeofday tv_interval );
use Data::Dumper;

# Start time of the current polling tick (see sleep_until).
my $_tstart;

# Length of one polling tick, in seconds.
my $_running_interval = 0.1;

my (
  $command,              $orig_master_is_new_slave,
  $orig_master_host,     $orig_master_ip,
  $orig_master_port,     $orig_master_user,
  $orig_master_password, $orig_master_ssh_user,
  $new_master_host,      $new_master_ip,
  $new_master_port,      $new_master_user,
  $new_master_password,  $new_master_ssh_user,
);

# VIP settings: address, broadcast address, network device and label suffix.
my $vip   = '10.150.20.200';
my $brdc  = '10.150.20.255';
my $ifdev = 'ens3';
my $key   = '1';

# Remote command that brings the VIP up and announces it with arping.
# NOTE(review): the trailing "iptables -F" flushes every firewall rule on the
# new master at each switchover - confirm that this is really intended.
my $ssh_start_vip = "/usr/sbin/ip addr add $vip/24 brd $brdc dev $ifdev label $ifdev:$key;/usr/sbin/arping -q -A -c 1 -I $ifdev $vip;iptables -F;";

# Remote command that removes the VIP.
my $ssh_stop_vip = "/usr/sbin/ip addr del $vip/24 dev $ifdev label $ifdev:$key";

GetOptions(
  'command=s'                => \$command,
  'orig_master_is_new_slave' => \$orig_master_is_new_slave,
  'orig_master_host=s'       => \$orig_master_host,
  'orig_master_ip=s'         => \$orig_master_ip,
  'orig_master_port=i'       => \$orig_master_port,
  'orig_master_user=s'       => \$orig_master_user,
  'orig_master_password=s'   => \$orig_master_password,
  'orig_master_ssh_user=s'   => \$orig_master_ssh_user,
  'new_master_host=s'        => \$new_master_host,
  'new_master_ip=s'          => \$new_master_ip,
  'new_master_port=i'        => \$new_master_port,
  'new_master_user=s'        => \$new_master_user,
  'new_master_password=s'    => \$new_master_password,
  'new_master_ssh_user=s'    => \$new_master_ssh_user,
);

exit main();

# Returns the local time as a string followed by the zero-padded microseconds.
sub current_time_us {
  my ( $sec, $microsec ) = gettimeofday();
  my $curdate = localtime($sec);
  return $curdate . " " . sprintf( "%06d", $microsec );
}

# Sleeps for whatever is left of the current polling tick, measured from
# $_tstart. Returns immediately when the tick has already elapsed.
sub sleep_until {
  my $elapsed = tv_interval($_tstart);
  if ( $_running_interval > $elapsed ) {
    sleep( $_running_interval - $elapsed );
  }
  return;
}

# Returns the SHOW PROCESSLIST rows (hash refs) that still count as
# application activity.
#
#   $dbh                     database handle used to run SHOW PROCESSLIST
#   $my_connection_id        id of our own connection, always skipped
#   $running_time_threshold  skip rows whose Time is below this (default 0)
#   $type                    filter level (default 0):
#                              >= 1 also skips Sleep and Connect rows
#                              >= 2 also skips statements starting with
#                                   select / show
#
# Binlog Dump threads, "system user" threads and connections that have been
# sleeping for one second or more are always skipped.
sub get_threads_util {
  my ( $dbh, $my_connection_id, $running_time_threshold, $type ) = @_;
  $running_time_threshold = 0 unless ($running_time_threshold);
  $type                   = 0 unless ($type);

  my @threads;
  my $sth = $dbh->prepare("SHOW PROCESSLIST");
  $sth->execute();

  while ( my $ref = $sth->fetchrow_hashref() ) {
    my $id         = $ref->{Id};
    my $user       = $ref->{User};
    my $host       = $ref->{Host};
    my $command    = $ref->{Command};
    my $state      = $ref->{State};
    my $query_time = $ref->{Time};
    my $info       = $ref->{Info};

    # Trim surrounding whitespace so the select/show checks below can anchor
    # at the start of the statement.
    $info =~ s/^\s*(.*?)\s*$/$1/ if defined($info);

    next if ( $my_connection_id == $id );
    next if ( defined($query_time) && $query_time < $running_time_threshold );
    next if ( defined($command)    && $command eq "Binlog Dump" );
    next if ( defined($user)       && $user eq "system user" );
    next
      if ( defined($command)
      && $command eq "Sleep"
      && defined($query_time)
      && $query_time >= 1 );

    if ( $type >= 1 ) {
      next if ( defined($command) && $command eq "Sleep" );
      next if ( defined($command) && $command eq "Connect" );
    }

    if ( $type >= 2 ) {
      next if ( defined($info) && $info =~ m/^select/i );
      next if ( defined($info) && $info =~ m/^show/i );
    }

    push @threads, $ref;
  }
  return @threads;
}

# Polls the processlist of $handler once per tick until no application
# thread is left or $ticks ticks have passed. Progress is printed whenever
# the remaining tick count is a multiple of 5; $noun ("threads" or
# "queries") is the word used in that message. Returns the threads that are
# still present when the wait ends.
sub _wait_for_threads {
  my ( $handler, $ticks, $noun ) = @_;

  $_tstart = [gettimeofday];
  my @threads =
    get_threads_util( $handler->{dbh}, $handler->{connection_id} );

  while ( $ticks > 0 && @threads ) {
    if ( $ticks % 5 == 0 ) {
      printf "%s Waiting all running %d %s are disconnected.. (max %d milliseconds)\n",
        current_time_us(), scalar(@threads), $noun, $ticks * 100;

      # Dump the individual rows only when the list is short.
      if ( @threads <= 5 ) {
        print Data::Dumper->new( [$_] )->Indent(0)->Terse(1)->Dump . "\n"
          foreach (@threads);
      }
    }
    sleep_until();
    $_tstart = [gettimeofday];
    $ticks--;
    @threads =
      get_threads_util( $handler->{dbh}, $handler->{connection_id} );
  }
  return @threads;
}

# Entry point. Dispatches on --command and always terminates the process:
#   stop    exit 0 on success, 1 on any error
#   start   exit 0 on success, 10 on any error
#   status  exit 0
#   other / missing command: print usage and exit 1
sub main {

  # Without this guard a missing --command dies on an "uninitialized value"
  # warning (warnings are FATAL) before the usage text can be shown.
  if ( !defined $command ) {
    usage();
    exit 1;
  }

  if ( $command eq "stop" ) {

    ## Gracefully killing connections on the current master
    # 1. Set read_only= 1 on the new master
    # 2. DROP USER so that no app user can establish new connections
    # 3. Set read_only= 1 on the current master
    # 4. Kill current queries
    # * Any database access failure will result in script die.
    my $exit_code = 1;
    eval {
      ## Setting read_only=1 on the new master (to avoid accident)
      my $new_master_handler = MHA::DBHelper->new();

      # args: hostname, port, user, password, raise_error(die_on_error)_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        $new_master_user, $new_master_password, 1 );
      print current_time_us() . " Set read_only on the new master.. ";
      $new_master_handler->enable_read_only();
      if ( $new_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }
      $new_master_handler->disconnect();

      # Connecting to the orig master, die if any database error happens
      my $orig_master_handler = MHA::DBHelper->new();
      $orig_master_handler->connect( $orig_master_ip, $orig_master_port,
        $orig_master_user, $orig_master_password, 1 );

      ## Drop application user so that nobody can connect. Disabling per-session binlog beforehand
      #$orig_master_handler->disable_log_bin_local();
      #print current_time_us() . " Dropping app user on the orig master..\n";
      #FIXME_xxx_drop_app_user($orig_master_handler);

      ## Waiting for N * 100 milliseconds so that current connections can exit
      _wait_for_threads( $orig_master_handler, 15, "threads" );

      ## Setting read_only=1 on the current master so that nobody(except SUPER) can write
      print current_time_us() . " Set read_only=1 on the orig master.. ";
      $orig_master_handler->enable_read_only();
      if ( $orig_master_handler->is_read_only() ) {
        print "ok.\n";
      }
      else {
        die "Failed!\n";
      }

      ## Waiting for M * 100 milliseconds so that current update queries can complete
      my @threads = _wait_for_threads( $orig_master_handler, 5, "queries" );

      print "Disabling the VIP on old master: $orig_master_host \n";
      stop_vip();

      ## Terminating all threads
      print current_time_us() . " Killing all application threads..\n";
      $orig_master_handler->kill_threads(@threads) if (@threads);
      print current_time_us() . " done.\n";

      #$orig_master_handler->enable_log_bin_local();
      $orig_master_handler->disconnect();

      ## After finishing the script, MHA executes FLUSH TABLES WITH READ LOCK
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
    }
    exit $exit_code;
  }
  elsif ( $command eq "start" ) {

    ## Activating master ip on the new master
    # 1. Create app user with write privileges
    # 2. Moving backup script if needed
    # 3. Register new master's ip to the catalog database

    # We don't return error even though activating updatable accounts/ip failed so that we don't interrupt slaves' recovery.
    # If exit code is 0 or 10, MHA does not abort
    my $exit_code = 10;
    eval {
      my $new_master_handler = MHA::DBHelper->new();

      # args: hostname, port, user, password, raise_error_or_not
      $new_master_handler->connect( $new_master_ip, $new_master_port,
        $new_master_user, $new_master_password, 1 );

      ## Set read_only=0 on the new master
      #$new_master_handler->disable_log_bin_local();
      print current_time_us() . " Set read_only=0 on the new master.\n";
      $new_master_handler->disable_read_only();

      ## Creating an app user on the new master
      #print current_time_us() . " Creating app user on the new master..\n";
      #FIXME_xxx_create_app_user($new_master_handler);
      #$new_master_handler->enable_log_bin_local();
      $new_master_handler->disconnect();

      ## Update master ip on the catalog database, etc
      print "Enabling the VIP - $vip on the new master - $new_master_host \n";
      start_vip();
      $exit_code = 0;
    };
    if ($@) {
      warn "Got Error: $@\n";
    }
    exit $exit_code;
  }
  elsif ( $command eq "status" ) {

    # do nothing
    exit 0;
  }
  else {
    usage();
    exit 1;
  }
}

# Runs $remote_command on $host over ssh as $ssh_user. The command's standard
# output is captured and discarded. A failure is reported with a warning
# (naming $action) instead of being silently ignored; it is deliberately not
# fatal, so the switchover keeps its original best-effort behaviour.
# Returns 1 on success and 0 on failure.
sub _run_remote {
  my ( $ssh_user, $host, $remote_command, $action ) = @_;

  my $output = `ssh $ssh_user\@$host \" $remote_command \"`;
  my $status = $?;
  return 1 if ( $status == 0 );

  my $detail;
  if ( $status == -1 ) {
    $detail = "could not execute ssh: $!";
  }
  elsif ( $status & 127 ) {
    $detail = "ssh was killed by signal " . ( $status & 127 );
  }
  else {
    $detail = "exit status " . ( $status >> 8 );
  }
  warn current_time_us() . " WARNING: $action on $host failed ($detail).\n";
  return 0;
}

# A simple system call that enable the VIP on the new master.
# Returns 1 on success and 0 on failure.
sub start_vip {
  return _run_remote( $new_master_ssh_user, $new_master_host,
    $ssh_start_vip, "enabling the VIP" );
}

# A simple system call that disable the VIP on the old_master.
# Returns 1 on success and 0 on failure.
sub stop_vip {
  return _run_remote( $orig_master_ssh_user, $orig_master_host,
    $ssh_stop_vip, "disabling the VIP" );
}

# Prints the command-line synopsis to standard output. The caller decides the
# exit code.
sub usage {
  print "Usage: master_ip_online_change --command=start|stop|status --orig_master_host=host --orig_master_ip=ip --orig_master_port=port --orig_master_user=user --orig_master_password=password --orig_master_ssh_user=sshuser --new_master_host=host --new_master_ip=ip --new_master_port=port --new_master_user=user --new_master_password=password --new_master_ssh_user=sshuser \n";
  return;
}
更换 VIP 后,一定要执行一次 arping(即脚本中的 arping -A),主动刷新同网段其它主机的 ARP 缓存,否则客户端可能仍把 VIP 解析到旧主库的 MAC 地址。
验证 switchover
手动切换之前先停止 mha manager
关闭 mha manager
# masterha_stop --conf=/etc/mysql_mha/app1.cnf
查看 manager status
# masterha_check_status --conf=/etc/mysql_mha/app1.cnf
app1 is stopped(2:NOT_RUNNING).
查看 manager log
# tail -n 1000 -f /data/mysql_mha/app1-manager.log
检查复制环境ssh
# masterha_check_ssh --conf=/etc/mysql_mha/app1.cnf
检查整个复制环境
# masterha_check_repl --conf=/etc/mysql_mha/app1.cnf
手动执行
目前,ed3jrdba90 是 master,ed3jrdba97 是 ed3jrdba90 的 slave;本次切换将把 ed3jrdba97 提升为 master,ed3jrdba90 降为 slave。
# masterha_master_switch --conf=/etc/mysql_mha/app1.cnf --master_state=alive --orig_master_is_new_slave --new_master_host=10.150.20.97 --new_master_port=33061 --running_updates_limit=10000
# masterha_master_switch --conf=/etc/mysql_mha/app1.cnf --master_state=alive --orig_master_is_new_slave --new_master_host=10.150.20.97 --new_master_port=33061 --running_updates_limit=10000 Wed Dec 12 16:48:48 2018 - [info] MHA::MasterRotate version 0.58. Wed Dec 12 16:48:48 2018 - [info] Starting online master switch.. Wed Dec 12 16:48:48 2018 - [info] Wed Dec 12 16:48:48 2018 - [info] * Phase 1: Configuration Check Phase.. Wed Dec 12 16:48:48 2018 - [info] Wed Dec 12 16:48:48 2018 - [info] Reading default configuration from /etc/masterha_default.cnf.. Wed Dec 12 16:48:48 2018 - [info] Reading application default configuration from /etc/mysql_mha/app1.cnf.. Wed Dec 12 16:48:48 2018 - [info] Reading server configuration from /etc/mysql_mha/app1.cnf.. Wed Dec 12 16:48:49 2018 - [info] GTID failover mode = 0 Wed Dec 12 16:48:49 2018 - [info] Current Alive Master: 10.150.20.90(10.150.20.90:33061) Wed Dec 12 16:48:49 2018 - [info] Alive Slaves: Wed Dec 12 16:48:49 2018 - [info] 10.150.20.97(10.150.20.97:33061) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled Wed Dec 12 16:48:49 2018 - [info] Replicating from 10.150.20.90(10.150.20.90:33061) Wed Dec 12 16:48:49 2018 - [info] 10.150.20.132(10.150.20.132:33061) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled Wed Dec 12 16:48:49 2018 - [info] Replicating from 10.150.20.90(10.150.20.90:33061) It is better to execute FLUSH NO_WRITE_TO_BINLOG TABLES on the master before switching. Is it ok to execute on 10.150.20.90(10.150.20.90:33061)? (YES/no): YES Wed Dec 12 16:48:50 2018 - [info] Executing FLUSH NO_WRITE_TO_BINLOG TABLES. This may take long time.. Wed Dec 12 16:48:50 2018 - [info] ok. Wed Dec 12 16:48:50 2018 - [info] Checking MHA is not monitoring or doing failover.. Wed Dec 12 16:48:50 2018 - [info] Checking replication health on 10.150.20.97.. Wed Dec 12 16:48:50 2018 - [info] ok. Wed Dec 12 16:48:50 2018 - [info] Checking replication health on 10.150.20.132.. 
Wed Dec 12 16:48:50 2018 - [info] ok. Wed Dec 12 16:48:50 2018 - [info] 10.150.20.97 can be new master. Wed Dec 12 16:48:50 2018 - [info] From: 10.150.20.90(10.150.20.90:33061) (current master) +--10.150.20.97(10.150.20.97:33061) +--10.150.20.132(10.150.20.132:33061) To: 10.150.20.97(10.150.20.97:33061) (new master) +--10.150.20.132(10.150.20.132:33061) +--10.150.20.90(10.150.20.90:33061) Starting master switch from 10.150.20.90(10.150.20.90:33061) to 10.150.20.97(10.150.20.97:33061)? (yes/NO): yes Wed Dec 12 16:48:51 2018 - [info] Checking whether 10.150.20.97(10.150.20.97:33061) is ok for the new master.. Wed Dec 12 16:48:51 2018 - [info] ok. Wed Dec 12 16:48:51 2018 - [info] 10.150.20.90(10.150.20.90:33061): SHOW SLAVE STATUS returned empty result. To check replication filtering rules, temporarily executing CHANGE MASTER to a dummy host. Wed Dec 12 16:48:51 2018 - [info] 10.150.20.90(10.150.20.90:33061): Resetting slave pointing to the dummy host. Wed Dec 12 16:48:51 2018 - [info] ** Phase 1: Configuration Check Phase completed. Wed Dec 12 16:48:51 2018 - [info] Wed Dec 12 16:48:51 2018 - [info] * Phase 2: Rejecting updates Phase.. Wed Dec 12 16:48:51 2018 - [info] Wed Dec 12 16:48:51 2018 - [info] Executing master ip online change script to disable write on the current master: Wed Dec 12 16:48:51 2018 - [info] /usr/local/bin/master_ip_online_change --command=stop --orig_master_host=10.150.20.90 --orig_master_ip=10.150.20.90 --orig_master_port=33061 --orig_master_user='mha_monitor' --new_master_host=10.150.20.97 --new_master_ip=10.150.20.97 --new_master_port=33061 --new_master_user='mha_monitor' --orig_master_ssh_user=root --new_master_ssh_user=root --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx Wed Dec 12 16:48:51 2018 572739 Set read_only on the new master.. ok. Wed Dec 12 16:48:51 2018 576690 Set read_only=1 on the orig master.. ok. 
Disabling the VIP on old master: 10.150.20.90 Wed Dec 12 16:48:51 2018 716665 Killing all application threads.. Wed Dec 12 16:48:51 2018 716732 done. Wed Dec 12 16:48:51 2018 - [info] ok. Wed Dec 12 16:48:51 2018 - [info] Locking all tables on the orig master to reject updates from everybody (including root): Wed Dec 12 16:48:51 2018 - [info] Executing FLUSH TABLES WITH READ LOCK.. Wed Dec 12 16:48:51 2018 - [info] ok. Wed Dec 12 16:48:51 2018 - [info] Orig master binlog:pos is mysql-bin.000009:154. Wed Dec 12 16:48:51 2018 - [info] Waiting to execute all relay logs on 10.150.20.97(10.150.20.97:33061).. Wed Dec 12 16:48:51 2018 - [info] master_pos_wait(mysql-bin.000009:154) completed on 10.150.20.97(10.150.20.97:33061). Executed 0 events. Wed Dec 12 16:48:51 2018 - [info] done. Wed Dec 12 16:48:51 2018 - [info] Getting new master's binlog name and position.. Wed Dec 12 16:48:51 2018 - [info] mysql-bin.000014:154 Wed Dec 12 16:48:51 2018 - [info] All other slaves should start replication from here. Statement should be: CHANGE MASTER TO MASTER_HOST='10.150.20.97', MASTER_PORT=33061, MASTER_LOG_FILE='mysql-bin.000014', MASTER_LOG_POS=154, MASTER_USER='replicator', MASTER_PASSWORD='xxx'; Wed Dec 12 16:48:51 2018 - [info] Executing master ip online change script to allow write on the new master: Wed Dec 12 16:48:51 2018 - [info] /usr/local/bin/master_ip_online_change --command=start --orig_master_host=10.150.20.90 --orig_master_ip=10.150.20.90 --orig_master_port=33061 --orig_master_user='mha_monitor' --new_master_host=10.150.20.97 --new_master_ip=10.150.20.97 --new_master_port=33061 --new_master_user='mha_monitor' --orig_master_ssh_user=root --new_master_ssh_user=root --orig_master_is_new_slave --orig_master_password=xxx --new_master_password=xxx Wed Dec 12 16:48:51 2018 875301 Set read_only=0 on the new master. Enabling the VIP - 10.150.20.200 on the new master - 10.150.20.97 Wed Dec 12 16:48:52 2018 - [info] ok. 
Wed Dec 12 16:48:52 2018 - [info] Wed Dec 12 16:48:52 2018 - [info] * Switching slaves in parallel.. Wed Dec 12 16:48:52 2018 - [info] Wed Dec 12 16:48:52 2018 - [info] -- Slave switch on host 10.150.20.132(10.150.20.132:33061) started, pid: 28145 Wed Dec 12 16:48:52 2018 - [info] Wed Dec 12 16:48:53 2018 - [info] Log messages from 10.150.20.132 ... Wed Dec 12 16:48:53 2018 - [info] Wed Dec 12 16:48:52 2018 - [info] Waiting to execute all relay logs on 10.150.20.132(10.150.20.132:33061).. Wed Dec 12 16:48:52 2018 - [info] master_pos_wait(mysql-bin.000009:154) completed on 10.150.20.132(10.150.20.132:33061). Executed 0 events. Wed Dec 12 16:48:52 2018 - [info] done. Wed Dec 12 16:48:52 2018 - [info] Resetting slave 10.150.20.132(10.150.20.132:33061) and starting replication from the new master 10.150.20.97(10.150.20.97:33061).. Wed Dec 12 16:48:52 2018 - [info] Executed CHANGE MASTER. Wed Dec 12 16:48:52 2018 - [info] Slave started. Wed Dec 12 16:48:53 2018 - [info] End of log messages from 10.150.20.132 ... Wed Dec 12 16:48:53 2018 - [info] Wed Dec 12 16:48:53 2018 - [info] -- Slave switch on host 10.150.20.132(10.150.20.132:33061) succeeded. Wed Dec 12 16:48:53 2018 - [info] Unlocking all tables on the orig master: Wed Dec 12 16:48:53 2018 - [info] Executing UNLOCK TABLES.. Wed Dec 12 16:48:53 2018 - [info] ok. Wed Dec 12 16:48:53 2018 - [info] Starting orig master as a new slave.. Wed Dec 12 16:48:53 2018 - [info] Resetting slave 10.150.20.90(10.150.20.90:33061) and starting replication from the new master 10.150.20.97(10.150.20.97:33061).. Wed Dec 12 16:48:53 2018 - [info] Executed CHANGE MASTER. Wed Dec 12 16:48:53 2018 - [info] Slave started. Wed Dec 12 16:48:53 2018 - [info] All new slave servers switched successfully. Wed Dec 12 16:48:53 2018 - [info] Wed Dec 12 16:48:53 2018 - [info] * Phase 5: New master cleanup phase.. Wed Dec 12 16:48:53 2018 - [info] Wed Dec 12 16:48:53 2018 - [info] 10.150.20.97: Resetting slave info succeeded. 
Wed Dec 12 16:48:53 2018 - [info] Switching master to 10.150.20.97(10.150.20.97:33061) completed successfully.
此时,查看新主ed3jrdba97的信息
# ifconfig ens3:1
ens3:1: flags=4163<UP,BROADCAST,RUNNING,MULTICAST> mtu 1500
inet 10.150.20.200 netmask 255.255.255.0 broadcast 10.150.20.255
ether 54:52:00:49:48:92 txqueuelen 1000 (Ethernet)
root@(none) 04:50:08>show processlist;
+----+------------+---------------------+------+-------------+------+---------------------------------------------------------------+------------------+
| Id | User | Host | db | Command | Time | State | Info |
+----+------------+---------------------+------+-------------+------+---------------------------------------------------------------+------------------+
| 13 | root | localhost | NULL | Sleep | 755 | | NULL |
| 36 | replicator | 10.150.20.132:38717 | NULL | Binlog Dump | 79 | Master has sent all binlog to slave; waiting for more updates | NULL |
| 37 | replicator | 10.150.20.90:15003 | NULL | Binlog Dump | 78 | Master has sent all binlog to slave; waiting for more updates | NULL |
| 38 | root | localhost | NULL | Query | 0 | starting | show processlist |
+----+------------+---------------------+------+-------------+------+---------------------------------------------------------------+------------------+
4 rows in set (0.00 sec)
manager节点:
# masterha_check_repl --conf=/etc/mysql_mha/app1.cnf
Wed Dec 12 16:51:22 2018 - [info] Reading default configuration from /etc/masterha_default.cnf.. Wed Dec 12 16:51:22 2018 - [info] Reading application default configuration from /etc/mysql_mha/app1.cnf.. Wed Dec 12 16:51:22 2018 - [info] Reading server configuration from /etc/mysql_mha/app1.cnf.. Wed Dec 12 16:51:22 2018 - [info] MHA::MasterMonitor version 0.58. Wed Dec 12 16:51:23 2018 - [info] GTID failover mode = 0 Wed Dec 12 16:51:23 2018 - [info] Dead Servers: Wed Dec 12 16:51:23 2018 - [info] Alive Servers: Wed Dec 12 16:51:23 2018 - [info] 10.150.20.90(10.150.20.90:33061) Wed Dec 12 16:51:23 2018 - [info] 10.150.20.97(10.150.20.97:33061) Wed Dec 12 16:51:23 2018 - [info] 10.150.20.132(10.150.20.132:33061) Wed Dec 12 16:51:23 2018 - [info] Alive Slaves: Wed Dec 12 16:51:23 2018 - [info] 10.150.20.90(10.150.20.90:33061) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled Wed Dec 12 16:51:23 2018 - [info] Replicating from 10.150.20.97(10.150.20.97:33061) Wed Dec 12 16:51:23 2018 - [info] 10.150.20.132(10.150.20.132:33061) Version=5.7.21-log (oldest major version between slaves) log-bin:enabled Wed Dec 12 16:51:23 2018 - [info] Replicating from 10.150.20.97(10.150.20.97:33061) Wed Dec 12 16:51:23 2018 - [info] Current Alive Master: 10.150.20.97(10.150.20.97:33061) Wed Dec 12 16:51:23 2018 - [info] Checking slave configurations.. Wed Dec 12 16:51:23 2018 - [info] read_only=1 is not set on slave 10.150.20.132(10.150.20.132:33061). Wed Dec 12 16:51:23 2018 - [info] Checking replication filtering settings.. Wed Dec 12 16:51:23 2018 - [info] binlog_do_db= , binlog_ignore_db= Wed Dec 12 16:51:23 2018 - [info] Replication filtering check ok. Wed Dec 12 16:51:23 2018 - [info] GTID (with auto-pos) is not supported Wed Dec 12 16:51:23 2018 - [info] Starting SSH connection tests.. Wed Dec 12 16:51:26 2018 - [info] All SSH connection tests passed successfully. Wed Dec 12 16:51:26 2018 - [info] Checking MHA Node version.. 
Wed Dec 12 16:51:27 2018 - [info] Version check ok. Wed Dec 12 16:51:27 2018 - [info] Checking SSH publickey authentication settings on the current master.. Wed Dec 12 16:51:27 2018 - [info] HealthCheck: SSH to 10.150.20.97 is reachable. Wed Dec 12 16:51:27 2018 - [info] Master MHA Node version is 0.58. Wed Dec 12 16:51:27 2018 - [info] Checking recovery script configurations on 10.150.20.97(10.150.20.97:33061).. Wed Dec 12 16:51:27 2018 - [info] Executing command: save_binary_logs --command=test --start_pos=4 --binlog_dir=/data/mysql_33061/logs --output_file=/data/mysql_mha/app1/save_binary_logs_test --manager_version=0.58 --start_file=mysql-bin.000014 Wed Dec 12 16:51:27 2018 - [info] Connecting to root@10.150.20.97(10.150.20.97:22).. Creating /data/mysql_mha/app1 if not exists.. ok. Checking output directory is accessible or not.. ok. Binlog found at /data/mysql_33061/logs, up to mysql-bin.000014 Wed Dec 12 16:51:27 2018 - [info] Binlog setting check done. Wed Dec 12 16:51:27 2018 - [info] Checking SSH publickey authentication and checking recovery script configurations on all alive slave servers.. Wed Dec 12 16:51:27 2018 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha_monitor' --slave_host=10.150.20.90 --slave_ip=10.150.20.90 --slave_port=33061 --workdir=/data/mysql_mha/app1 --target_version=5.7.21-log --manager_version=0.58 --relay_log_info=/data/mysql_33061/logs/relay-log.info --relay_dir=/data/mysql_33061/data/ --slave_pass=xxx Wed Dec 12 16:51:27 2018 - [info] Connecting to root@10.150.20.90(10.150.20.90:22).. Checking slave recovery environment settings.. Opening /data/mysql_33061/logs/relay-log.info ... ok. Relay log found at /data/mysql_33061/logs, up to relaylog.000002 Temporary relay log file is /data/mysql_33061/logs/relaylog.000002 Checking if super_read_only is defined and turned on.. not present or turned off, ignoring. Testing mysql connection and privileges.. 
mysql: [Warning] Using a password on the command line interface can be insecure. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Wed Dec 12 16:51:27 2018 - [info] Executing command : apply_diff_relay_logs --command=test --slave_user='mha_monitor' --slave_host=10.150.20.132 --slave_ip=10.150.20.132 --slave_port=33061 --workdir=/data/mysql_mha/app1 --target_version=5.7.21-log --manager_version=0.58 --relay_log_info=/data/mysql_33061/logs/relay-log.info --relay_dir=/data/mysql_33061/data/ --slave_pass=xxx Wed Dec 12 16:51:27 2018 - [info] Connecting to root@10.150.20.132(10.150.20.132:22).. Checking slave recovery environment settings.. Opening /data/mysql_33061/logs/relay-log.info ... ok. Relay log found at /data/mysql_33061/data, up to cgdb-relay-bin.000002 Temporary relay log file is /data/mysql_33061/data/cgdb-relay-bin.000002 Checking if super_read_only is defined and turned on.. not present or turned off, ignoring. Testing mysql connection and privileges.. mysql: [Warning] Using a password on the command line interface can be insecure. done. Testing mysqlbinlog output.. done. Cleaning up test file(s).. done. Wed Dec 12 16:51:28 2018 - [info] Slaves settings check done. Wed Dec 12 16:51:28 2018 - [info] 10.150.20.97(10.150.20.97:33061) (current master) +--10.150.20.90(10.150.20.90:33061) +--10.150.20.132(10.150.20.132:33061) Wed Dec 12 16:51:28 2018 - [info] Checking replication health on 10.150.20.90.. Wed Dec 12 16:51:28 2018 - [info] ok. Wed Dec 12 16:51:28 2018 - [info] Checking replication health on 10.150.20.132.. Wed Dec 12 16:51:28 2018 - [info] ok. 
Wed Dec 12 16:51:28 2018 - [info] Checking master_ip_failover_script status: Wed Dec 12 16:51:28 2018 - [info] /usr/local/bin/master_ip_failover --command=status --ssh_user=root --orig_master_host=10.150.20.97 --orig_master_ip=10.150.20.97 --orig_master_port=33061 IN SCRIPT TEST====/usr/sbin/ip addr del 10.150.20.200/24 dev ens3 label ens3:1==/usr/sbin/ip addr add 10.150.20.200/24 brd 10.150.20.255 dev ens3 label ens3:1;/usr/sbin/arping -q -A -c 1 -I ens3 10.150.20.200;iptables -F;=== Checking the Status of the script.. OK Wed Dec 12 16:51:28 2018 - [info] OK. Wed Dec 12 16:51:28 2018 - [warning] shutdown_script is not defined. Wed Dec 12 16:51:28 2018 - [info] Got exit code 0 (Not master dead). MySQL Replication Health is OK.
可以看到当前的架构:
10.150.20.97(10.150.20.97:33061) (current master)
+--10.150.20.90(10.150.20.90:33061)
+--10.150.20.132(10.150.20.132:33061)
启动 mha manager
switchover成功后,修改/etc/mysql_mha/app1.cnf,然后再启动 mha manager
启动 manager
# nohup masterha_manager --conf=/etc/mysql_mha/app1.cnf --remove_dead_master_conf --ignore_last_failover < /dev/null > /data/mysql_mha/app1-manager.log 2>&1 &
查看 manager status
# masterha_check_status --conf=/etc/mysql_mha/app1.cnf
查看 manager log
# tail -n 1000 -f /data/mysql_mha/app1-manager.log
switchover的过程,基本为以下步骤:
1.检测复制设置和确定当前主服务器
2.确定新的主服务器
3.阻塞写入到当前主服务器
4.等待所有从服务器赶上复制
5.授予写入到新的主服务器
6.重新设置从服务器