MySQL MHA--主库故障检测
MHA主库检查参数
MHA从0.53版本开始支持ping_type参数来设置如何检查master可用性:
ping_type=select: 基于一个到master的已经存在的连接执行select 1,连接被重复使用,select检查能快速返回结果,但检查过于简单,无法发现更多故障。
ping_type=connect: 每次检查都会新建到master的连接,执行select 1后再断开连接,因此能更严格、更快速地发现TCP连接级别的故障。
ping_type=insert: 基于一个到master的已经存在的连接执行insert语句,连接被重复使用,能更好检测到数据库因磁盘空间耗尽或磁盘IO资源耗尽导致的故障。在0.56版本被引入。
MHA使用ping_interval参数来设置MHA Manager探测主库的时间间隔,默认间隔为3秒。当连续4次ping失败后,MHA Manager会检查SSH可达性并执行二次检查(is_secondary_down),二次检查确认后才判定master节点发生故障,否则将错误计数清零并继续监控。
MHA主库检查脚本
ping_select代码(/lib/MHA/HealthCheck.pm)
# Health check that runs "SELECT 1 As Value" on the handle stored in
# $self->{dbh}.
#
# Side effect: turns on RaiseError on that handle so that a failing
# prepare/execute is reported via die and caught by the eval below.
#
# Returns 0 when a row comes back whose Value column equals 1.
# Returns 1 on any error; a warning is sent to $self->{logger} when one is set.
sub ping_select($) {
    my $self   = shift;
    my $logger = $self->{logger};
    my $handle = $self->{dbh};

    # Declared outside the eval so they are released only when the sub returns.
    my ( $statement, $row );

    eval {
        $handle->{RaiseError} = 1;
        $statement = $handle->prepare("SELECT 1 As Value");
        $statement->execute();
        $row = $statement->fetchrow_hashref;

        my $healthy =
             defined($row)
          && defined( $row->{Value} )
          && $row->{Value} == 1;
        die unless $healthy;
    };

    # Nothing was thrown: the ping succeeded.
    return 0 unless $@;

    my $msg = "Got error on MySQL select ping: ";
    undef $@;
    if ($DBI::err) {
        $msg .= $DBI::err;
    }
    if ($DBI::errstr) {
        $msg .= " ($DBI::errstr)";
    }
    if ($logger) {
        $logger->warning($msg);
    }
    return 1;
}
ping_connect代码(/lib/MHA/HealthCheck.pm)
# Health check that opens a connection when none is held, runs the select
# ping on it, waits, and then calls disconnect_if().
#
# While $self->{dbh} is unset, connect() is attempted up to two times; an
# exception from the last permitted attempt is re-thrown into the outer eval.
#
# Returns 2 when $self->{_already_monitored} is set. Otherwise returns 1 if
# anything inside the outer eval died, else whatever ping_select() returned.
sub ping_connect($) {
    my $self          = shift;
    my $logger        = $self->{logger};
    my $status        = 1;
    my $attempts_left = 2;

    eval {
        my $started_at = [gettimeofday];

        until ( $self->{dbh} ) {
            last unless $attempts_left--;
            eval {
                $status = $self->connect( 1, $self->{interval}, 0, 0, 1 );
            };

            # Give up only when the final attempt also failed to connect.
            die $@ if !$self->{dbh} && $@ && !$attempts_left;
        }

        $status = $self->ping_select();

        # To hold advisory lock for some periods of time
        $self->sleep_until( $started_at, $self->{interval} - 1.5 );
        $self->disconnect_if();
    };

    if ($@) {
        my $msg = "Got error on MySQL connect ping: " . $@;
        undef $@;
        if ($DBI::err) {
            $msg .= $DBI::err;
        }
        if ($DBI::errstr) {
            $msg .= " ($DBI::errstr)";
        }
        if ($logger) {
            $logger->warning($msg);
        }
        $status = 1;
    }

    return $self->{_already_monitored} ? 2 : $status;
}
ping_insert代码(/lib/MHA/HealthCheck.pm)
# Health check that writes to the master through the handle stored in
# $self->{dbh}: it creates the infra database and the infra.chk_masterha
# table when they do not exist, then upserts the row with key 1 using the
# current unix timestamp.
#
# Side effect: turns on RaiseError on that handle so that a failing statement
# is reported via die and caught by the eval below.
#
# Returns 0 when all statements ran without raising.
# Returns 1 on any error; a warning is sent to $self->{logger} when one is set.
sub ping_insert($) {
    my $self   = shift;
    my $logger = $self->{logger};
    my $handle = $self->{dbh};

    # Executed in this order; the first failure aborts the rest.
    my @statements = (
        "CREATE DATABASE IF NOT EXISTS infra",
        "CREATE TABLE IF NOT EXISTS infra.chk_masterha (`key` tinyint NOT NULL primary key,`val` int(10) unsigned NOT NULL DEFAULT '0') engine=MyISAM",
        "INSERT INTO infra.chk_masterha values (1,unix_timestamp()) ON DUPLICATE KEY UPDATE val=unix_timestamp()",
    );

    eval {
        $handle->{RaiseError} = 1;
        for my $sql (@statements) {
            $handle->do($sql);
        }
    };

    # Nothing was thrown: the ping succeeded.
    return 0 unless $@;

    my $msg = "Got error on MySQL insert ping: ";
    undef $@;
    if ($DBI::err) {
        $msg .= $DBI::err;
    }
    if ($DBI::errstr) {
        $msg .= " ($DBI::errstr)";
    }
    if ($logger) {
        $logger->warning($msg);
    }
    return 1;
}
连续4次检查失败后,先检查SSH可达性并执行二次检查(is_secondary_down),二次检查确认后才判定主库发生故障,否则将错误计数清零后继续检查
# Once four or more ping failures have accumulated, record SSH reachability
# and run the secondary check. The enclosing loop is left only when the master
# is flagged as down; otherwise the failure counter starts over.
# NOTE(review): is_ssh_reachable() and is_secondary_down() are defined
# elsewhere — confirm their exact semantics against HealthCheck.pm.
if ( $error_count >= 4 ) {
    $ssh_reachable = $self->is_ssh_reachable();

    if ( $self->is_secondary_down() ) {
        $master_is_down = 1;
    }

    if ($master_is_down) {
        last;
    }

    $error_count = 0;
}