Mysql innodb cluster

操作系统层面配置:

临时关闭防火墙

systemctl stop firewalld

永久关闭防火墙(禁止开机自启动)

systemctl disable firewalld

临时打开防火墙

systemctl start firewalld

防火墙开机启动

systemctl enable firewalld

查看防火墙状态

systemctl status firewalld

 

临时关闭SELinux

setenforce 0

永久关闭

编辑/etc/selinux/config文件,将SELINUX的值设置为disabled

 

 

操作系统层面资源限制

$ vim /etc/security/limits.conf,添加:

mysql soft nproc 2047

mysql hard nproc 16384

mysql soft nofile 1024

mysql hard nofile 65535

 

配置host信息

[root@cluster3 bin]# cat /etc/hosts

127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4

::1         localhost localhost.localdomain localhost6 localhost6.localdomain6

192.168.0.210 cluster1

192.168.0.220 cluster2

192.168.0.230 cluster3

 

安装mysql

mysql-8.0/bin/mysqld --initialize-insecure --basedir=/u01/mysql-8.0.13 --datadir=/data/ --user=mysql

 

配置mysql参数文件

# MySQL option file for one cluster node (MySQL 8.0.13 layout under /data).

[client]
default-character-set = utf8
port    = 3306
# Must point at the same socket file as [mysqld] below; otherwise local
# clients look for a socket the server never creates.
socket  = /data/mysql.sock

[mysqld]
user = mysql
character_set_server = utf8
basedir = /u01/mysql-8.0.13
datadir = /data
tmpdir  = /data
log-error = /data/mysql_error.log
pid-file = /data/mysql.pid
port = 3306
socket = /data/mysql.sock
max_connections = 3000
open_files_limit = 65535
max_connect_errors = 6000
skip-host-cache
skip-external-locking
skip-name-resolve
max_allowed_packet = 32M
read_rnd_buffer_size = 16M
join_buffer_size = 2M
sort_buffer_size = 2M
thread_cache_size = 300
tmp_table_size = 64M
max_heap_table_size = 64M
transaction_isolation = READ-COMMITTED

#INNODB
innodb_buffer_pool_size = 200M
innodb_log_file_size = 50M
innodb_log_buffer_size = 8M
innodb_log_files_in_group = 3
innodb_file_per_table = 1
default-storage-engine = InnoDB

#log
expire_logs_days = 5
slow_query_log = true
long_query_time = 5
slow-query-log-file = /data/mysql_slow.log
lower_case_table_names=0

# Replication configuration parameters
# NOTE(review): server_id is expected to differ on every node of the group;
# change it when copying this file to cluster2 / cluster3.
server_id=1
gtid_mode=ON
enforce_gtid_consistency=ON
binlog_checksum=NONE

log-bin=mysql-bin
log_slave_updates=ON
binlog_format=ROW
master_info_repository=TABLE
relay_log_info_repository=TABLE

 

 

启动mysql后,配置用户

MGR所需权限

CREATE USER rpl_user@'%' IDENTIFIED BY 'password';

GRANT REPLICATION SLAVE ON *.* TO rpl_user@'%';

GRANT BACKUP_ADMIN ON *.* TO rpl_user@'%';

FLUSH PRIVILEGES;

 

GRANT SELECT ON mysql_innodb_cluster_metadata.* TO your_user@'%';

GRANT SELECT ON performance_schema.global_status TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_configuration TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_status TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_status_by_coordinator TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_status_by_worker TO your_user@'%';

GRANT SELECT ON performance_schema.replication_connection_configuration TO your_user@'%';

GRANT SELECT ON performance_schema.replication_connection_status TO your_user@'%';

GRANT SELECT ON performance_schema.replication_group_member_stats TO your_user@'%';

GRANT SELECT ON performance_schema.replication_group_members TO your_user@'%';

GRANT SELECT ON performance_schema.threads TO your_user@'%' WITH GRANT OPTION;

 

简单创建用户

-- Create a remotely usable root account on this node.
-- SQL_LOG_BIN is switched off for the session so these statements are not
-- written to this node's binary log.
SET SQL_LOG_BIN=0;
-- The password must be a quoted string literal; a bare 123456 is a syntax error.
CREATE USER root@'%' IDENTIFIED BY '123456';
-- NOTE(review): AdminAPI operations may additionally require WITH GRANT OPTION
-- on this account; confirm against the MySQL Shell version in use.
GRANT all on *.* TO root@'%';
FLUSH PRIVILEGES;
SET SQL_LOG_BIN=1;

 

安装mgr插件

INSTALL PLUGIN group_replication SONAME 'group_replication.so';

SHOW PLUGINS;

 

Mysql shell配置集群

检查三个实例是否满足innodb cluster要求

// Run in MySQL Shell (JS mode): check/configure each of the three instances
// for InnoDB Cluster use. Each connection string must be a closed string literal.
dba.configureInstance('repl@cluster1:3306')
dba.configureInstance('repl@cluster2:3306')
dba.configureInstance('repl@cluster3:3306')

 

连接某一个实例,开始创建集群

\connect root@cluster1:3306

 

// Run in MySQL Shell (JS mode) while connected to cluster1:
// create the cluster, re-fetch its handle, then add the other two instances.
var cluster = dba.createCluster('myCluster')
var cluster = dba.getCluster()

// Connection strings are quoted 'user@host:port' literals with no embedded spaces.
cluster.addInstance('root@cluster2:3306')
cluster.addInstance('root@cluster3:3306')

 

查看集群状态

cluster.status()

 

配置mysql router

安装

./mysqlrouter --bootstrap root@cluster1:3306 --directory /u01/mysql-router-8.0.13/ --user=mysql

自动生成启动脚本

./data/start.sh

 

Mgr监控视图

SELECT * FROM performance_schema.replication_group_members\G

performance_schema.replication_group_member_stats

performance_schema.replication_group_members

These Performance Schema replication tables also show information about Group Replication:

performance_schema.replication_connection_status shows information regarding Group

Replication, for example the transactions that have been received from the group and queued in the

applier queue (the relay log).

performance_schema.replication_applier_status shows the state of the Group Replication

related channels and threads. If there are many different worker threads applying transactions, then the

worker tables can also be used to monitor what each worker thread is doing.

The replication channels created by the Group Replication plugin are named:

group_replication_recovery - This channel is used for the replication changes that are related to

the distributed recovery phase.

group_replication_applier - This channel is used for the incoming changes from the group. This

is the channel used to apply transactions coming directly from the group.

The following sections describe how to interpret the information available.

 

##改变主的位置

SELECT group_replication_set_as_primary('102c2bc6-18c1-11e9-92a6-000c296459b4');

 

##修改mgr为单主模式

##Changing to Single-Primary Mode

SELECT group_replication_switch_to_single_primary_mode()

 

##修改mgr为多主模式

SELECT group_replication_switch_to_multi_primary_mode()

 

dba.dropMetadataSchema()

cluster.removeInstance("root@cluster2:3306", {force: true})

 

##重新启动集群

var cluster = dba.rebootClusterFromCompleteOutage();

 

 

集群常用操作命令

dba.checkInstanceConfiguration("root@hostname:3306") // 检查节点配置实例,用于加入cluster之前

dba.rebootClusterFromCompleteOutage('myCluster'); //重启 

dba.dropMetadataSchema(); //删除schema

var cluster = dba.getCluster('myCluster') // 获取当前集群

cluster.checkInstanceState("root@hostname:3306") //检查cluster里节点状态

cluster.rejoinInstance("root@hostname:3306") // 重新加入节点,我本地测试的时候发现rejoin一直无效,每次是delete后add

cluster.dissolve({force:true}) // 删除集群

cluster.addInstance("root@hostname:3306") // 增加节点

cluster.removeInstance("root@hostname:3306") // 删除节点

cluster.removeInstance('root@host:3306',{force:true}) // 强制删除节点

cluster.dissolve({force:true}) //解散集群

cluster.describe();//集群描述

cluster.rescan(); //update the metadata

 

 

常见问题

问题1:[ERROR] Slave SQL for channel 'group_replication_recovery': Could not execute Write_rows event on table mysql_innodb_cluster_metadata.instances; Cannot add or update a child row: a foreign key constraint fails (mysql_innodb_cluster_metadata.instances, CONSTRAINT instances_ibfk_1 FOREIGN KEY (host_id) REFERENCES hosts (host_id)), Error_code: 1452; handler error HA_ERR_NO_REFERENCED_ROW; the event's master log binlog.000001, end_log_pos 3059, Error_code: 1452

解决方式:清空表mysql_innodb_cluster_metadata.hosts; 重新建立集群

 

 

问题2:This member has more executed transactions than those present in the group

解决方式:

mysql> stop group_replication;

mysql> reset master;

 

 

问题3:用户操作系统资源的限制

[Warning] Buffered warning: Changed limits: max_open_files: 1024 (requested 5000)

[Warning] Buffered warning: Changed limits: table_open_cache: 431 (requested 2000)

解决方式:

$ vim /etc/security/limits.conf,添加:

mysql soft nproc 2047

mysql hard nproc 16384

mysql soft nofile 1024

mysql hard nofile 65535

 

 

问题4:dba.rebootClusterFromCompleteOutage: The active session instance isn't the most updated in comparison with the ONLINE instances of the Cluster's metadata.

在集群没有起来时某些机器的数据表发生变动,导致数据不一致;

解决方式:

所有MySQL机器通过reset master命令清空binlogs

mysql> reset master;

mysql> show master logs;

然后再运行dba.rebootClusterFromCompleteOutage()重启集群。

 

 

问题5:service mysql restart无法重启mysql,mysql stuck,并一直输出日志'[Note] Plugin group_replication reported: '[GCS] cli_err 2''

解决方式:唯一停止MySQL的命令为:sudo pkill -9 mysqld

 

 

问题6:如何将Multi-Primary改为Single-Primary?

(1). 解散原来的集群:mysql-js> cluster.dissolve({force: true})

(2). 每台主机MySQL修改如下配置:

mysql> set global group_replication_enforce_update_everywhere_checks=OFF;

mysql> set global group_replication_single_primary_mode=ON;

(3). 重新创建集群:

mysql-js> var cluster = dba.createCluster('mysqlCluster');

mysql-js> cluster.addInstance('chianyu@svr2:3306');

mysql-js> cluster.addInstance('chianyu@svr3:3306');

 

mysql router+keepalived做高可用

yum -y install keepalived*

 

 

[root@cluster1 mysql-router-8.0.13]# cat /etc/keepalived/keepalived.conf

 

vrrp_script chk_router {

    script "/u01/mysql-router-8.0.13/check_router.sh"

    interval 2

    weight -20

}

 

 

vrrp_instance VI_1 {

    state MASTER

    interface ens33

    virtual_router_id 51

    priority 100

    advert_int 1

    authentication {

        auth_type PASS

        auth_pass 1111

    }

 

    track_script {

        chk_router

    }      

 

    virtual_ipaddress {

        192.168.0.200

    }

}

 

检查脚本:

[root@cluster1 mysql-router-8.0.13]# cat check_router.sh

#!/bin/bash
#
# keepalived health check for MySQL Router (referenced by the vrrp_script
# chk_router section of keepalived.conf).
# When nothing is listening on the router port, keepalived is stopped on this
# node -- presumably so that the virtual IP fails over to the peer node.

readonly ROUTER_PORT=6446

# Count TCP sockets in LISTEN state whose local address ends in ":<port>".
# Matching the port field exactly avoids false positives from ports such as
# 16446, or from unrelated netstat lines that merely contain "6446".
listeners=$(netstat -lnt \
  | awk -v port="${ROUTER_PORT}" \
      '$NF == "LISTEN" && $4 ~ (":" port "$") { n++ } END { print n + 0 }')

if [ "${listeners}" -eq 0 ]; then
    systemctl stop keepalived
fi

 

配置完成后,查看/var/log/messages日志

 

[root@cluster2 mysql-router-8.0.13]# cat /etc/keepalived/keepalived.conf

 

vrrp_script chk_router {

    script "/u01/mysql-router-8.0.13/check_router.sh"

    interval 2

    weight -20

}

 

 

vrrp_instance VI_1 {

    state BACKUP

    interface ens33

    virtual_router_id 51

    priority 95

    advert_int 1

    authentication {

        auth_type PASS

        auth_pass 1111

    }

 

    track_script {

        chk_router

    }      

 

    virtual_ipaddress {

        192.168.0.200

    }

}

 

 

给脚本授权

chown mysql:mysql check_router.sh

chmod 755 check_router.sh

 

启动keepalived

systemctl start keepalived

systemctl stop keepalived

 

LVS dr模式配置

(1)安装ipvsadm

 

yum -y install ipvsadm

(2)设置ipv4转发

 

sysctl -w net.ipv4.ip_forward=1

控制节点执行脚本

[root@cluster1 opt]# cat 1.sh

#!/bin/bash
#
# LVS-DR director (control node) helper.
#
# Usage: 1.sh {start|stop|status}
#   start  - bring the VIP up on an interface alias and create the IPVS
#            virtual service with both real servers in direct-routing mode.
#   stop   - clear the IPVS table and take the VIP alias down.
#   status - print the current IPVS table.
# Exits 1 (after printing the usage line) for any other argument.

vip=192.168.0.200
iface='ens33:0'
mask='255.255.255.255'
port='6446'
rs1='192.168.0.220'
rs2='192.168.0.230'
scheduler='wrr'

# ${1:-} keeps a missing argument from being an error and routes it to usage.
case "${1:-}" in
start)
     ifconfig "$iface" "$vip" netmask "$mask" broadcast "$vip" up
     # NOTE: this flushes every rule in the iptables filter table on this host.
     iptables -F
     # -A adds the virtual service; -a adds a real server; -g selects
     # direct routing (gatewaying); -w sets the weight used by the scheduler.
     ipvsadm -A -t "${vip}:${port}" -s "$scheduler"
     ipvsadm -a -t "${vip}:${port}" -r "$rs1" -g -w 1
     ipvsadm -a -t "${vip}:${port}" -r "$rs2" -g -w 2
     ;;
stop)
     ipvsadm -C
     ifconfig "$iface" down
     ;;
status)
     # The usage text has always advertised "status"; list the IPVS table.
     ipvsadm -L -n
     ;;
*)
     echo "Usage: $(basename "$0") {start|stop|status}"
     exit 1
     ;;
esac

 

实际处理节点

[root@cluster2 opt]# cat 1.sh

#!/bin/bash
#
# LVS-DR real server helper.
#
# Usage: 1.sh start|stop|status
#   start  - set the ARP sysctls, bind the VIP to lo:0 and add a host route.
#   stop   - remove the VIP from lo:0, reset the ARP sysctls, delete the route.
#   status - report whether both the lo:0 address and the host route exist.
# Exits 1 (after printing the usage line) for any other argument.

vip=192.168.0.200
mask='255.255.255.255'

# ${1:-} keeps a missing argument from being an error and routes it to usage.
case "${1:-}" in
start)
     # arp_ignore=1 / arp_announce=2 are the usual LVS-DR settings that keep a
     # real server from answering or advertising ARP for the VIP held on lo.
     echo 1 > /proc/sys/net/ipv4/conf/all/arp_ignore
     echo 1 > /proc/sys/net/ipv4/conf/lo/arp_ignore
     echo 2 > /proc/sys/net/ipv4/conf/all/arp_announce
     echo 2 > /proc/sys/net/ipv4/conf/lo/arp_announce
     /sbin/ifconfig lo:0 "$vip" netmask "$mask" broadcast "$vip" up
     route add -host "$vip" dev lo:0
     ;;
stop)
     /sbin/ifconfig lo:0 down
     echo 0 > /proc/sys/net/ipv4/conf/all/arp_ignore
     echo 0 > /proc/sys/net/ipv4/conf/lo/arp_ignore
     echo 0 > /proc/sys/net/ipv4/conf/all/arp_announce
     echo 0 > /proc/sys/net/ipv4/conf/lo/arp_announce
     route del -host "$vip" dev lo:0
     ;;
status)
     # Status of LVS-DR real server.
     islothere=$(/sbin/ifconfig lo:0 | grep -F -- "$vip")
     # Look for a route whose destination is the VIP on the loopback device.
     # The interface column is matched as "lo*" because the routing table
     # normally shows the base device ("lo") rather than the alias ("lo:0").
     isrothere=$(netstat -rn | awk -v vip="$vip" '$1 == vip && $NF ~ /^lo/')
     if [[ -z "$islothere" || -z "$isrothere" ]]; then
         # Either the route or the lo:0 device
         # not found.
         echo "LVS-DR real server Stopped."
     else
         echo "LVS-DR real server Running."
     fi
     ;;
*)
     echo "Usage $(basename "$0") start|stop|status"
     exit 1
     ;;
esac

 

查看分发情况

[root@cluster1 opt]# ipvsadm -L -n --stats
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port Conns InPkts OutPkts InBytes OutBytes
-> RemoteAddress:Port
TCP 192.168.0.200:6446 0 0 0 0 0
-> 192.168.0.220:6446 0 0 0 0 0
-> 192.168.0.230:6446 0 0 0 0 0
TCP 192.168.0.200:6447 0 0 0 0 0
-> 192.168.0.220:6447 0 0 0 0 0
-> 192.168.0.230:6447 0 0 0 0 0

 

posted @ 2019-11-12 15:56  阿西吧li  阅读(652)  评论(0编辑  收藏  举报