KingbaseRAC部署案例之---基于SAN存储部署
案例说明:
通过iSCSI共享存储(SAN)作为数据库的共享存储设备,构建KingbaseES RAC架构。
适用版本:
KingbaseES V008R006C008M030B0010
操作系统版本:
[root@node201 KingbaseHA]# cat /etc/centos-release
CentOS Linux release 7.9.2009 (Core)
集群架构:
如下所示,node201和node202为集群节点,node203为iSCSI存储服务器:
节点信息:
[root@node201 KingbaseHA]# vi /etc/hosts
192.168.1.201 node201
192.168.1.202 node202
192.168.1.203 node203 iscsi_Srv
一、创建集群部署目录/opt/KingbaseHA
1、编辑脚本(已经安装KingbaseRAC软件)
[root@node201 KingbaseHA]# cd /opt/Kingbase/ES/V8/install/script/
[root@node202 script]# cat rootDeployClusterware.sh
#!/bin/sh
# copy KingbaseHA to /opt/KingbaseHA
ROOT_UID=0
#INSTALLDIR=@@INSTALL_DIR@@ #脚本变量修改,路径以绝对路径标识
INSTALLDIR=/opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010
........
2、执行脚本
创建/opt/KingbaseHA目录,并copy文件到此目录:
[root@node202 script]# sh -x rootDeployClusterware.sh
+ ROOT_UID=0
+ INSTALLDIR=/opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010
+ '[' x0 '!=' x0 ']'
+ '[' -d /opt/KingbaseHA ']'
+ mkdir -m 775 /opt/KingbaseHA
+ cp -rf /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/cluster_manager.conf /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/cluster_manager.sh /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/corosync /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/corosync-qdevice /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/crmsh /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/dlm-dlm /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/fence_agents /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/gfs2 /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/gfs2-utils /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/ipmi_tool /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/kingbasefs /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/kronosnet /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/lib /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/lib64 /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/libqb /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/pacemaker /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/KingbaseHA/python2.7 /opt/KingbaseHA/
+ exit 0
# 查看目录信息
[root@node201 script]# ls -lhd /opt/KingbaseHA/
drwxrwxr-x 17 root root 4.0K Aug 13 14:32 /opt/KingbaseHA/
二、初始化部署环境
1、集群共享存储(通过iscsi共享)
# 数据库共享存储
[root@node201 script]# fdisk -l /dev/sdb
Disk /dev/sdb: 11.5 GB, 11499421696 bytes, 22459808 sectors
# qdisk存储
[root@node201 script]# fdisk -l /dev/sdc
Disk /dev/sdc: 536 MB, 536870912 bytes, 1048576 sectors
......
2、编辑集群配置文件
[root@node201 KingbaseHA]# cat cluster_manager.conf |grep -v ^#|grep -v ^$
cluster_name=kcluster
node_name=(node201 node202)
node_ip=(192.168.1.201 192.168.1.202)
enable_qdisk=1
votingdisk=/dev/sdc
sharedata_dir=/sharedata/data_gfs2
sharedata_disk=/dev/sdb
install_dir=/opt/KingbaseHA
env_bash_file=/root/.bashrc
pacemaker_daemon_group=haclient
pacemaker_daemon_user=hacluster
kingbaseowner=kingbase
kingbasegroup=kingbase
kingbase_install_dir=/opt/Kingbase/ES/V8/Server
database="test"
username="system"
password="123456"
initdb_options="-A trust -U $username"
enable_fence=1
enable_qdisk_fence=1
.......
install_rac=1
rac_port=55321
rac_lms_port=53444
rac_lms_count=7
.......
3、仲裁盘qdisk初始化(任意节点)
[root@node201 KingbaseHA]# ./cluster_manager.sh --qdisk_init
qdisk init start
Writing new quorum disk label 'kcluster' to /dev/sdc.
WARNING: About to destroy all data on /dev/sdc; proceed? (Y/N):
y
/dev/block/8:32:
/dev/sdc:
Magic: eb7a62c2
Label: kcluster
Created: Tue Aug 13 14:41:49 2024
Host: node201
Kernel Sector Size: 512
Recorded Sector Size: 512
/dev/block/8:64:
/dev/disk/by-id/scsi-360014050da191d8d53b4d04a277aa8f5:
/dev/disk/by-id/wwn-0x60014050da191d8d53b4d04a277aa8f5:
/dev/disk/by-path/ip-192.168.1.203:3260-iscsi-iqn.2024-08.pip.cc:server-lun-1:
/dev/sde:
Magic: eb7a62c2
Label: kcluster
Created: Tue Aug 13 14:41:49 2024
Host: node201
Kernel Sector Size: 512
Recorded Sector Size: 512
qdisk init success
4、数据共享存储初始化(任意节点)
[root@node201 KingbaseHA]# ./cluster_manager.sh --cluster_disk_init
rac disk init start
It appears to contain an existing filesystem (gfs2)
This will destroy any data on /dev/sdb
Are you sure you want to proceed? (Y/N): y
Adding journals: Done
Building resource groups: Done
Creating quota file: Done
Writing superblock and syncing: Done
Device: /dev/sdb
Block size: 4096
Device size: 10.71 GB (2807476 blocks)
Filesystem size: 10.71 GB (2807475 blocks)
Journals: 3
Journal size: 32MB
Resource groups: 46
Locking protocol: "lock_dlm"
Lock table: "kcluster:gfs2"
UUID: f8f04582-d09c-4a09-ac11-b35d59d6726b
rac disk init success
5、基础组件初始化(all nodes)
初始化所有集群基础组件,如corosync,pacemaker,corosync-qdevice:
[root@node201 KingbaseHA]# ./cluster_manager.sh --base_configure_init
init kernel soft watchdog start
init kernel soft watchdog success
config host start
config host success
add env varaible in /root/.bashrc
add env variable success
config corosync.conf start
config corosync.conf success
Starting Corosync Cluster Engine (corosync): [WARNING]
add pacemaker daemon user start
useradd: warning: the home directory already exists.
Not copying any file from skel directory into it.
Creating mailbox file: File exists
add pacemaker daemon user success
config pacemaker success
Starting Pacemaker Cluster Manager[ OK ]
config qdevice start
config qdevice success
Starting Qdisk Fenced daemon (qdisk-fenced): [ OK ]
Starting Corosync Qdevice daemon (corosync-qdevice): [ OK ]
Please note the configuration: superuser(system) and port(36321) for database(test) of resource(DB0)
Please note the configuration: superuser(system) and port(36321) for database(test) of resource(DB1)
config kingbase rac start
config kingbase rac success
add_udev_rule start
add_udev_rule success
insmod dlm.ko success
check and mknod for dlm start
check and mknod for dlm success
# 查看root用户环境变量配置
如下所示,脚本执行修改了root的环境变量:
[root@node201 KingbaseHA]# cat /root/.bashrc
# .bashrc
# User specific aliases and functions
alias rm='rm -i'
alias cp='cp -i'
alias mv='mv -i'
# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi
export install_dir=/opt/KingbaseHA
export PATH=/opt/KingbaseHA/python2.7/bin:/opt/KingbaseHA/pacemaker/sbin/:$PATH
export PATH=/opt/KingbaseHA/crmsh/bin:/opt/KingbaseHA/pacemaker/libexec/pacemaker/:$PATH
export PATH=/opt/KingbaseHA/corosync/sbin:/opt/KingbaseHA/corosync-qdevice/sbin:$PATH
export PYTHONPATH=/opt/KingbaseHA/python2.7/lib/python2.7/site-packages/:/opt/KingbaseHA/crmsh/lib/python2.7/site-packages:$PYTHONPATH
export COROSYNC_MAIN_CONFIG_FILE=/opt/KingbaseHA/corosync/etc/corosync/corosync.conf
export CRM_CONFIG_FILE=/opt/KingbaseHA/crmsh/etc/crm/crm.conf
export OCF_ROOT=/opt/KingbaseHA/pacemaker/ocf
export HA_SBIN_DIR=/opt/KingbaseHA/pacemaker/sbin/
export QDEVICE_SBIN_DIR=/opt/KingbaseHA/corosync-qdevice/sbin/
export LD_LIBRARY_PATH=/opt/KingbaseHA/lib64/:$LD_LIBRARY_PATH
export HA_INSTALL_PATH=/opt/KingbaseHA
export PATH=/opt/KingbaseHA/dlm-dlm/sbin:/opt/KingbaseHA/gfs2-utils/sbin:$PATH
export LD_LIBRARY_PATH=/opt/KingbaseHA/corosync/lib/:$LD_LIBRARY_PATH
# 应用环境变量:
[root@node201 KingbaseHA]# source /root/.bashrc
6、查看集群资源状态
[root@node202 KingbaseHA]# crm status
Cluster Summary:
* Stack: corosync
* Current DC: node201 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Tue Aug 13 14:44:47 2024
* Last change: Tue Aug 13 14:44:39 2024 by hacluster via crmd on node201
* 2 nodes configured
* 0 resource instances configured
Node List:
* Online: [ node201 node202 ]
Full List of Resources:
* No resources
# 查看资源状态(crm_mon -1 仅输出一次后退出;去掉 -1 参数可实时监控)
[root@node202 KingbaseHA]# crm_mon -1
Cluster Summary:
* Stack: corosync
* Current DC: node201 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Tue Aug 13 14:45:42 2024
* Last change: Tue Aug 13 14:44:39 2024 by hacluster via crmd on node201
* 2 nodes configured
* 0 resource instances configured
Node List:
* Online: [ node201 node202 ]
Active Resources:
* No active resources
7、初始化共享存储文件系统(gfs2)(all nodes)
[root@node201 KingbaseHA]# ./cluster_manager.sh --init_gfs2
init gfs2 start
current OS kernel version does not support updating gfs2, please confirm whether to continue? (Y/N):
y
init the OS native gfs2 success
8、配置gfs2等资源(任意节点)
配置fence、dlm和gfs2资源:
[root@node201 KingbaseHA]# ./cluster_manager.sh --config_gfs2_resource
config dlm and gfs2 resource start
f8f04582-d09c-4a09-ac11-b35d59d6726b
config dlm and gfs2 resource success
9、查看集群资源状态
如下所示,dlm、gfs2、fence_qdisk等集群资源已被加载:
[root@node201 KingbaseHA]# crm status
Cluster Summary:
* Stack: corosync
* Current DC: node201 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Tue Aug 13 14:48:16 2024
* Last change: Tue Aug 13 14:48:13 2024 by root via cibadmin on node201
* 2 nodes configured
* 6 resource instances configured
Node List:
* Online: [ node201 node202 ]
Full List of Resources:
* fence_qdisk_0 (stonith:fence_qdisk): Started node202
* fence_qdisk_1 (stonith:fence_qdisk): Started node201
* Clone Set: clone-dlm [dlm]:
* Started: [ node201 node202 ]
* Clone Set: clone-gfs2 [gfs2]:
* gfs2 (ocf::heartbeat:Filesystem): Starting node201
* gfs2 (ocf::heartbeat:Filesystem): Starting node202
10、查看集群cib配置
[root@node201 KingbaseHA]# crm config show
node 1: node201
node 2: node202
primitive dlm ocf:pacemaker:controld \
params daemon="/opt/KingbaseHA/dlm-dlm/sbin/dlm_controld" dlm_tool="/opt/KingbaseHA/dlm-dlm/sbin/dlm_tool" args="-s 0 -f 0" \
op start interval=0 \
op stop interval=0 \
op monitor interval=60 timeout=60
primitive fence_qdisk_0 stonith:fence_qdisk \
params qdisk_path="/dev/sdc" qdisk_fence_tool="/opt/KingbaseHA/corosync-qdevice/sbin/qdisk-fence-tool" pcmk_host_list=node201 \
op monitor interval=60s \
meta failure-timeout=5min
primitive fence_qdisk_1 stonith:fence_qdisk \
params qdisk_path="/dev/sdc" qdisk_fence_tool="/opt/KingbaseHA/corosync-qdevice/sbin/qdisk-fence-tool" pcmk_host_list=node202 \
op monitor interval=60s \
meta failure-timeout=5min
primitive gfs2 Filesystem \
params device="-U f8f04582-d09c-4a09-ac11-b35d59d6726b" directory="/sharedata/data_gfs2" fstype=gfs2 \
op start interval=0 timeout=60 \
op stop interval=0 timeout=60 \
op monitor interval=30s timeout=60 OCF_CHECK_LEVEL=20 \
meta failure-timeout=5min
clone clone-dlm dlm \
meta interleave=true target-role=Started
clone clone-gfs2 gfs2 \
meta interleave=true target-role=Started
colocation cluster-colo1 inf: clone-gfs2 clone-dlm
order cluster-order1 clone-dlm clone-gfs2
location fence_qdisk_0-on-node202 fence_qdisk_0 1800: node202
location fence_qdisk_1-on-node201 fence_qdisk_1 1800: node201
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=2.0.3-4b1f869f0f \
cluster-infrastructure=corosync \
cluster-name=kcluster
三、创建数据库及实例
1、初始化数据库实例(任意节点)
[root@node201 KingbaseHA]# ./cluster_manager.sh --init_rac
init KingbaseES RAC start
create_rac_share_dir start
create_rac_share_dir success
......
成功。您现在可以用下面的命令开启数据库服务器:
./sys_ctl -D /sharedata/data_gfs2/kingbase/data -l 日志文件 start
init KingbaseES RAC success
2、配置数据库资源(任意节点)
如下所示,数据库资源DB已被集群加载:
[root@node201 KingbaseHA]# ./cluster_manager.sh --config_rac_resource
crm configure DB resource start
crm configure DB resource end
[root@node201 KingbaseHA]# crm config show
node 1: node201
node 2: node202
primitive DB ocf:kingbase:kingbase \
params sys_ctl="/opt/Kingbase/ES/V8/Server/bin/sys_ctl" ksql="/opt/Kingbase/ES/V8/Server/bin/ksql" sys_isready="/opt/Kingbase/ES/V8/Server/bin/sys_isready" kb_data="/sharedata/data_gfs2/kingbase/data" kb_dba=kingbase kb_host=0.0.0.0 kb_user=system kb_port=55321 kb_db=template1 logfile="/opt/Kingbase/ES/V8/Server/log/kingbase1.log" \
op start interval=0 timeout=120 \
op stop interval=0 timeout=120 \
op monitor interval=9s timeout=30 on-fail=stop \
meta failure-timeout=5min
primitive dlm ocf:pacemaker:controld \
params daemon="/opt/KingbaseHA/dlm-dlm/sbin/dlm_controld" dlm_tool="/opt/KingbaseHA/dlm-dlm/sbin/dlm_tool" args="-s 0 -f 0" \
op start interval=0 \
op stop interval=0 \
op monitor interval=60 timeout=60
primitive fence_qdisk_0 stonith:fence_qdisk \
params qdisk_path="/dev/sdc" qdisk_fence_tool="/opt/KingbaseHA/corosync-qdevice/sbin/qdisk-fence-tool" pcmk_host_list=node201 \
op monitor interval=60s \
meta failure-timeout=5min
primitive fence_qdisk_1 stonith:fence_qdisk \
params qdisk_path="/dev/sdc" qdisk_fence_tool="/opt/KingbaseHA/corosync-qdevice/sbin/qdisk-fence-tool" pcmk_host_list=node202 \
op monitor interval=60s \
meta failure-timeout=5min
primitive gfs2 Filesystem \
params device="-U f8f04582-d09c-4a09-ac11-b35d59d6726b" directory="/sharedata/data_gfs2" fstype=gfs2 \
op start interval=0 timeout=60 \
op stop interval=0 timeout=60 \
op monitor interval=30s timeout=60 OCF_CHECK_LEVEL=20 \
meta failure-timeout=5min
clone clone-DB DB \
meta interleave=true target-role=Started
clone clone-dlm dlm \
meta interleave=true target-role=Started
clone clone-gfs2 gfs2 \
meta interleave=true target-role=Started
colocation cluster-colo1 inf: clone-gfs2 clone-dlm
order cluster-order1 clone-dlm clone-gfs2
order cluster-order2 clone-dlm clone-gfs2 clone-DB
location fence_qdisk_0-on-node202 fence_qdisk_0 1800: node202
location fence_qdisk_1-on-node201 fence_qdisk_1 1800: node201
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=2.0.3-4b1f869f0f \
cluster-infrastructure=corosync \
cluster-name=kcluster \
load-threshold="0%"
3、查看资源状态
[root@node201 KingbaseHA]# ./cluster_manager.sh status
corosync (pid 20963) is running...
pacemakerd (pid 21004) is running...
corosync-qdevice (pid 21103) is running...
qdisk-fenced (pid 21077) is running...
[root@node201 KingbaseHA]# crm status
Cluster Summary:
* Stack: corosync
* Current DC: node201 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Tue Aug 13 14:58:01 2024
* Last change: Tue Aug 13 14:53:19 2024 by root via cibadmin on node201
* 2 nodes configured
* 8 resource instances configured
Node List:
* Online: [ node201 node202 ]
Full List of Resources:
* fence_qdisk_0 (stonith:fence_qdisk): Started node202
* fence_qdisk_1 (stonith:fence_qdisk): Started node201
* Clone Set: clone-dlm [dlm]:
* Started: [ node201 node202 ]
* Clone Set: clone-gfs2 [gfs2]:
* Started: [ node201 node202 ]
* Clone Set: clone-DB [DB]: # 数据库资源DB被加载
* Started: [ node201 node202 ]
4、启动和停止数据库资源DB
# 数据库资源DB状态
[root@node201 KingbaseHA]# crm resource status clone-DB
resource clone-DB is running on: node201
resource clone-DB is running on: node202
# 数据库服务状态
[root@node201 KingbaseHA]# netstat -antlp |grep 55321
tcp 0 0 0.0.0.0:55321 0.0.0.0:* LISTEN 26623/kingbase
[root@node201 KingbaseHA]# ps -ef |grep kingbase
......
kingbase 26623 1 0 14:53 ? 00:00:00 /opt/Kingbase/ES/V8/KESRealPro/V008R006C008M030B0010/Server/bin/kingbase -D /sharedata/data_gfs2/kingbase/data -c config_file=/sharedata/data_gfs2/kingbase/data/kingbase.conf -c log_directory=sys_log -h 0.0.0.0
kingbase 26708 26623 0 14:53 ? 00:00:00 kingbase: logger
kingbase 26721 26623 0 14:53 ? 00:00:00 kingbase: lmon
kingbase 26725 26623 0 14:53 ? 00:00:00 kingbase: lms 1
kingbase 26726 26623 0 14:53 ? 00:00:00 kingbase: lms 2
kingbase 26727 26623 0 14:53 ? 00:00:00 kingbase: lms 3
kingbase 26728 26623 0 14:53 ? 00:00:00 kingbase: lms 4
kingbase 26729 26623 0 14:53 ? 00:00:00 kingbase: lms 5
kingbase 26730 26623 0 14:53 ? 00:00:00 kingbase: lms 6
kingbase 26731 26623 0 14:53 ? 00:00:00 kingbase: lms 7
kingbase 26959 26623 0 14:53 ? 00:00:00 kingbase: checkpointer performing checkpoint
kingbase 26960 26623 0 14:53 ? 00:00:00 kingbase: background writer
kingbase 26961 26623 0 14:53 ? 00:00:00 kingbase: global deadlock checker
kingbase 26962 26623 0 14:53 ? 00:00:00 kingbase: transaction syncer
kingbase 26963 26623 0 14:53 ? 00:00:00 kingbase: walwriter
kingbase 26964 26623 0 14:53 ? 00:00:00 kingbase: autovacuum launcher
kingbase 26965 26623 0 14:53 ? 00:00:00 kingbase: archiver last was 000000010000000000000005
kingbase 26968 26623 0 14:53 ? 00:00:00 kingbase: stats collector
kingbase 26969 26623 0 14:53 ? 00:00:00 kingbase: kwr collector
kingbase 26970 26623 0 14:53 ? 00:00:00 kingbase: ksh writer
kingbase 26971 26623 0 14:53 ? 00:00:00 kingbase: ksh collector
kingbase 26972 26623 0 14:53 ? 00:00:00 kingbase: logical replication launcher
# 停止数据库资源DB
[root@node202 KingbaseHA]# crm resource stop clone-DB
[root@node202 KingbaseHA]# crm resource status clone-DB
resource clone-DB is NOT running
resource clone-DB is NOT running
# 查看集群资源状态
[root@node201 KingbaseHA]# crm status
Cluster Summary:
* Stack: corosync
* Current DC: node201 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Tue Aug 13 15:06:59 2024
* Last change: Tue Aug 13 15:05:58 2024 by root via cibadmin on node201
* 2 nodes configured
* 8 resource instances configured (2 DISABLED)
Node List:
* Online: [ node201 node202 ]
Full List of Resources:
* fence_qdisk_0 (stonith:fence_qdisk): Started node202
* fence_qdisk_1 (stonith:fence_qdisk): Started node201
* Clone Set: clone-dlm [dlm]:
* Started: [ node201 node202 ]
* Clone Set: clone-gfs2 [gfs2]:
* Started: [ node201 node202 ]
* Clone Set: clone-DB [DB]: # 数据库资源被停止
* Stopped (disabled): [ node201 node202 ]
# 启动数据库资源DB
[root@node201 KingbaseHA]# crm resource start clone-DB
[root@node201 KingbaseHA]# crm resource status clone-DB
resource clone-DB is running on: node201
resource clone-DB is running on: node202
[root@node201 KingbaseHA]# netstat -antlp |grep 55321
tcp 0 0 0.0.0.0:55321 0.0.0.0:* LISTEN 2611/kingbase
四、访问数据库
test=# create database prod;
CREATE DATABASE
test=# \c prod
You are now connected to database "prod" as userName "system".
prod=# create table t1 (id int ,name varchar(10));
CREATE TABLE
prod=# insert into t1 values (generate_series(1,10000),'usr'||generate_series(1,10000));
INSERT 0 10000
prod=# select count(*) from t1;
count
-------
10000
(1 row)
另一节点:
prod=# select count(*) from t1;
count
-------
10000
(1 row)
五、启停集群
1、停止集群服务
[root@node201 KingbaseHA]# ./cluster_manager.sh stop
Signaling Pacemaker Cluster Manager to terminate[ OK ]
Waiting for cluster services to unload...[ OK ]
Signaling Qdisk Fenced daemon (qdisk-fenced) to terminate: [ OK ]
Waiting for qdisk-fenced services to unload:..[ OK ]
Signaling Corosync Qdevice daemon (corosync-qdevice) to terminate: [ OK ]
Waiting for corosync-qdevice services to unload:.[ OK ]
Signaling Corosync Cluster Engine (corosync) to terminate: [ OK ]
Waiting for corosync services to unload:..[ OK ]
2、启动集群服务
[root@node201 KingbaseHA]# ./cluster_manager.sh start
Waiting for node failover handling:[ OK ]
Starting Corosync Cluster Engine (corosync): [WARNING]
clean qdisk fence flag start
clean qdisk fence flag success
Starting Qdisk Fenced daemon (qdisk-fenced): [ OK ]
Starting Corosync Qdevice daemon (corosync-qdevice): [ OK ]
Waiting for quorate:.............[ OK ]
Starting Pacemaker Cluster Manager[ OK ]
3、查看集群资源状态
如下所示,启动集群后,其资源并未加载:
[root@node201 KingbaseHA]# crm status
Cluster Summary:
* Stack: corosync
* Current DC: node202 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Tue Aug 13 15:08:52 2024
* Last change: Tue Aug 13 15:08:50 2024 by hacluster via crmd on node202
* 2 nodes configured
* 0 resource instances configured
Node List:
* Online: [ node201 node202 ]
Full List of Resources:
* No resources
4、加载和启动gfs2(dlm)资源
[root@node201 KingbaseHA]# ./cluster_manager.sh --config_gfs2_resource
config dlm and gfs2 resource start
f8f04582-d09c-4a09-ac11-b35d59d6726b
config dlm and gfs2 resource success
[root@node201 KingbaseHA]# crm status
Cluster Summary:
* Stack: corosync
* Current DC: node202 (version 2.0.3-4b1f869f0f) - partition with quorum
* Last updated: Tue Aug 13 15:16:54 2024
* Last change: Tue Aug 13 15:16:51 2024 by root via cibadmin on node201
* 2 nodes configured
* 6 resource instances configured
Node List:
* Online: [ node201 node202 ]
Full List of Resources:
* fence_qdisk_0 (stonith:fence_qdisk): Started node202
* fence_qdisk_1 (stonith:fence_qdisk): Started node201
* Clone Set: clone-dlm [dlm]:
* Started: [ node201 node202 ]
* Clone Set: clone-gfs2 [gfs2]:
* gfs2 (ocf::heartbeat:Filesystem): Starting node201
* gfs2 (ocf::heartbeat:Filesystem): Starting node202
5、加载和启动数据库资源DB
[root@node201 KingbaseHA]# ./cluster_manager.sh --config_rac_resource
crm configure DB resource start
crm configure DB resource end
# 如下所示,数据库资源DB被加载到集群:
[root@node201 KingbaseHA]# crm config show
node 1: node201
node 2: node202
primitive DB ocf:kingbase:kingbase \
params sys_ctl="/opt/Kingbase/ES/V8/Server/bin/sys_ctl" ksql="/opt/Kingbase/ES/V8/Server/bin/ksql" sys_isready="/opt/Kingbase/ES/V8/Server/bin/sys_isready" kb_data="/sharedata/data_gfs2/kingbase/data" kb_dba=kingbase kb_host=0.0.0.0 kb_user=system kb_port=55321 kb_db=template1 logfile="/opt/Kingbase/ES/V8/Server/log/kingbase1.log" \
op start interval=0 timeout=120 \
op stop interval=0 timeout=120 \
op monitor interval=9s timeout=30 on-fail=stop \
meta failure-timeout=5min
primitive dlm ocf:pacemaker:controld \
params daemon="/opt/KingbaseHA/dlm-dlm/sbin/dlm_controld" dlm_tool="/opt/KingbaseHA/dlm-dlm/sbin/dlm_tool" args="-s 0 -f 0" \
op start interval=0 \
op stop interval=0 \
op monitor interval=60 timeout=60
primitive fence_qdisk_0 stonith:fence_qdisk \
params qdisk_path="/dev/sdc" qdisk_fence_tool="/opt/KingbaseHA/corosync-qdevice/sbin/qdisk-fence-tool" pcmk_host_list=node201 \
op monitor interval=60s \
meta failure-timeout=5min
primitive fence_qdisk_1 stonith:fence_qdisk \
params qdisk_path="/dev/sdc" qdisk_fence_tool="/opt/KingbaseHA/corosync-qdevice/sbin/qdisk-fence-tool" pcmk_host_list=node202 \
op monitor interval=60s \
meta failure-timeout=5min
primitive gfs2 Filesystem \
params device="-U f8f04582-d09c-4a09-ac11-b35d59d6726b" directory="/sharedata/data_gfs2" fstype=gfs2 \
op start interval=0 timeout=60 \
op stop interval=0 timeout=60 \
op monitor interval=30s timeout=60 OCF_CHECK_LEVEL=20 \
meta failure-timeout=5min
clone clone-DB DB \
meta interleave=true target-role=Started
clone clone-dlm dlm \
meta interleave=true target-role=Started
clone clone-gfs2 gfs2 \
meta interleave=true target-role=Started
colocation cluster-colo1 inf: clone-gfs2 clone-dlm
order cluster-order1 clone-dlm clone-gfs2
order cluster-order2 clone-dlm clone-gfs2 clone-DB
location fence_qdisk_0-on-node202 fence_qdisk_0 1800: node202
location fence_qdisk_1-on-node201 fence_qdisk_1 1800: node201
property cib-bootstrap-options: \
have-watchdog=false \
dc-version=2.0.3-4b1f869f0f \
cluster-infrastructure=corosync \
cluster-name=kcluster \
load-threshold="0%"
6、数据库资源状态
[root@node201 KingbaseHA]# crm resource status clone-DB
resource clone-DB is running on: node201
resource clone-DB is running on: node202
[root@node201 KingbaseHA]# !net
netstat -antlp |grep 55321
tcp 0 0 0.0.0.0:55321 0.0.0.0:* LISTEN 16835/kingbase
参考文档:
环境准备和具体部署参考:https://help.kingbase.com.cn/v9/highly/RAC/rac/rac-3.html#id17