OpenStack High Availability (1): Environment Components
##1. Initialize the system
##2. Configure the NTP service
##3. Configure the hosts file
##4. Configure the yum repositories
##5. Enable the OpenStack repository on all nodes
yum install centos-release-openstack-train -y
yum install python-openstackclient -y
pacemaker
##6. Install Pacemaker on the three controller nodes
yum install -y pcs pacemaker corosync fence-agents-all resource-agents
systemctl enable pcsd;systemctl restart pcsd;systemctl status pcsd
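## (Optional) pcsd listens on TCP port 2224; a quick sanity check that the daemon is up on each node (assumes ss from iproute is available):
ss -tnlp | grep 2224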
## Configure passwordless SSH trust between the hosts
ssh-keygen -t rsa
ssh-copy-id -i /root/.ssh/id_rsa.pub root@controller01
ssh-copy-id -i /root/.ssh/id_rsa.pub root@controller02
ssh-copy-id -i /root/.ssh/id_rsa.pub root@controller03
## Set a password for the hacluster user on all three controller nodes
echo hacluster|passwd --stdin hacluster
[root@controller01 ~]# echo hacluster|passwd --stdin hacluster
Changing password for user hacluster.
passwd: all authentication tokens updated successfully.
## Create the cluster
pcs cluster auth controller01 controller02 controller03 -u hacluster -p hacluster --force
pcs cluster setup --force --name cluster01 controller01 controller02 controller03
pcs cluster enable --all
pcs cluster start --all
## Check the cluster status
pcs status cluster
## Check the corosync status
pcs status corosync
## Check the fencing (stonith) resources
pcs stonith show
[root@controller01 ~]# pcs cluster auth controller01 controller02 controller03 -u hacluster -p hacluster --force
controller01: Authorized
controller02: Authorized
controller03: Authorized
[root@controller01 ~]# pcs cluster setup --force --name cluster01 controller01 controller02 controller03
Destroying cluster on nodes: controller01, controller02, controller03...
controller01: Stopping Cluster (pacemaker)...
controller03: Stopping Cluster (pacemaker)...
controller02: Stopping Cluster (pacemaker)...
controller01: Successfully destroyed cluster
controller02: Successfully destroyed cluster
controller03: Successfully destroyed cluster
Sending 'pacemaker_remote authkey' to 'controller01', 'controller02', 'controller03'
controller01: successful distribution of the file 'pacemaker_remote authkey'
controller02: successful distribution of the file 'pacemaker_remote authkey'
controller03: successful distribution of the file 'pacemaker_remote authkey'
Sending cluster config files to the nodes...
controller01: Succeeded
controller02: Succeeded
controller03: Succeeded
Synchronizing pcsd certificates on nodes controller01, controller02, controller03...
controller01: Success
controller02: Success
controller03: Success
Restarting pcsd on the nodes in order to reload the certificates...
controller01: Success
controller02: Success
controller03: Success
[root@controller01 ~]# pcs cluster enable --all
controller01: Cluster Enabled
controller02: Cluster Enabled
controller03: Cluster Enabled
[root@controller01 ~]# pcs cluster start --all
controller01: Starting Cluster (corosync)...
controller02: Starting Cluster (corosync)...
controller03: Starting Cluster (corosync)...
controller01: Starting Cluster (pacemaker)...
controller02: Starting Cluster (pacemaker)...
controller03: Starting Cluster (pacemaker)...
[root@controller01 ~]# pcs status cluster
Cluster Status:
Stack: unknown
Current DC: NONE
Last updated: Wed Jul 29 17:21:30 2020 Last change:
0 nodes configured
0 resources configured
PCSD Status:
controller01: Online
controller03: Online
controller02: Online
[root@controller01 ~]# pcs status corosync
Membership information
----------------------
Nodeid Votes Name
1 1 controller01 (local)
2 1 controller02
3 1 controller03
[root@controller01 ~]# pcs stonith show
NO stonith devices configured
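## (Optional) a minimal sketch of checking the corosync ring directly on any node; corosync-cfgtool ships with corosync:
corosync-cfgtool -s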
haproxy
##7. Install HAProxy on the three controller nodes
yum -y install haproxy
echo "net.ipv4.ip_nonlocal_bind=1" >> /etc/sysctl.conf
echo "net.ipv4.ip_forward = 1" >> /etc/sysctl.conf
sysctl -p
## Edit the HAProxy configuration file
cp /etc/haproxy/haproxy.cfg /etc/haproxy/haproxy.cfg.bk
vim /etc/haproxy/haproxy.cfg
[root@controller01 ~]# egrep -v '^$|#' /etc/haproxy/haproxy.cfg
global
    log         127.0.0.1 local2
    chroot      /var/lib/haproxy
    pidfile     /var/run/haproxy.pid
    maxconn     4000
    user        haproxy
    group       haproxy
    daemon
    stats socket /var/lib/haproxy/stats
defaults
    mode                    tcp
    log                     global
    option                  httplog
    option                  dontlognull
    option                  http-server-close
    option forwardfor       except 127.0.0.0/8
    option                  redispatch
    retries                 3
    timeout http-request    10s
    timeout queue           1m
    timeout connect         10s
    timeout client          1m
    timeout server          1m
    timeout http-keep-alive 10s
    timeout check           10s
    maxconn                 3000
frontend vip-db
    bind 10.199.103.21:3306
    timeout client 90m
    default_backend db-galera
backend db-galera
    option httpchk
    option tcpka
    stick-table type ip size 1000
    stick on dst
    timeout server 90m
    server controller01 10.199.103.13:3306 check inter 2s port 9200 backup on-marked-down shutdown-sessions
    server controller02 10.199.103.15:3306 check inter 2s port 9200 backup on-marked-down shutdown-sessions
    server controller03 10.199.103.17:3306 check inter 2s port 9200 backup on-marked-down shutdown-sessions
listen vip-keystone
    bind 10.199.103.21:5000
    mode http
    server controller01 10.199.103.13:5000 check port 5000
    server controller02 10.199.103.15:5000 check port 5000
    server controller03 10.199.103.17:5000 check port 5000
listen vip-glance
    bind 10.199.103.21:9292
    mode http
    server controller01 10.199.103.13:9292 check port 9292
    server controller02 10.199.103.15:9292 check port 9292
    server controller03 10.199.103.17:9292 check port 9292
listen vip-placement
    bind 10.199.103.21:8778
    mode http
    server controller01 10.199.103.13:8778 check port 8778
    server controller02 10.199.103.15:8778 check port 8778
    server controller03 10.199.103.17:8778 check port 8778
listen vip-nova-api
    bind 10.199.103.21:8774
    mode http
    server controller01 10.199.103.13:8774 check port 8774
    server controller02 10.199.103.15:8774 check port 8774
    server controller03 10.199.103.17:8774 check port 8774
listen vip-nova-metadata
    bind 10.199.103.21:8775
    mode http
    server controller01 10.199.103.13:8775 check port 8775
    server controller02 10.199.103.15:8775 check port 8775
    server controller03 10.199.103.17:8775 check port 8775
listen vip-nova-vnc
    bind 10.199.103.21:6080
    mode http
    server controller01 10.199.103.13:6080 check port 6080
    server controller02 10.199.103.15:6080 check port 6080
    server controller03 10.199.103.17:6080 check port 6080
listen vip-neutron
    bind 10.199.103.21:9696
    mode http
    server controller01 10.199.103.13:9696 check port 9696
    server controller02 10.199.103.15:9696 check port 9696
    server controller03 10.199.103.17:9696 check port 9696
listen vip-dashboard
    bind 10.199.103.21:80
    mode http
    server controller01 10.199.103.13:80 check port 80
    server controller02 10.199.103.15:80 check port 80
    server controller03 10.199.103.17:80 check port 80
listen vip-cinder
    bind 10.199.103.21:8776
    mode http
    server controller01 10.199.103.13:8776 check port 8776
    server controller02 10.199.103.15:8776 check port 8776
    server controller03 10.199.103.17:8776 check port 8776
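## (Optional) a minimal syntax check of the new configuration before haproxy is handed over to pacemaker:
haproxy -c -f /etc/haproxy/haproxy.cfg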
## Disable stonith:
## STONITH relies on a physical fencing device that can power a node off on command. This environment has no such device, and if the option is left enabled, pcs commands keep printing errors about it.
pcs property set stonith-enabled=false
## With only two nodes quorum cannot be maintained, so ignore the quorum policy:
pcs property set no-quorum-policy=ignore
## Validate the cluster configuration
crm_verify -L -V
## Create the haproxy resource in the pacemaker cluster
pcs resource create lb-haproxy systemd:haproxy --clone
## Create the VIP
pcs resource create vip IPaddr2 ip=10.199.103.21
# Set the resource start order: vip first, then lb-haproxy-clone;
# the resource constraints can be inspected with "cibadmin --query --scope constraints"
pcs constraint order start vip then lb-haproxy-clone kind=Optional
# The upstream recommendation is to run the vip on the node where haproxy is active; colocating lb-haproxy-clone with vip constrains both resources to the same node;
# once this constraint is applied, pcs stops haproxy on the nodes that do not currently hold the vip
##pcs constraint colocation add lb-haproxy-clone with vip
pcs resource
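## (Optional) sanity check: the vip resource should be started on exactly one node and the haproxy clone on all three; the VIP 10.199.103.21 from this guide should appear on the node holding the resource:
pcs status resources
ip addr | grep 10.199.103.21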
mariadb
##8. Install the database on the three controller nodes
## Remove any existing database packages and data
yum remove mariadb mariadb-server python2-PyMySQL -y
yum remove mariadb-server-galera mariadb-galera-common galera xinetd rsync -y
yum remove mariadb-galera-server xinetd rsync -y
rm -rf /var/lib/mysql
rm -rf /etc/my.cnf.d
rm -rf /var/log/mariadb
## If files are missing after reinstalling the packages with yum, reboot the server to resolve it
yum install mariadb-galera-server xinetd rsync -y
##yum install -y mariadb mariadb-galera-server mariadb-galera-common galera rsync xinetd
## Do not enable the database to start on boot
## Initialize the database
systemctl restart mariadb;systemctl status mariadb
mysql_secure_installation
## Configure the HAProxy health check (all nodes)
## Log in to the database, create the clustercheck user, and grant it local access to the database
systemctl start mariadb.service
mysql -e "CREATE USER 'clustercheck'@'localhost' IDENTIFIED BY 'root1234';"
systemctl stop mariadb.service
## Create the configuration file /etc/sysconfig/clustercheck for the clustercheck user
cat > /etc/sysconfig/clustercheck << EOF
MYSQL_USERNAME="clustercheck"
MYSQL_PASSWORD="root1234"
MYSQL_HOST="localhost"
MYSQL_PORT="3306"
EOF
## Create the HAProxy monitoring service configuration /etc/xinetd.d/galera-monitor
cat > /etc/xinetd.d/galera-monitor << EOF
service galera-monitor
{
    port = 9200
    disable = no
    socket_type = stream
    protocol = tcp
    wait = no
    user = root
    group = root
    groups = yes
    server = /usr/bin/clustercheck
    type = UNLISTED
    per_source = UNLIMITED
    log_on_success =
    log_on_failure = HOST
    flags = REUSE
}
EOF
systemctl daemon-reload
systemctl enable xinetd;systemctl restart xinetd
systemctl status xinetd
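## (Optional) once the Galera cluster resource is running (created below), the health check served by xinetd on port 9200 should return HTTP 200; a minimal probe against controller01's address from this guide:
curl -i http://10.199.103.13:9200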
## Increase the mariadb maximum connections
## Error seen in OpenStack when the database has too many connections: pymysql.err.OperationalError, 1040, u'Too many connections'
## Fix
## Raise MySQL's file descriptor limit on Linux: edit /usr/lib/systemd/system/mariadb.service and add the following under [Service]:
##LimitNOFILE=65535
##LimitNPROC=65535
yum -y install openstack-utils
openstack-config --set /usr/lib/systemd/system/mariadb.service Service LimitNOFILE 65535
openstack-config --set /usr/lib/systemd/system/mariadb.service Service LimitNPROC 65535
grep Limit /usr/lib/systemd/system/mariadb.service
systemctl --system daemon-reload
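## (Optional) confirm that systemd picked up the new limits after the daemon-reload:
systemctl show mariadb | grep -E 'LimitNOFILE|LimitNPROC'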
## Edit the mariadb configuration files (bind-address and the wsrep_node_* values below are for controller03; use each node's own name and IP)
cat > /etc/my.cnf.d/openstack.cnf << EOF
[mysqld]
bind-address = 10.199.103.17
default-storage-engine = innodb
innodb_file_per_table = on
max_connections = 4096
collation-server = utf8_general_ci
character-set-server = utf8
EOF
mv /etc/my.cnf.d/galera.cnf /etc/my.cnf.d/galera.cnf.bk
cat > /etc/my.cnf.d/galera.cnf << EOF
[mysqld]
skip-name-resolve=1
binlog_format=ROW
default-storage-engine=innodb
innodb_autoinc_lock_mode=2
bind-address=10.199.103.17
wsrep_on=1
wsrep_provider=/usr/lib64/galera/libgalera_smm.so
wsrep_provider_options="pc.recovery=TRUE;gcache.size=1024M"
wsrep_cluster_name="openstack_cluster"
wsrep_cluster_address="gcomm://controller01,controller02,controller03"
wsrep_node_name="controller03"
wsrep_node_address="10.199.103.17"
wsrep_slave_threads=1
wsrep_certify_nonPK=1
wsrep_max_ws_rows=1048576
wsrep_max_ws_size=2147483647
wsrep_debug=0
wsrep_convert_LOCK_to_trx=0
wsrep_retry_autocommit=1
wsrep_auto_increment_control=1
wsrep_drupal_282555_workaround=0
wsrep_causal_reads=0
wsrep_notify_cmd=
wsrep_sst_method=rsync
wsrep_sst_auth=root:
EOF
## Create the database cluster resource
pcs resource create galera-cluster ocf:heartbeat:galera enable_creation=true \
    wsrep_cluster_address="gcomm://controller01,controller02,controller03" \
    additional_parameters='--open-files-limit=16384' \
    meta master-max=3 ordered=true \
    op promote timeout=300s on-fail=block --master
pcs constraint order start lb-haproxy-clone then galera-cluster-master
pcs resource
## Verify the cluster status
mysql -uroot -p1234qwer -e "SHOW STATUS like 'wsrep_cluster_%';"
[root@controller01 ~]# mysql -uroot -p1234qwer -e "SHOW STATUS like 'wsrep_cluster_%';"
+--------------------------+--------------------------------------+
| Variable_name | Value |
+--------------------------+--------------------------------------+
| wsrep_cluster_conf_id | 3 |
| wsrep_cluster_size | 3 |
| wsrep_cluster_state_uuid | 93d0225d-d185-11ea-8600-86a115e07ab4 |
| wsrep_cluster_status | Primary |
+--------------------------+--------------------------------------+
## Verify on all controller nodes
clustercheck
## Grant database privileges
mysql -uroot -p1234qwer
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '1234qwer';
GRANT ALL PRIVILEGES ON *.* TO 'root'@'localhost' IDENTIFIED BY '1234qwer';
FLUSH PRIVILEGES;
quit
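## (Optional) with the grants in place, connectivity through the HAProxy VIP can be verified from any controller node; a minimal sketch using the VIP and root password from this guide:
mysql -h 10.199.103.21 -P 3306 -uroot -p1234qwer -e "SELECT @@wsrep_node_name;"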
rabbitmq
##9. Install RabbitMQ on the three controller nodes
yum install rabbitmq-server -y
## Listen on the local address (use each node's own IP; the example below is for controller03)
cat > /etc/rabbitmq/rabbitmq-env.conf << EOF
RABBITMQ_NODE_IP_ADDRESS=10.199.103.17
RABBITMQ_NODE_PORT=5672
EOF
cat /etc/rabbitmq/rabbitmq-env.conf
## Start the rabbitmq service on one controller node first; controller01 is used here
systemctl enable rabbitmq-server.service;systemctl restart rabbitmq-server.service
systemctl status rabbitmq-server.service
## Keep the Erlang cookie consistent by copying it from the first node
scp /var/lib/rabbitmq/.erlang.cookie controller02:/var/lib/rabbitmq/
scp /var/lib/rabbitmq/.erlang.cookie controller03:/var/lib/rabbitmq/
## Fix the owner/group of the .erlang.cookie file on controller02/03
chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie
ll /var/lib/rabbitmq/.erlang.cookie
## Start the rabbitmq service on controller02/03
systemctl enable rabbitmq-server.service;systemctl restart rabbitmq-server.service
## Build the cluster: controller02/03 join the cluster as RAM nodes
rabbitmqctl stop_app
rabbitmqctl join_cluster --ram rabbit@controller01
rabbitmqctl start_app
## Enable mirrored-queue high availability
rabbitmqctl set_policy ha-all "^" '{"ha-mode":"all"}'
## List the mirrored-queue policies
rabbitmqctl list_policies
[root@controller01 haproxy]# rabbitmqctl list_policies
Listing policies
/ ha-all all ^ {"ha-mode":"all"} 0
## Verify the cluster status
rabbitmqctl cluster_status
[root@controller01 ~]# rabbitmqctl cluster_status
Cluster status of node rabbit@controller01
[{nodes,[{disc,[rabbit@controller01,rabbit@controller02,
rabbit@controller03]}]},
{running_nodes,[rabbit@controller02,rabbit@controller03,rabbit@controller01]},
{cluster_name,<<"rabbit@controller01">>},
{partitions,[]},
{alarms,[{rabbit@controller02,[]},
{rabbit@controller03,[]},
{rabbit@controller01,[]}]}]
## Enable the web management plugin on the three controller nodes
rabbitmq-plugins enable rabbitmq_management
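## (Optional) confirm the plugin is active; the web UI is then reachable on each node at http://<node-ip>:15672
rabbitmq-plugins list | grep management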
## Create the rabbitmq resource in the pacemaker cluster
pcs resource create rabbitmq-cluster ocf:rabbitmq:rabbitmq-server-ha --master erlang_cookie=DPMDALGUKEOMPTHWPYKC node_port=5672 \
    op monitor interval=30 timeout=120 \
    op monitor interval=27 role=Master timeout=120 \
    op monitor interval=103 role=Slave timeout=120 OCF_CHECK_LEVEL=30 \
    op start interval=0 timeout=120 \
    op stop interval=0 timeout=120 \
    op promote interval=0 timeout=60 \
    op demote interval=0 timeout=60 \
    op notify interval=0 timeout=60 \
    meta notify=true ordered=false interleave=false master-max=1 \
    master-node-max=1
## Create the openstack user
rabbitmqctl add_user openstack RABBIT_PASS
rabbitmqctl set_permissions openstack ".*" ".*" ".*"
[root@controller ~]# rabbitmqctl list_users
Listing users
openstack []
guest [administrator]
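## (Optional) the new credentials can be checked locally; authenticate_user is available in recent rabbitmqctl versions:
rabbitmqctl authenticate_user openstack RABBIT_PASS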
memcache
##10. Install memcached on the three controller nodes
yum install memcached python-memcached -y
sed -i 's|127.0.0.1,::1|0.0.0.0|g' /etc/sysconfig/memcached
systemctl enable memcached.service;systemctl restart memcached.service
## Create the memcached resource from any node in the pacemaker cluster
pcs resource create memcached systemd:memcached --clone interleave=true
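## (Optional) memcached should now answer on every node; a minimal check using memcached-tool, which ships with the memcached package (example targets controller01):
memcached-tool 10.199.103.13:11211 stats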
etcd
##11. Install etcd (the configuration below is for controller01; adjust ETCD_NAME and the IPs on each node)
yum install etcd -y
mv /etc/etcd/etcd.conf /etc/etcd/etcd.conf.bk
cat > /etc/etcd/etcd.conf << EOF
ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_LISTEN_PEER_URLS="http://10.199.103.13:2380"
ETCD_LISTEN_CLIENT_URLS="http://10.199.103.13:2379"
ETCD_NAME="controller01"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.199.103.13:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://10.199.103.13:2379"
ETCD_INITIAL_CLUSTER="controller01=http://10.199.103.13:2380,controller02=http://10.199.103.15:2380,controller03=http://10.199.103.17:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-01"
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
systemctl enable etcd;systemctl restart etcd
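## (Optional) once etcd is running on all three nodes, cluster health can be checked with etcdctl (v2 API, the default for the CentOS etcd package); the example queries controller01:
etcdctl --endpoints http://10.199.103.13:2379 cluster-health
etcdctl --endpoints http://10.199.103.13:2379 member list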