PG-基于patroni高可用方案
部署实施
规划
软件版本规划如下:
- 操作系统: rhel Linux 7.6
- 数据库: PostgreSQL 12.2
- Python: Python 3.8.2
- Etcd: etcd-v3.3.22
- Patroni: patroni 1.6.5
部署规划如下:
主机 | IP | 组件 | 备注 |
---|---|---|---|
pg1 | 192.168.10.190 | PostgreSQL、Patroni、Etcd | 主节点 |
pg2 | 192.168.10.191 | PostgreSQL、Patroni、Etcd | 备节点1 |
pg3 | 192.168.10.192 | PostgreSQL、Patroni、Etcd | 备节点2 |
软件地址
#-- patroni
https://github.com/zalando/patroni
https://mirrors.aliyun.com/pypi/simple/patroni/
#-- etcd
https://github.com/etcd-io/etcd/releases/tag/v3.4.10
https://github.com/etcd-io/etcd/releases/tag/v3.3.22
#-- zookeeper
wget https://archive.apache.org/dist/zookeeper/zookeeper-3.3.6/zookeeper-3.3.6.tar.gz
wget https://archive.apache.org/dist/zookeeper/stable/apache-zookeeper-3.5.8.tar.gz
#-- python
https://www.python.org/downloads
https://mirrors.aliyun.com/pypi/simple/
# 官方源
wget -c https://www.python.org/ftp/python/3.8.2/Python-3.8.2.tar.xz
#-- 自签证书工具 cfssl,cfssljson
wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64
wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64
wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64
# postgresql
https://www.postgresql.org/download/product-categories/
环境准备
虽然Patroni支持自动化初始化PostgreSQL数据库并部署流复制,但这两块工作建议手工配置。
关闭防火墙
systemctl stop firewalld.service
systemctl disable firewalld.service
安装系统依赖软件包
yum -y install libffi-devel gcc gcc-c++ zlib zlib-devel readline-devel openssl-devel bzip2-devel sqlite-devel xz lzma xz-devel gdbm gdbm-devel tk tk-devel libffi libffi-devel ncurses ncurses-devel
配置EPEL yum源
wget -O /etc/yum.repos.d/epel.repo http://mirrors.aliyun.com/repo/epel-7.repo
yum -y install jq
配置sudo权限
cat > /etc/sudoers.d/postgres <<-EOF
# postgres ALL=(ALL:ALL) NOPASSWD:ALL
postgres ALL=(ALL:ALL) NOPASSWD:/bin/systemctl stop postgresql12,/bin/systemctl start postgresql12,/bin/systemctl restart postgresql12,/bin/systemctl status postgresql12,/bin/systemctl daemon-reload,/bin/systemctl stop patroni,/bin/systemctl start patroni,/bin/systemctl restart patroni,/bin/systemctl status patroni,/bin/systemctl stop etcd,/bin/systemctl start etcd,/bin/systemctl restart etcd,/bin/systemctl status etcd
EOF
安装python3
# wget -c https://www.python.org/ftp/python/3.8.2/Python-3.8.2.tar.xz
./configure --prefix=/ups/app/python3 --enable-shared
make
make install
配置动态库
cat > /etc/ld.so.conf.d/python3.conf <<-EOF
/ups/app/python3/lib
EOF
# 加载动态库
ldconfig -v | grep python3
# 检查确认
ldd /ups/app/python3/bin/python3
配置国内镜像源
mkdir -p ~/.pip
cat > ~/.pip/pip.conf <<-EOF
[global]
index-url = https://mirrors.aliyun.com/pypi/simple/
[install]
trusted-host=mirrors.aliyun.com
EOF
安装PG流复制(一主两从)
编译安装软件(所有数据库节点)
tar -xf postgresql-12.2.tar.gz
mkdir build_dir && cd build_dir
../configure --prefix=/ups/app/postgresql/pgsql-12 --with-perl --with-tcl --with-python --with-openssl --with-pam --with-gssapi --with-icu --without-ldap --with-libxml --with-libxslt --with-systemd
make world
make install-world
配置自启动服务(省略)
# 编辑服务文件
cat > /usr/lib/systemd/system/postgresql12.service <<-EOF
# It's not recommended to modify this file in-place, because it will be
# overwritten during package upgrades. If you want to customize, the
# best way is to create a file "/etc/systemd/system/postgresql12.service",
# containing
# .include /usr/lib/systemd/system/postgresql12.service
# ...make your changes here...
# For more info about custom unit files, see
# http://fedoraproject.org/wiki/Systemd#How_do_I_customize_a_unit_file.2F_add_a_custom_unit_file.3F
# Note: changing PGDATA will typically require adjusting SELinux
# configuration as well.
# Note: do not use a PGDATA pathname containing spaces, or you will
# break postgresql-setup.
[Unit]
Description=PostgreSQL 12 database server
Documentation=https://www.postgresql.org/docs/12/static/
After=syslog.target
After=network.target
[Service]
Type=notify
User=postgres
Group=postgres
# Note: avoid inserting whitespace in these Environment= lines, or you may
# break postgresql-setup.
# Location of database directory
Environment=PGDATA=/ups/data/pgdata/12/pg_root
Environment=PGHOME=/ups/app/postgresql/pgsql-12
# Where to send early-startup messages from the server (before the logging
# options of postgresql.conf take effect)
# This is normally controlled by the global default set by systemd
# StandardOutput=syslog
# Disable OOM kill on the postmaster
OOMScoreAdjust=-1000
Environment=PG_OOM_ADJUST_FILE=/proc/self/oom_score_adj
Environment=PG_OOM_ADJUST_VALUE=0
# ExecStartPre=/ups/app/postgresql/pgsql-12/bin/postgresql-12-check-db-dir \${PGDATA}
ExecStart=/ups/app/postgresql/pgsql-12/bin/postmaster -D \${PGDATA}
ExecReload=/bin/kill -HUP \$MAINPID
KillMode=mixed
KillSignal=SIGINT
# Do not set any timeout value, so that systemd will not kill postmaster
# during crash recovery.
# 禁用超时逻辑,systemd的默认超时时长是 90 秒
TimeoutSec=0
[Install]
WantedBy=multi-user.target
EOF
禁止postgresql自启动,通过patroni来管理postgresql
主库配置
主库初始化
initdb -D ${PGDATA} -U postgres --locale=en_US.UTF8 -E UTF8
配置主库参数
vi $PGDATA/postgresql.conf
cat >> $PGDATA/postgresql.conf <<-EOF
listen_addresses = '*'
port = 2020
wal_level = replica
archive_mode = on
#archive_command = 'test ! -f /ups/data/pgdata/12/arch/%f && cp %p /ups/data/pgdata/12/arch/%f'
#restore_command = 'cp /ups/data/pgdata/12/arch/%f %p'
archive_command = '/usr/bin/lz4 -q -z %p /ups/data/pgdata/12/arch/%f.lz4'
restore_command = '/usr/bin/lz4 -d /ups/data/pgdata/12/arch/%f.lz4 %p'
recovery_target_timeline='latest'
max_wal_senders = 10
wal_keep_segments = 64
hot_standby = on
hot_standby_feedback = on
full_page_writes = on
wal_log_hints = on
synchronous_commit = on
synchronous_standby_names = 'ANY 1 (*)'
archive_cleanup_command = 'pg_archivecleanup /ups/data/pgdata/12/arch %r'
primary_slot_name ='pgsql12_pg1'
EOF
检查确认配置文件
grep -Ev '^[[:space:]]|^#|^$' ${PGDATA}/postgresql.conf
sed -r '/^[ \t]*($|#)/d' $PGDATA/postgresql.conf
启动主库
pg_ctl start -D $PGDATA
配置postgres用户密码
psql -U postgres -c "ALTER USER postgres WITH PASSWORD 'postgres';"
创建流复制账号(pg1)
CREATE USER sync
REPLICATION
LOGIN
ENCRYPTED PASSWORD 'sync12345';
-- CONNECTION LIMIT 5
GRANT EXECUTE ON FUNCTION pg_read_binary_file(text) TO sync;
创建复制槽(pg1)patroni
-- 创建复制槽
select * from pg_create_physical_replication_slot('pgsql12_pg1');
select * from pg_create_physical_replication_slot('pgsql12_pg2');
select * from pg_create_physical_replication_slot('pgsql12_pg3');
-- 查看
select * from pg_replication_slots;
-- 删除复制槽
SELECT * FROM pg_drop_replication_slot('pgsql12_pg1');
配置客户端认证文件(pg_hba.conf) 所有服务器
cat >> ${PGDATA}/pg_hba.conf <<EOF
host all all 192.168.10.0/24 md5
host replication sync 127.0.0.1/32 md5
host replication sync 192.168.10.190/24 md5
host replication sync 192.168.10.191/24 md5
host replication sync 192.168.10.192/24 md5
EOF
pg_basebackup同步数据时,自动部署到其它节点从库
配置用户口令文件($HOME/.pgpass)所有服务器
touch ~/.pgpass
chmod 0600 ~/.pgpass
cat > ~/.pgpass <<EOF
192.168.10.190:2020:replication:sync:sync12345
192.168.10.191:2020:replication:sync:sync12345
192.168.10.192:2020:replication:sync:sync12345
EOF
cat <<-EOF >~/.pgpass
192.168.10.190:2020:replication:sync:sync12345
192.168.10.191:2020:replication:sync:sync12345
192.168.10.192:2020:replication:sync:sync12345
EOF
从库配置
备库初始化
# pg2
pg_basebackup -h 192.168.10.190 -p 2020 -U sync -D ${PGDATA} -w -Fp -Xs -Pv -R
# pg3
pg_basebackup -h 192.168.10.190 -p 2020 -U sync -D ${PGDATA} -w -Fp -Xs -Pv -R
-R, --write-recovery-conf: write configuration for replication
- 自动创建$PGDATA/standby.signal 标识文件,且该文件内容为空
- 自动在$PGDATA/postgresql.auto.conf 文件中添加 primary_conninfo 参数信息
修改备库参数
在 postgresql.auto.conf 的 primary_conninfo 中添加 application_name(例如 application_name=pgsql12_pg2),并设置对应的复制槽名称
primary_conninfo = 'application_name=pgsql12_pg2 user=sync passfile=''/home/postgres/.pgpass'' host=192.168.10.190 port=2020 sslmode=prefer sslcompression=0 gssencmode=prefer krbsrvname=postgres target_session_attrs=any'
primary_slot_name ='pgsql12_pg2'
启动备库
pg_ctl start -D $PGDATA
检查同步状态
SELECT usename , application_name , client_addr, sync_state FROM pg_stat_replication;
-- 结果是f(false)则为主库,t(true)为备库
SELECT pg_is_in_recovery();
-- 检查复制情况
SELECT pid, usename, client_addr, state, write_lag, flush_lag, replay_lag FROM pg_stat_replication;
自签证书配置
背景
互联网的通信安全,建立在 SSL/TLS 协议之上。不使用 SSL/TLS 的 HTTP 通信,就是不加密的通信。
- SSL (Secure Socket Layer):为Netscape所研发,用以保障在Internet上数据传输之安全,利用数据加密(Encryption)技术,可确保数据在网络上之传输过程中不会被截取。目前一般通用之规格为40 bit之安全标准,美国则已推出128 bit之更高安全标准,但限制出境。只要3.0版本以上之I.E.或Netscape浏览器即可支持SSL。
- 安全传输层协议(TLS)用于在两个通信应用程序之间提供保密性和数据完整性。该协议由两层组成:TLS 记录协议(TLS Record)和 TLS 握手协议(TLS Handshake)。较低的层为 TLS 记录协议,位于某个可靠的传输协议(例如 TCP)上面。
安装 CFSSL相关软件
export WORK_DIR=/ups/app/cfssl
mkdir -p ${WORK_DIR} && cd ${WORK_DIR}
wget https://pkg.cfssl.org/R1.2/cfssl_linux-amd64 -O ${WORK_DIR}/cfssl
wget https://pkg.cfssl.org/R1.2/cfssljson_linux-amd64 -O ${WORK_DIR}/cfssljson
wget https://pkg.cfssl.org/R1.2/cfssl-certinfo_linux-amd64 -O ${WORK_DIR}/cfssl-certinfo
export WORK_DIR=/ups/app/cfssl
for value in $(ls -tr .); do
tmp=$(echo ${value} |awk -F"_" '{print $1}')
mv ${value} ${tmp}
chmod +x ${tmp}
ln -s ${WORK_DIR}/${tmp} /usr/local/bin/${tmp}
done
验证 cfssl 的版本为 1.2或更高
查看版本
[root@pg1 cfssl]# cfssl version
Version: 1.2.0
Revision: dev
Runtime: go1.6
[root@pg1 cfssl]#
工具命令语法
cfssl
Usage:
Available commands:
selfsign
print-defaults
revoke
certinfo
serve
version
gencert
gencrl
info
bundle
sign
genkey
ocspsign
ocspserve
ocspdump
ocsprefresh
scan
Top-level flags:
-allow_verification_with_non_compliant_keys
Allow a SignatureVerifier to use keys which are technically non-compliant with RFC6962.
-loglevel int
Log level (0 = DEBUG, 5 = FATAL) (default 1)
cfssljson
Usage of cfssljson:
-bare
the response from CFSSL is not wrapped in the API standard response
-f string
JSON input (default "-")
-stdout
output the response instead of saving to a file
证书配置
创建 CA(Certificate Authority) 证书
先用 cfssl 命令生成包含默认配置的 ca-config.json 和 ca-csr.json 文件
创建 CA 配置文件(ca-config.json)
cd /ups/app/etcd/ssl
cfssl print-defaults config > ca-config.json
cfssl print-defaults csr > ca-csr.json
然后分别修改这两个文件为如下内容
ca-config.json
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"server": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
},
"client": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"client auth"
]
}
}
}
}
-
ca-config.json:
可以定义多个 profiles,分别指定不同的过期时间、使用场景等参数;后续在签名证书时使用某个 profile;
-
signing:
表示该证书可用于签名其它证书;生成的 ca.pem 证书中 CA=TRUE;
-
server auth:
表示 Client 可以用该 CA 对 Server 提供的证书进行验证;
-
client auth:
表示 Server 可以用该 CA 对 Client 提供的证书进行验证;
创建 CA 证书签名请求(ca-csr.json)
ca-csr.json
{
"CN": "etcd",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"L": "GZ",
"ST": "GD"
}
]
}
-
CN(Common Name):
- kube-apiserver 从证书中提取该字段作为请求的用户名(User Name);浏览器使用该字段验证网站是否合法。一般填写的都是域名
-
ST(State):
- 州、省
-
L(Locality):
- 地区、城市
-
O(Organization Name):
- 组织名称,公司名称。kube-apiserver 从证书中提取该字段作为请求用户所属的组(Group)
-
OU(Organization Unit Name):
- 组织单位名称,公司部门名称
前面2步可以通过以下方式创建CA证书
cat > /ups/app/etcd/ssl/ca-config.json <<-EOF
{
"signing": {
"default": {
"expiry": "87600h"
},
"profiles": {
"server": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"server auth",
"client auth"
]
},
"client": {
"expiry": "87600h",
"usages": [
"signing",
"key encipherment",
"client auth"
]
}
}
}
}
EOF
cat > /ups/app/etcd/ssl/ca-csr.json <<-EOF
{
"CN": "etcd",
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "GD",
"L": "GZ"
}
]
}
EOF
生成 CA 证书和私钥
cd /ups/app/etcd/ssl
cfssl gencert -initca ca-csr.json | cfssljson -bare ca
[root@pg1 ssl]# cfssl gencert -initca ca-csr.json | cfssljson -bare ca
2020/07/29 20:01:32 [INFO] generating a new CA key and certificate from CSR
2020/07/29 20:01:32 [INFO] generate received request
2020/07/29 20:01:32 [INFO] received CSR
2020/07/29 20:01:32 [INFO] generating key: rsa-2048
2020/07/29 20:01:32 [INFO] encoded CSR
2020/07/29 20:01:32 [INFO] signed certificate with serial number 51387564996842455704424109245687617617608202805
[root@pg1 ssl]#
CA 有关证书列表如下:
[root@pg1 ssl]# tree
.
├── ca.csr
├── ca-key.pem
├── ca.pem
├── ca-config.json
└── ca-csr.json
创建 etcd 的TLS认证证书,生成 etcd 证书和私钥
在 /ups/app/etcd/ssl 下添加文件 etcd-csr.json,内容如下
cat > /ups/app/etcd/ssl/etcd-csr.json <<-EOF
{
"CN": "etcd",
"hosts": [
"127.0.0.1",
"192.168.10.190",
"192.168.10.191",
"192.168.10.192"
],
"key": {
"algo": "rsa",
"size": 2048
},
"names": [
{
"C": "CN",
"ST": "GD",
"L": "GZ"
}
]
}
EOF
生成 etcd server证书
cd /ups/app/etcd/ssl
cfssl gencert -ca=/ups/app/etcd/ssl/ca.pem \
-ca-key=/ups/app/etcd/ssl/ca-key.pem \
-config=/ups/app/etcd/ssl/ca-config.json \
-profile=server /ups/app/etcd/ssl/etcd-csr.json | cfssljson -bare etcd
# 或
cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=server etcd-csr.json | cfssljson -bare etcd
etcd 有关证书列表如下
[root@pg1 ssl]# cfssl gencert -ca=ca.pem -ca-key=ca-key.pem -config=ca-config.json -profile=etcd etcd-csr.json | cfssljson -bare etcd
2020/07/29 20:02:11 [INFO] generate received request
2020/07/29 20:02:11 [INFO] received CSR
2020/07/29 20:02:11 [INFO] generating key: rsa-2048
2020/07/29 20:02:11 [INFO] encoded CSR
2020/07/29 20:02:11 [INFO] signed certificate with serial number 624986965983467912700213173636453978413637921699
2020/07/29 20:02:11 [WARNING] This certificate lacks a "hosts" field. This makes it unsuitable for
websites. For more information see the Baseline Requirements for the Issuance and Management
of Publicly-Trusted Certificates, v.1.1.6, from the CA/Browser Forum (https://cabforum.org);
specifically, section 10.2.3 ("Information Requirements").
[root@pg1 ssl]# ls
ca-config.json ca.csr ca-csr.json ca-key.pem ca.pem etcd.csr etcd-csr.json etcd-key.pem etcd.pem
[root@pg1 ssl]#
证书分发到其它所有节点
# 给证书读权限
chmod 644 /ups/app/etcd/ssl/*
cd /ups/app/etcd/ssl/
for IP in pg2 pg3;do
scp ca-key.pem ca.pem etcd.pem etcd-key.pem $IP:/ups/app/etcd/ssl/
done
DCS软件安装
DCS主要有etcd,zookeeper,consul
Etcd安装
三台主机上下载并安装ETCD,如下:
# wget -c https://github.com/etcd-io/etcd/releases/download/v3.3.22/etcd-v3.3.22-linux-amd64.tar.gz
tar -xf etcd-v3.3.22-linux-amd64.tar.gz -C /ups/app/
cd /ups/app
mv etcd-v3.3.22-linux-amd64/ etcd
chown -R root:root etcd
mkdir -p ./{bin,cfg,ssl,log,data} && mv etcd etcdctl ./bin/
# 可选
useradd etcd -M -d /ups/app/etcd -c "Etcd user" -r -s /sbin/nologin
编辑配置文件
注意是否启动自签证书+SSL+HTTPS组合配置,若未启动SSL,则修改对应http协议
节点1
- 常规(不使用SSL)
cat > /ups/app/etcd/cfg/etcd.conf <<-EOF
#[Member]
#etcd实例名称
ETCD_NAME="etcd01"
#etcd数据保存目录
ETCD_DATA_DIR="/ups/app/etcd/data/default.etcd"
#集群内部通信使用的URL
ETCD_LISTEN_PEER_URLS="http://192.168.10.190:2380"
#供外部客户端使用的URL
ETCD_LISTEN_CLIENT_URLS="http://192.168.10.190:2379,http://127.0.0.1:2379"
#[Clustering]
#广播给集群内其他成员访问的URL
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://192.168.10.190:2380"
#广播给外部客户端使用的URL
ETCD_ADVERTISE_CLIENT_URLS="http://192.168.10.190:2379"
#初始集群成员列表
ETCD_INITIAL_CLUSTER="etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380"
#集群的名称
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-pg"
#初始集群状态,new表示新建集群| existing表示加入已有集群
ETCD_INITIAL_CLUSTER_STATE="new"
# 使用3.4.X版本时配置
# ETCD_ENABLE_V2="true"
EOF
- 使用SSL证书
cat > /ups/app/etcd/cfg/etcd.conf <<-EOF
#[Member]
#etcd实例名称,可以随意设置不重复值
ETCD_NAME="etcd01"
#etcd数据保存目录
ETCD_DATA_DIR="/ups/app/etcd/data/default.etcd"
#集群内部通信使用的URL
ETCD_LISTEN_PEER_URLS="https://192.168.10.190:2380"
#供外部客户端使用的URL
ETCD_LISTEN_CLIENT_URLS="https://192.168.10.190:2379,http://127.0.0.1:2379"
#[Clustering]
#广播给集群内其他成员访问的URL
ETCD_INITIAL_ADVERTISE_PEER_URLS="https://192.168.10.190:2380"
#广播给外部客户端使用的URL
ETCD_ADVERTISE_CLIENT_URLS="https://192.168.10.190:2379"
#初始集群成员列表
ETCD_INITIAL_CLUSTER="etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380"
#集群的名称
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-pg"
#初始集群状态,new表示新建集群| existing表示加入已有集群
ETCD_INITIAL_CLUSTER_STATE="new"
# 使用3.4.X版本时配置
# ETCD_ENABLE_V2="true"
#[Security]
ETCD_CERT_FILE="/ups/app/etcd/ssl/etcd.pem"
ETCD_KEY_FILE="/ups/app/etcd/ssl/etcd-key.pem"
ETCD_TRUSTED_CA_FILE="/ups/app/etcd/ssl/ca.pem"
ETCD_CLIENT_CERT_AUTH="true"
ETCD_PEER_CERT_FILE="/ups/app/etcd/ssl/etcd.pem"
ETCD_PEER_KEY_FILE="/ups/app/etcd/ssl/etcd-key.pem"
ETCD_PEER_TRUSTED_CA_FILE="/ups/app/etcd/ssl/ca.pem"
ETCD_PEER_CLIENT_CERT_AUTH="true"
ETCD_PEER_AUTO_TLS="true"
ETCD_AUTO_TLS="true"
EOF
配置说明:
-
ETCD_NAME:
- etcd 集群中的节点名,这里可以随意,可区分且不重复就行。
-
ETCD_LISTEN_PEER_URLS:
- 监听的用于节点之间通信的 URL,可监听多个,集群内部将通过这些 URL 进行数据交互(如选举、数据同步等)。
-
ETCD_LISTEN_CLIENT_URLS:
- 监听的用于客户端通信的 URL,同样可以监听多个。
-
ETCD_ADVERTISE_CLIENT_URLS:
- 建议使用的客户端通信 URL,该值用于 etcd 代理或 etcd 成员与 etcd 节点通信。
-
ETCD_INITIAL_ADVERTISE_PEER_URLS:
- 建议用于节点之间通信的 URL,节点间将以该值进行通信。
-
ETCD_INITIAL_CLUSTER:
- 也就是集群中所有的 initial-advertise-peer-urls 的合集。
-
ETCD_INITIAL_CLUSTER_STATE:
- 新建集群的标志。
-
ETCD_INITIAL_CLUSTER_TOKEN:
- 节点的 token 值,设置该值后集群将生成唯一 ID,并为每个节点也生成唯一 ID,当使用相同配置文件再启动一个集群时,只要该 token 值不一样,etcd 集群就不会相互影响。
节点2
cat > /ups/app/etcd/cfg/etcd.conf <<-EOF
#[Member]
#etcd实例名称
ETCD_NAME="etcd02"
#etcd数据保存目录
ETCD_DATA_DIR="/ups/app/etcd/data/default.etcd"
#集群内部通信使用的URL
ETCD_LISTEN_PEER_URLS="http://192.168.10.191:2380"
#供外部客户端使用的URL
ETCD_LISTEN_CLIENT_URLS="http://192.168.10.191:2379,http://127.0.0.1:2379"
#[Clustering]
#广播给集群内其他成员访问的URL
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://192.168.10.191:2380"
#广播给外部客户端使用的URL
ETCD_ADVERTISE_CLIENT_URLS="http://192.168.10.191:2379"
#初始集群成员列表
ETCD_INITIAL_CLUSTER="etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380"
#集群的名称
ETCD_INITIAL_CLUSTER_TOKEN="etcd-cluster-pg"
#初始集群状态,new表示新建集群| existing表示加入已有集群
ETCD_INITIAL_CLUSTER_STATE="new"
EOF
配置etcd启动文件
etcd-V3.3.X版本
- 不使用证书
cat > /usr/lib/systemd/system/etcd.service <<-EOF
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
#User=etcd
WorkingDirectory=/ups/app/etcd/
EnvironmentFile=-/ups/app/etcd/cfg/etcd.conf
ExecStart=/ups/app/etcd/bin/etcd \
--name=\${ETCD_NAME} \
--data-dir=\${ETCD_DATA_DIR} \
--listen-peer-urls=\${ETCD_LISTEN_PEER_URLS} \
--listen-client-urls=\${ETCD_LISTEN_CLIENT_URLS} \
--advertise-client-urls=\${ETCD_ADVERTISE_CLIENT_URLS} \
--initial-advertise-peer-urls=\${ETCD_INITIAL_ADVERTISE_PEER_URLS} \
--initial-cluster=\${ETCD_INITIAL_CLUSTER} \
--initial-cluster-token=\${ETCD_INITIAL_CLUSTER_TOKEN} \
--initial-cluster-state=\${ETCD_INITIAL_CLUSTER_STATE} \
--log-output=stderr
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
- 使用证书
cat > /usr/lib/systemd/system/etcd.service <<-EOF
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
WorkingDirectory=/ups/app/etcd/
EnvironmentFile=-/ups/app/etcd/cfg/etcd.conf
ExecStart=/bin/bash -c "GOMAXPROCS=$(nproc) /ups/app/etcd/bin/etcd \
--name=\${ETCD_NAME} \
--data-dir=\${ETCD_DATA_DIR} \
--listen-peer-urls=\${ETCD_LISTEN_PEER_URLS} \
--listen-client-urls=\${ETCD_LISTEN_CLIENT_URLS} \
--advertise-client-urls=\${ETCD_ADVERTISE_CLIENT_URLS} \
--initial-advertise-peer-urls=\${ETCD_INITIAL_ADVERTISE_PEER_URLS} \
--initial-cluster=\${ETCD_INITIAL_CLUSTER} \
--initial-cluster-token=\${ETCD_INITIAL_CLUSTER_TOKEN} \
--initial-cluster-state=\${ETCD_INITIAL_CLUSTER_STATE} \
--auto-tls=\${ETCD_AUTO_TLS} \
--cert-file=\${ETCD_CERT_FILE} \
--key-file=\${ETCD_KEY_FILE} \
--peer-cert-file=\${ETCD_PEER_CERT_FILE} \
--peer-key-file=\${ETCD_PEER_KEY_FILE} \
--trusted-ca-file=\${ETCD_TRUSTED_CA_FILE} \
--client-cert-auth=\${ETCD_CLIENT_CERT_AUTH} \
--peer-client-cert-auth=\${ETCD_PEER_CLIENT_CERT_AUTH} \
--peer-trusted-ca-file=\${ETCD_PEER_TRUSTED_CA_FILE} \
--peer-auto-tls=\${ETCD_PEER_AUTO_TLS} "
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
etcd-V3.4.X版本
[Unit]
Description=Etcd Server
After=network.target
After=network-online.target
Wants=network-online.target
[Service]
Type=notify
# User=etcd
WorkingDirectory=/ups/app/etcd/
EnvironmentFile=-/ups/app/etcd/cfg/etcd.conf
ExecStart=/ups/app/etcd/bin/etcd --log-output=stderr
Restart=on-failure
RestartSec=10
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
在3.4.X的版本中,不能同时通过参数文件(EnvironmentFile 中的环境变量)和启动命令行选项配置相同的参数,否则 etcd 会因配置冲突而启动失败。
启动
服务方式启动etcd
systemctl daemon-reload
systemctl enable etcd
systemctl restart etcd
systemctl daemon-reload && systemctl restart etcd && systemctl status etcd
脚本方式启动etcd
pg1 主机创建 main.sh 启动脚本,如下:
etcd --name etcd01 \
--initial-advertise-peer-urls http://192.168.10.190:2380 \
--listen-peer-urls http://192.168.10.190:2380 \
--listen-client-urls http://192.168.10.190:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://192.168.10.190:2379 \
--initial-cluster-token etcd-cluster-pg \
--initial-cluster etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380 \
--initial-cluster-state new \
--enable-v2
pg2 主机创建 main.sh 启动脚本,如下:
cd /ups/app/etcd
./etcd --name etcd02 \
--initial-advertise-peer-urls http://192.168.10.191:2380 \
--listen-peer-urls http://192.168.10.191:2380 \
--listen-client-urls http://192.168.10.191:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://192.168.10.191:2379 \
--initial-cluster-token etcd-cluster-pg \
--initial-cluster etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380 \
--initial-cluster-state existing \
--enable-v2
pg3 主机创建 main.sh 启动脚本,如下:
cd /ups/app/etcd
./etcd --name etcd03 \
--initial-advertise-peer-urls http://192.168.10.192:2380 \
--listen-peer-urls http://192.168.10.192:2380 \
--listen-client-urls http://192.168.10.192:2379,http://127.0.0.1:2379 \
--advertise-client-urls http://192.168.10.192:2379 \
--initial-cluster-token etcd-cluster-pg \
--initial-cluster etcd01=http://192.168.10.190:2380,etcd02=http://192.168.10.191:2380,etcd03=http://192.168.10.192:2380 \
--initial-cluster-state new \
--enable-v2
使用证书
etcd --name etcd01 \
--initial-advertise-peer-urls https://192.168.10.190:2380 \
--listen-peer-urls https://192.168.10.190:2380 \
--listen-client-urls https://192.168.10.190:2379,http://127.0.0.1:2379 \
--advertise-client-urls https://192.168.10.190:2379 \
--initial-cluster-token etcd-cluster-pg \
--initial-cluster etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380 \
--initial-cluster-state new \
--cert-file=/ups/app/etcd/ssl/etcd.pem \
--key-file=/ups/app/etcd/ssl/etcd-key.pem \
--peer-cert-file=/ups/app/etcd/ssl/etcd.pem \
--peer-key-file=/ups/app/etcd/ssl/etcd-key.pem \
--trusted-ca-file=/ups/app/etcd/ssl/ca.pem \
--peer-trusted-ca-file=/ups/app/etcd/ssl/ca.pem
etcd --name etcd02 \
--initial-advertise-peer-urls https://192.168.10.191:2380 \
--listen-peer-urls https://192.168.10.191:2380 \
--listen-client-urls https://192.168.10.191:2379,http://127.0.0.1:2379 \
--advertise-client-urls https://192.168.10.191:2379 \
--initial-cluster-token etcd-cluster-pg \
--initial-cluster etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380 \
--initial-cluster-state new \
--cert-file=/ups/app/etcd/ssl/etcd.pem \
--key-file=/ups/app/etcd/ssl/etcd-key.pem \
--peer-cert-file=/ups/app/etcd/ssl/etcd.pem \
--peer-key-file=/ups/app/etcd/ssl/etcd-key.pem \
--trusted-ca-file=/ups/app/etcd/ssl/ca.pem \
--peer-trusted-ca-file=/ups/app/etcd/ssl/ca.pem
etcd --name etcd03 \
--initial-advertise-peer-urls https://192.168.10.192:2380 \
--listen-peer-urls https://192.168.10.192:2380 \
--listen-client-urls https://192.168.10.192:2379,http://127.0.0.1:2379 \
--advertise-client-urls https://192.168.10.192:2379 \
--initial-cluster-token etcd-cluster-pg \
--initial-cluster etcd01=https://192.168.10.190:2380,etcd02=https://192.168.10.191:2380,etcd03=https://192.168.10.192:2380 \
--initial-cluster-state new \
--cert-file=/ups/app/etcd/ssl/etcd.pem \
--key-file=/ups/app/etcd/ssl/etcd-key.pem \
--peer-cert-file=/ups/app/etcd/ssl/etcd.pem \
--peer-key-file=/ups/app/etcd/ssl/etcd-key.pem \
--trusted-ca-file=/ups/app/etcd/ssl/ca.pem \
--peer-trusted-ca-file=/ups/app/etcd/ssl/ca.pem
启动etcd,如下:
# cat start.sh
#!/bin/sh
cd /ups/app/etcd
sh ./main.sh > etcd.log 2>&1 &
# 执行启动脚本
sh start.sh
验证
# 查看成员
etcdctl \
--ca-file=/ups/app/etcd/ssl/ca.pem \
--cert-file=/ups/app/etcd/ssl/etcd.pem \
--key-file=/ups/app/etcd/ssl/etcd-key.pem \
--endpoints=https://192.168.10.190:2379 member list
# 查看集群状态
etcdctl \
--endpoints=https://192.168.10.190:2379 \
--cert-file=/ups/app/etcd/ssl/etcd.pem \
--ca-file=/ups/app/etcd/ssl/ca.pem \
--key-file=/ups/app/etcd/ssl/etcd-key.pem \
cluster-health
ETCDCTL_API=3 etcdctl --endpoints=http://192.168.10.190:2379,http://192.168.10.191:2379,http://192.168.10.192:2379 endpoint health
ETCDCTL_API=2 etcdctl --endpoints "http://192.168.10.190:2379,http://192.168.10.191:2379,http://192.168.10.192:2379" member list
# -- etcd3.4
/ups/app/etcd/bin/etcd --version
/ups/app/etcd/bin/etcdctl endpoint health
/ups/app/etcd/bin/etcdctl endpoint status
/ups/app/etcd/bin/etcdctl member list
etcdctl管理工具
etcdctl 是一个命令行客户端,它能提供一些简洁的命令,供用户直接跟etcd服务打交道,而无需基于 HTTP API 方式。
各个版本不同命令选项
export ETCDCTL_API=2
export ETCDCTL_API=3
语法
NAME:
etcdctl - A simple command line client for etcd.
WARNING:
Environment variable ETCDCTL_API is not set; defaults to etcdctl v2.
Set environment variable ETCDCTL_API=3 to use v3 API or ETCDCTL_API=2 to use v2 API.
USAGE:
etcdctl [global options] command [command options] [arguments...]
VERSION:
3.3.22
COMMANDS:
backup backup an etcd directory
cluster-health check the health of the etcd cluster
mk make a new key with a given value
mkdir make a new directory
rm remove a key or a directory
rmdir removes the key if it is an empty directory or a key-value pair
get retrieve the value of a key
ls retrieve a directory
set set the value of a key
setdir create a new directory or update an existing directory TTL
update update an existing key with a given value
updatedir update an existing directory
watch watch a key for changes
exec-watch watch a key for changes and exec an executable
member member add, remove and list subcommands
user user add, grant and revoke subcommands
role role add, grant and revoke subcommands
auth overall auth controls
help, h Shows a list of commands or help for one command
GLOBAL OPTIONS:
--debug output cURL commands which can be used to reproduce the request
--no-sync don't synchronize cluster information before sending request
--output simple, -o simple output response in the given format (simple, `extended` or `json`) (default: "simple")
--discovery-srv value, -D value domain name to query for SRV records describing cluster endpoints
--insecure-discovery accept insecure SRV records describing cluster endpoints
--peers value, -C value DEPRECATED - "--endpoints" should be used instead
--endpoint value DEPRECATED - "--endpoints" should be used instead
--endpoints value a comma-delimited list of machine addresses in the cluster (default: "http://127.0.0.1:2379,http://127.0.0.1:4001")
--cert-file value identify HTTPS client using this SSL certificate file
--key-file value identify HTTPS client using this SSL key file
--ca-file value verify certificates of HTTPS-enabled servers using this CA bundle
--username value, -u value provide username[:password] and prompt if password is not supplied.
--timeout value connection timeout per request (default: 2s)
--total-timeout value timeout for the command execution (except watch) (default: 5s)
--help, -h show help
--version, -v print the version
[root@pg3 ~]# etcdctl ls -h
NAME:
etcdctl ls - retrieve a directory
USAGE:
etcdctl ls [command options] [key]
OPTIONS:
--sort returns result in sorted order
--recursive, -r returns all key names recursively for the given path
-p append slash (/) to directories
--quorum, -q require quorum for get request
[root@pg3 ~]#
export ETCDCTL_API=3
[root@pg1 ~]# etcdctl --help
NAME:
etcdctl - A simple command line client for etcd3.
USAGE:
etcdctl
VERSION:
3.3.22
API VERSION:
3.3
COMMANDS:
get Gets the key or a range of keys
put Puts the given key into the store
del Removes the specified key or range of keys [key, range_end)
txn Txn processes all the requests in one transaction
compaction Compacts the event history in etcd
alarm disarm Disarms all alarms
alarm list Lists all alarms
defrag Defragments the storage of the etcd members with given endpoints
endpoint health Checks the healthiness of endpoints specified in `--endpoints` flag
endpoint status Prints out the status of endpoints specified in `--endpoints` flag
endpoint hashkv Prints the KV history hash for each endpoint in --endpoints
move-leader Transfers leadership to another etcd cluster member.
watch Watches events stream on keys or prefixes
version Prints the version of etcdctl
lease grant Creates leases
lease revoke Revokes leases
lease timetolive Get lease information
lease list List all active leases
lease keep-alive Keeps leases alive (renew)
member add Adds a member into the cluster
member remove Removes a member from the cluster
member update Updates a member in the cluster
member list Lists all members in the cluster
snapshot save Stores an etcd node backend snapshot to a given file
snapshot restore Restores an etcd member snapshot to an etcd directory
snapshot status Gets backend snapshot status of a given file
make-mirror Makes a mirror at the destination etcd cluster
migrate Migrates keys in a v2 store to a mvcc store
lock Acquires a named lock
elect Observes and participates in leader election
auth enable Enables authentication
auth disable Disables authentication
user add Adds a new user
user delete Deletes a user
user get Gets detailed information of a user
user list Lists all users
user passwd Changes password of user
user grant-role Grants a role to a user
user revoke-role Revokes a role from a user
role add Adds a new role
role delete Deletes a role
role get Gets detailed information of a role
role list Lists all roles
role grant-permission Grants a key to a role
role revoke-permission Revokes a key from a role
check perf Check the performance of the etcd cluster
help Help about any command
OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)
[root@pg1 ~]#
示例
# 查看所有数据
etcdctl ls -recursive
etcdctl --endpoints=http://127.0.0.1:2379 ls -recursive
# 删除 key名称 /service
[root@pg1 ~]# etcdctl ls -recursive
/service
/service/etcd-cluster-pg
/service/etcd-cluster-pg/config
/service/etcd-cluster-pg/optime
/service/etcd-cluster-pg/optime/leader
/service/etcd-cluster-pg/history
/service/etcd-cluster-pg/members
/service/etcd-cluster-pg/initialize
[root@pg1 ~]# etcdctl rm /service
Error: 102: Not a file (/service) [6680]
[root@pg1 ~]# etcdctl rm -recursive /service
[root@pg1 ~]# etcdctl ls -recursive
[root@pg1 ~]#
zookeeper安装
安装配置ZK
# 安装zookeeper
tar -xf apache-zookeeper-3.6.0-bin.tar.gz -C /ups/app/zookeeper/
cd zookeeper-3.6.0
cp conf/zoo_sample.cfg conf/zoo.cfg
编辑配置文件
# vi conf/zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
dataDir=/ups/app/zookeeper/data
dataLogDir=/ups/app/zookeeper/log
# the port at which the clients will connect
clientPort=2181
# 配置Zookeeper集群信息
# server.[服务器编号]=[服务器地址]:[LF通信端口]:[选举端口]
# 服务器编号:必须与data/myid文件中的id一致
# LF通信端口: 服务器与集群中的leader交换信息的端口,一般选用相同的端口
# 选举端口: 选举新leader时服务器间相互通信的端口,一般选用相同的端口
server.1=192.168.10.190:2888:3888
server.2=192.168.10.191:2888:3888
server.3=192.168.10.192:2888:3888
配置创建myid文件
3台服务器分别设置myid
# 192.168.10.190
echo "1" > data/myid
# 192.168.10.191
echo "2" > data/myid
# 192.168.10.192
echo "3" > data/myid
启动
# 依次启动服务
nohup sh ./bin/zkServer.sh start >/dev/null 2>&1 &
验证
# 检查状态
sh ./bin/zkServer.sh status
# 测试连接
./bin/zkCli.sh -server localhost:2181
Patroni 安装
export LD_LIBRARY_PATH=/ups/app/python3/lib:${LD_LIBRARY_PATH}
cd /ups/app/python3/bin
./python3 -m pip install --upgrade setuptools
./python3 -m pip install --upgrade pip
./python3 -m pip install psycopg2_binary
./python3 -m pip install patroni[etcd,consul]
# -- 或者
/ups/app/python3/bin/pip3 install psycopg2-binary -i https://mirrors.aliyun.com/pypi/simple/
/ups/app/python3/bin/pip3 install patroni -i https://mirrors.aliyun.com/pypi/simple/
# OR
/ups/app/python3/bin/pip3 install patroni[etcd,consul,zookeeper] -i https://mirrors.aliyun.com/pypi/simple/
过程
[root@pg1 bin]# cd /ups/app/python3/bin
[root@pg1 bin]# ./pip3 install patroni[etcd] -i https://mirrors.aliyun.com/pypi/simple/
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Collecting patroni[etcd]
Downloading https://mirrors.aliyun.com/pypi/packages/25/01/e4656c541ac648a530fc1b6094324969f9f2ed8d7005ad0fa2598cbf1199/patroni-1.6.5-py3-none-any.whl (178kB)
|████████████████████████████████| 184kB 425kB/s
Collecting psutil>=2.0.0 (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/aa/3e/d18f2c04cf2b528e18515999b0c8e698c136db78f62df34eee89cee205f1/psutil-5.7.2.tar.gz (460kB)
|████████████████████████████████| 460kB 1.4MB/s
Collecting six>=1.7 (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/ee/ff/48bde5c0f013094d729fe4b0316ba2a24774b3ff1c52d924a8a4cb04078a/six-1.15.0-py2.py3-none-any.whl
Collecting prettytable>=0.7 (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/ef/30/4b0746848746ed5941f052479e7c23d2b56d174b82f4fd34a25e389831f5/prettytable-0.7.2.tar.bz2
Collecting click>=4.1 (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/d2/3d/fa76db83bf75c4f8d338c2fd15c8d33fdd7ad23a9b5e57eb6c5de26b430e/click-7.1.2-py2.py3-none-any.whl (82kB)
|████████████████████████████████| 92kB 1.3MB/s
Collecting urllib3!=1.21,>=1.19.1 (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/9f/f0/a391d1463ebb1b233795cabfc0ef38d3db4442339de68f847026199e69d7/urllib3-1.25.10-py2.py3-none-any.whl (127kB)
|████████████████████████████████| 133kB 323kB/s
Collecting PyYAML (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/64/c2/b80047c7ac2478f9501676c988a5411ed5572f35d1beff9cae07d321512c/PyYAML-5.3.1.tar.gz (269kB)
|████████████████████████████████| 276kB 383kB/s
Collecting cdiff (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/69/6c/301876940e760a8b46c1caacf08c298f511f517c70eec32e43f38e9cc6f5/cdiff-1.0.tar.gz
Collecting python-dateutil (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/d4/70/d60450c3dd48ef87586924207ae8907090de0b306af2bce5d134d78615cb/python_dateutil-2.8.1-py2.py3-none-any.whl (227kB)
|████████████████████████████████| 235kB 943kB/s
Collecting python-etcd<0.5,>=0.4.3; extra == "etcd" (from patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/a1/da/616a4d073642da5dd432e5289b7c1cb0963cc5dde23d1ecb8d726821ab41/python-etcd-0.4.5.tar.gz
Collecting dnspython>=1.13.0 (from python-etcd<0.5,>=0.4.3; extra == "etcd"->patroni[etcd])
Downloading https://mirrors.aliyun.com/pypi/packages/90/49/cb426577c28ca3e35332815b795a99e467523843fc83cc85ca0d6be2515a/dnspython-2.0.0-py3-none-any.whl (208kB)
|████████████████████████████████| 215kB 2.8MB/s
Installing collected packages: psutil, six, prettytable, click, urllib3, PyYAML, cdiff, python-dateutil, dnspython, python-etcd, patroni
Running setup.py install for psutil ... done
Running setup.py install for prettytable ... done
Running setup.py install for PyYAML ... done
Running setup.py install for cdiff ... done
Running setup.py install for python-etcd ... done
Successfully installed PyYAML-5.3.1 cdiff-1.0 click-7.1.2 dnspython-2.0.0 patroni-1.6.5 prettytable-0.7.2 psutil-5.7.2 python-dateutil-2.8.1 python-etcd-0.4.5 six-1.15.0 urllib3-1.25.10
WARNING: You are using pip version 19.2.3, however version 20.1.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.
结果确认
[root@temp bin]# ./pip3 install patroni -i https://mirrors.aliyun.com/pypi/simple/
Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Requirement already satisfied: patroni in /ups/app/python3/lib/python3.8/site-packages (1.6.5)
Requirement already satisfied: PyYAML in /ups/app/python3/lib/python3.8/site-packages (from patroni) (5.3.1)
Requirement already satisfied: click>=4.1 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (7.1.2)
Requirement already satisfied: prettytable>=0.7 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (0.7.2)
Requirement already satisfied: cdiff in /ups/app/python3/lib/python3.8/site-packages (from patroni) (1.0)
Requirement already satisfied: python-dateutil in /ups/app/python3/lib/python3.8/site-packages (from patroni) (2.8.1)
Requirement already satisfied: psutil>=2.0.0 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (5.7.2)
Requirement already satisfied: urllib3!=1.21,>=1.19.1 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (1.25.10)
Requirement already satisfied: six>=1.7 in /ups/app/python3/lib/python3.8/site-packages (from patroni) (1.15.0)
[root@temp bin]#
find / -name 'patroni'
/ups/app/python3/bin/patroni
/ups/app/python3/lib/python3.8/site-packages/patroni
服务文件配置
vi /usr/lib/systemd/system/patroni.service
cat <<-'EOF' >/usr/lib/systemd/system/patroni.service
[Unit]
Description=Runners to orchestrate a high-availability PostgreSQL - patroni
Documentation=https://patroni.readthedocs.io/en/latest/index.html
After=syslog.target network.target etcd.service
Wants=network-online.target
[Service]
Type=simple
User=postgres
Group=postgres
ExecStart=/ups/app/python3/bin/patroni /etc/patroni/patroni.yml
ExecReload=/bin/kill -s HUP $MAINPID
KillMode=process
KillSignal=SIGINT
LimitNOFILE=65536
Restart=on-abnormal
RestartSec=30s
TimeoutSec=0
[Install]
WantedBy=multi-user.target
EOF
patroni参数配置
patroni读取PG参数顺序
- postgresql.base.conf
- postgresql.conf
- postgresql.auto.conf
- run-time parameter (即运行时alter命令设置的属性)
配置patroni文件
Patroni 使用 YAML 格式进行配置,配置文件的格式(缩进、层级)要求非常严格。
mkdir -p /etc/patroni
chown -R postgres:postgres /etc/patroni
vi /etc/patroni/patroni.yml
grep -Ev "^[ \t]*(#|$)" /etc/patroni/patroni.yml
启动patroni服务时,将$PGDATA/postgresql.conf重命名为$PGDATA/postgresql.base.conf,然后将/etc/patroni/patroni.yml文件中配置的postgresql项内容覆盖写入到$PGDATA/postgresql.conf文件
配置说明
包括全局参数、restapi模块参数、etcd|zookeeper|consul模块参数、bootstrap启动参数、postgresql模块参数。
-
Global 设置
/<namespace>/<scope>/config
- name 节点(主机)名,在集群内必须唯一,每台机器有自己的名字
- namespace 存储配置信息的区域路径(保持默认)
- scope 集群的名字(同 postgresql.conf 的 cluster_name 参数)
-
log 的配置
- level 设置日志的等级
- format 设置日志的格式,默认的设置是 asctime levelname message
- dateformat 设置时间格式
- dir 写入程序日志的目录。目录必须已存在,且运行 patroni 的用户对其有写权限。设置该值后,应用程序默认保留4个25MB 的日志。
- file_num 要保留的日志的数量
- file_size patroni.log的尺寸
- loggers: 定义允许日志等级
-
引导配置:
- DCS: 集群的全局配置,更改参数需要在 DCS 中或通过 API 进行更改。
- loop_wait 每次循环休眠的秒数,默认 10秒
- ttl: 获取 leader 锁的 TTL(秒)。可以将其视为启动自动故障转移过程之前的时间长度。默认值:30
- retry_timeout: DCS 和 PostgreSQL 操作重试的超时时间(秒)。持续时间短于该值的 DCS 或网络故障不会导致 Patroni 将主库降级,即不触发切换。
- maximum_lag_on_failover:从库参与故障转移(竞选新主库)时,允许落后主库的最大字节数。
- master_start_timeout 触发故障转移之前允许主库从故障中恢复的时间(秒)。主库故障时最坏情况下的切换时间为 loop_wait + master_start_timeout + loop_wait
- synchronous_mode 打开这个模式后将启用同步复制,并选择与主库保持同步的从库作为可能的新主库
- synchronous_mode_strict :打开这个模式后,如果没有可用的同步从库,主库将禁止写入数据。
- DCS: 集群的全局配置,更改参数需要在 DCS 中或通过 API 进行更改。
详见附录
Etcd方案
- 主机1配置yml文件
scope: etcd-cluster-pg
namespace: /service/
name: pg1
restapi:
listen: 192.168.10.190:8008
connect_address: 192.168.10.190:8008
etcd:
#Provide host to do the initial discovery of the cluster topology:
# host: 192.168.10.190:2379
hosts: 192.168.10.190:2379,192.168.10.191:2379,192.168.10.192:2379
# protocol: https
# cacert: /ups/app/etcd/ssl/ca.pem
# cert: /ups/app/etcd/ssl/etcd.pem
# key: /ups/app/etcd/ssl/etcd-key.pem
bootstrap:
# this section will be written into Etcd:/<namespace>/<scope>/config after initializing new cluster
# and all other cluster members will use it as a `global configuration`
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
master_start_timeout: 300
synchronous_mode: false
postgresql:
use_pg_rewind: true
use_slots: true
parameters:
port: 2020
listen_addresses: "*"
wal_level: logical
hot_standby: "on"
wal_keep_segments: 64
max_wal_senders: 10
max_replication_slots: 10
wal_log_hints: "on"
# archive_mode: "on"
hot_standby: on
# archive_timeout: 1800s
postgresql:
listen: 0.0.0.0:2020
connect_address: 192.168.10.190:2020
data_dir: /ups/data/pgdata/12/pg_root
bin_dir: /ups/app/postgresql/pgsql-12/bin
# config_dir: /ups/data/pgdata/12/pg_root
pgpass: /home/postgres/.pgpass
authentication:
replication:
username: sync
password: sync12345
superuser:
username: postgres
password: postgres
#rewind: # Has no effect on postgres 10 and lower
#username: pg_rewind
#password:
tags:
nofailover: false
noloadbalance: false
clonefrom: false
nosync: false
其它节点需修改全局参数name、restapi模块的listen和connect_address参数、etcd模块的host参数,以及postgresql模块的connect_address参数。
- 主机2配置yml文件
scope: etcd-cluster-pg
namespace: /service/
name: pg2
restapi:
listen: 192.168.10.191:8008
connect_address: 192.168.10.191:8008
etcd:
#Provide host to do the initial discovery of the cluster topology:
hosts: 192.168.10.190:2379,192.168.10.191:2379,192.168.10.192:2379
# protocol: https
# cacert: /ups/app/etcd/ssl/ca.pem
# cert: /ups/app/etcd/ssl/etcd.pem
# key: /ups/app/etcd/ssl/etcd-key.pem
bootstrap:
# this section will be written into Etcd:/<namespace>/<scope>/config after initializing new cluster
# and all other cluster members will use it as a `global configuration`
dcs:
ttl: 30
loop_wait: 10
retry_timeout: 10
maximum_lag_on_failover: 1048576
master_start_timeout: 300
synchronous_mode: false
postgresql:
use_pg_rewind: true
use_slots: true
parameters:
wal_level: logical
hot_standby: "on"
wal_keep_segments: 64
max_wal_senders: 10
max_replication_slots: 10
wal_log_hints: "on"
archive_mode: "on"
hot_standby: on
archive_timeout: 1800s
postgresql:
listen: 0.0.0.0:2020
connect_address: 192.168.10.191:2020
data_dir: /ups/data/pgdata/12/pg_root
bin_dir: /ups/app/postgresql/pgsql-12/bin
#config_dir: /ups/data/pgdata/12/pg_root
pgpass: /home/postgres/.pgpass
authentication:
replication:
username: sync
password: sync12345
superuser:
username: postgres
password:
# rewind: # Has no effect on postgres 10 and lower
# username: pg_rewind
# password:
tags:
nofailover: false
noloadbalance: false
clonefrom: false
nosync: false
zookeeper方案
替换上面etcd项为ZooKeeper即可
zookeeper:
hosts: ['192.168.10.190:2181','192.168.10.191:2181','192.168.10.192:2181']
启动服务
三台主机分别启动 patroni ,如下:
/ups/app/python3/bin/patroni /etc/patroni/patroni.yml > /tmp/pg_patroni.log 2>&1 &
/ups/app/python3/bin/patroni /etc/patroni/patroni.yml > /tmp/pg_patroni.log 2>&1 &
/ups/app/python3/bin/patroni /etc/patroni/patroni.yml > /tmp/pg_patroni.log 2>&1 &
# 服务方式启动
systemctl daemon-reload && systemctl start patroni
启动Patroni,在host1上
- Patroni1 把本地PostgreSQL(postgresql1)的信息写入etcd.
- Patroni1 监测到数据库目录(/home/rudi/pgdata/)是空的,于是初始化数据库(initdb -D /home/rudi/pgdata)
- Patroni1 配置本地数据库相关的配置文件,例如:postgresql.conf, pg_hba.conf
- Patroni1 启动本地数据库(postgresql1): pg_ctl -D /home/rudi/pgdata start
- Patroni1 把本地数据库(postgresql1)设定为主数据库(Primary)
启动Patroni,在host2/host3上
- Patroni2/Patroni3 基于postgresql1做数据库备份(pg_basebackup),创建各自的本地数据库
- Patroni2/Patroni3 配置本地数据库相关的配置文件,例如:postgresql.conf, pg_hba.conf
- Patroni2 启动postgresql2,作为从库(Standby)
- Patroni3 启动postgresql3,作为从库(Standby)
建议手工方式配置流复制,不建议通过patroni方式配置主从环境
Patronictl 基本操作
语法
[root@pg3 ~]# patronictl --help
Usage: patronictl [OPTIONS] COMMAND [ARGS]...
Options:
-c, --config-file TEXT Configuration file
-d, --dcs TEXT Use this DCS
-k, --insecure Allow connections to SSL sites without certs
--help Show this message and exit.
Commands:
configure Create configuration file
dsn Generate a dsn for the provided member, defaults to a dsn of...
edit-config Edit cluster configuration
failover Failover to a replica
flush Discard scheduled events (restarts only currently)
history Show the history of failovers/switchovers
list List the Patroni members for a given Patroni
pause Disable auto failover
query Query a Patroni PostgreSQL member
reinit Reinitialize cluster member
reload Reload cluster member configuration
remove Remove cluster from DCS
restart Restart cluster member
resume Resume auto failover
scaffold Create a structure for the cluster in DCS
show-config Show cluster configuration
switchover Switchover to a replica
version Output version of patronictl command or a running Patroni...
[root@pg3 ~]#
示例
patronictl -c /etc/patroni/patroni_postgresql.yml show-config
patronictl -c /etc/patroni/patroni_postgresql.yml list
patronictl -c /etc/patroni/patroni_postgresql.yml edit-config
# 删除属性
patronictl -c /etc/patroni/patroni_postgresql.yml edit-config -s postgresql.parameters.synchronous_standby_names=null
# 重启数据库
patronictl -c /etc/patroni/patroni_postgresql.yml restart pgha
查看 patroni 集群
patronictl -c /etc/patroni/patroni.yml list
patronictl -c /etc/patroni/patroni.yml list etcd-cluster-pg
patronictl -d etcd://pg1:2379 list etcd-cluster-pg
# 输出结果
+ Cluster: etcd-cluster-pg (6854432365693308402) -+----+-----------+
| Member | Host | Role | State | TL | Lag in MB |
+--------+---------------------+--------+---------+----+-----------+
| pg1 | 192.168.10.190:2020 | Leader | running | 1 | |
| pg2 | 192.168.10.191:2020 | | running | 1 | 0 |
| pg3 | 192.168.10.192:2020 | | running | 1 | 0 |
+--------+---------------------+--------+---------+----+-----------+
[root@pg1 ~]# etcdctl ls --recursive --sort -p /service
/service/etcd-cluster-pg/
/service/etcd-cluster-pg/config
/service/etcd-cluster-pg/initialize
/service/etcd-cluster-pg/leader
/service/etcd-cluster-pg/members/
/service/etcd-cluster-pg/members/pg1
/service/etcd-cluster-pg/members/pg2
/service/etcd-cluster-pg/members/pg3
/service/etcd-cluster-pg/optime/
/service/etcd-cluster-pg/optime/leader
[root@pg1 ~]#
[root@pg1 ~]# etcdctl get /service/etcd-cluster-pg/members/pg1
{"conn_url":"postgres://192.168.10.190:2020/postgres","api_url":"http://192.168.10.190:8008/patroni","state":"running","role":"master","version":"1.6.5","xlog_location":201335568,"timeline":1}
查看 cluster 状态
curl -s "http://192.168.10.190:8008/cluster" | jq .
curl -s "http://192.168.10.191:8008/cluster" | jq .
curl -s "http://192.168.10.192:8008/cluster" | jq .
[postgres@pg1 ~]$ curl -s "http://192.168.10.190:8008/cluster" | jq .
{
"members": [
{
"name": "pgsql12_pg1",
"role": "leader",
"state": "running",
"api_url": "http://192.168.10.190:8008/patroni",
"host": "192.168.10.190",
"port": 2020,
"timeline": 2
},
{
"name": "pgsql12_pg2",
"role": "replica",
"state": "running",
"api_url": "http://192.168.10.191:8008/patroni",
"host": "192.168.10.191",
"port": 2020,
"timeline": 2,
"lag": 0
},
{
"name": "pgsql12_pg3",
"role": "replica",
"state": "running",
"api_url": "http://192.168.10.192:8008/patroni",
"host": "192.168.10.192",
"port": 2020,
"timeline": 2,
"lag": 0
}
]
}
查看 patroni 节点状态
curl -s "http://192.168.10.190:8008/patroni" | jq .
curl -s "http://192.168.10.191:8008/patroni" | jq .
curl -s "http://192.168.10.192:8008/patroni" | jq .
[root@pg1 ~]# curl -s "http://192.168.10.190:8008/patroni" | jq .
{
"state": "running",
"postmaster_start_time": "2020-07-29 10:25:32.214 CST",
"role": "master",
"server_version": 120002,
"cluster_unlocked": false,
"xlog": {
"location": 201335568
},
"timeline": 1,
"replication": [
{
"usename": "sync",
"application_name": "pg2",
"client_addr": "192.168.10.191",
"state": "streaming",
"sync_state": "sync",
"sync_priority": 1
},
{
"usename": "sync",
"application_name": "pg3",
"client_addr": "192.168.10.192",
"state": "streaming",
"sync_state": "potential",
"sync_priority": 1
}
],
"database_system_identifier": "6854432365693308402",
"patroni": {
"version": "1.6.5",
"scope": "etcd-cluster-pg"
}
}
更多查询命令参考 Patroni REST API。
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· .NET10 - 预览版1新功能体验(一)