Loading

etcd+coredns

简介

etcd是使用Go语言开发的一个开源的、高可用的分布式key-value存储系统,可以用于配置共享和服务的注册和发现。

CoreDNS 其实就是一个 DNS 服务,所以很多开源项目以及工程师都会使用 CoreDNS 为集群提供服务发现的功能

ETCD 安装

使用yum安装,最简化配置,非集群;生产环境建议部署etcd集群。 安装

# yum install etcd -y

启动etcd

# systemctl start etcd

设置开机启动

# systemctl enable etcd

coredns安装

下载二进制版本:https://github.com/coredns/coredns/releases 解压安装

# tar zxvf coredns_1.3.0_linux_amd64.tgz
# mv coredns /usr/local/bin
# mkdir /etc/coredns
添加主配置文件 vi /etc/coredns/Corefile,内容如下:
vi /etc/coredns/Corefile
.:53 {    # 监听tcp和udp的53端口
    etcd {   # 配置启用etcd插件,后面可以指定域名,例如 etcd test.com {
        stubzones # 启用存根区域功能。 stubzone仅在位于指定的第一个区域下方的etcd树中完成
        path /coredns # etcd里面的路径 默认为/skydns,以后所有的dns记录就是存储在该存根路径底下
        endpoint http://localhost:2379 # etcd访问地址,多个空格分开
        
        # upstream 指定上游解析器,用于解析 etcd 中的记录(例如 CNAME)所指向的外部域名。
        upstream 8.8.8.8:53 8.8.4.4:53 /etc/resolv.conf 
        
        fallthrough # 如果区域匹配但不能生成记录,则将请求传递给下一个插件
        # tls CERT KEY CACERT # 可选参数,etcd认证证书设置
    }
    prometheus  # 监控插件
    cache 160
    loadbalance   # 负载均衡,开启DNS记录轮询策略
    proxy . 8.8.8.8:53 8.8.4.4:53 /etc/resolv.conf  # 上面etcd未查询到的请求转发给设置的DNS服务器解析
    log # 打印日志
}

启动coredns

# nohup /usr/local/bin/coredns -conf /etc/coredns/Corefile > /tmp/coredns.log 2>&1 &

也可以将coredns注册成服务 vim /usr/lib/systemd/system/coredns.service 添加如下内容

[Unit]
Description=CoreDNS DNS server
Documentation=https://coredns.io
After=network.target

[Service]
Type=simple
EnvironmentFile=-/etc/default/coredns
PermissionsStartOnly=true
LimitNOFILE=1048576
LimitNPROC=512
CapabilityBoundingSet=CAP_NET_BIND_SERVICE 
AmbientCapabilities=CAP_NET_BIND_SERVICE 
NoNewPrivileges=true 
User=root
# WorkingDirectory=~
ExecStart=/usr/local/bin/coredns $ARGS 
ExecReload=/bin/kill -SIGUSR1 $MAINPID
Restart=on-failure

[Install]
WantedBy=multi-user.target
# systemctl daemon-reload 
# systemctl start coredns

测试命令

# dig @127.1 www.baidu.com +short
www.a.shifen.com.
www.wshifen.com.
103.235.46.39

ETCD集群配置

一般etcd集群需要3个或3个以上的奇数节点,我们这里使用两个节点测试看会是什么情况

node1 10.180.11.3,node2 10.180.11.2

[root@node1 ~]# cat /etc/etcd/etcd.conf
#[Member]
#ETCD_CORS=""
#ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
ETCD_DATA_DIR="/home/etcd/data.etcd"
#ETCD_WAL_DIR=""
#ETCD_LISTEN_PEER_URLS="http://localhost:2380"
ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
#ETCD_MAX_SNAPSHOTS="5"
#ETCD_MAX_WALS="5"
ETCD_NAME="etcd_dns_node1"
#ETCD_SNAPSHOT_COUNT="100000"
#ETCD_HEARTBEAT_INTERVAL="100"
#ETCD_ELECTION_TIMEOUT="1000"
#ETCD_QUOTA_BACKEND_BYTES="0"
#ETCD_MAX_REQUEST_BYTES="1572864"
#ETCD_GRPC_KEEPALIVE_MIN_TIME="5s"
#ETCD_GRPC_KEEPALIVE_INTERVAL="2h0m0s"
#ETCD_GRPC_KEEPALIVE_TIMEOUT="20s"
#
#[Clustering]
#ETCD_INITIAL_ADVERTISE_PEER_URLS="http://localhost:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.3:2380"
#ETCD_ADVERTISE_CLIENT_URLS="http://localhost:2379"
ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.3:2379"
#ETCD_DISCOVERY=""
#ETCD_DISCOVERY_FALLBACK="proxy"
#ETCD_DISCOVERY_PROXY=""
#ETCD_DISCOVERY_SRV=""
ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
#ETCD_STRICT_RECONFIG_CHECK="true"
#ETCD_ENABLE_V2="true"
#
#[Proxy]
#ETCD_PROXY="off"
#ETCD_PROXY_FAILURE_WAIT="5000"
#ETCD_PROXY_REFRESH_INTERVAL="30000"
#ETCD_PROXY_DIAL_TIMEOUT="1000"
#ETCD_PROXY_WRITE_TIMEOUT="5000"
#ETCD_PROXY_READ_TIMEOUT="0"
#
#[Security]
#ETCD_CERT_FILE=""
#ETCD_KEY_FILE=""
#ETCD_CLIENT_CERT_AUTH="false"
#ETCD_TRUSTED_CA_FILE=""
#ETCD_AUTO_TLS="false"
#ETCD_PEER_CERT_FILE=""
#ETCD_PEER_KEY_FILE=""
#ETCD_PEER_CLIENT_CERT_AUTH="false"
#ETCD_PEER_TRUSTED_CA_FILE=""
#ETCD_PEER_AUTO_TLS="false"
#
#[Logging]
#ETCD_DEBUG="false"
#ETCD_LOG_PACKAGE_LEVELS=""
#ETCD_LOG_OUTPUT="default"
#
#[Unsafe]
#ETCD_FORCE_NEW_CLUSTER="false"
#
#[Version]
#ETCD_VERSION="false"
#ETCD_AUTO_COMPACTION_RETENTION="0"
#
#[Profiling]
#ETCD_ENABLE_PPROF="false"
#ETCD_METRICS="basic"
#
#[Auth]
#ETCD_AUTH_TOKEN="simple"
[root@node1 ~]#


[root@node1 ~]# cat /etc/etcd/etcd.conf
#[Member]
ETCD_DATA_DIR="/home/etcd/data.etcd"
ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
ETCD_NAME="etcd_dns_node2"
#
#[Clustering]
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379"
ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER_STATE="existing"
#
#[Member]
#ETCD_CORS=""
#ETCD_DATA_DIR="/var/lib/etcd/default.etcd"
#ETCD_DATA_DIR="/home/etcd/data.etcd"
#ETCD_WAL_DIR=""
#ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
#ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
#ETCD_MAX_SNAPSHOTS="5"
#ETCD_MAX_WALS="5"
#ETCD_NAME="etcd_dns_node2"
#ETCD_SNAPSHOT_COUNT="100000"
#ETCD_HEARTBEAT_INTERVAL="100"
#ETCD_ELECTION_TIMEOUT="1000"
#ETCD_QUOTA_BACKEND_BYTES="0"
#ETCD_MAX_REQUEST_BYTES="1572864"
#ETCD_GRPC_KEEPALIVE_MIN_TIME="5s"
#ETCD_GRPC_KEEPALIVE_INTERVAL="2h0m0s"
#ETCD_GRPC_KEEPALIVE_TIMEOUT="20s"
#
#[Clustering]
#ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
#ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379"
#ETCD_DISCOVERY=""
#ETCD_DISCOVERY_FALLBACK="proxy"
#ETCD_DISCOVERY_PROXY=""
#ETCD_DISCOVERY_SRV=""
#ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
#ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
#ETCD_INITIAL_CLUSTER_STATE="existing"
#ETCD_STRICT_RECONFIG_CHECK="true"
#ETCD_ENABLE_V2="true"
#
#[Proxy]
#ETCD_PROXY="off"
#ETCD_PROXY_FAILURE_WAIT="5000"
#ETCD_PROXY_REFRESH_INTERVAL="30000"
#ETCD_PROXY_DIAL_TIMEOUT="1000"
#ETCD_PROXY_WRITE_TIMEOUT="5000"
#ETCD_PROXY_READ_TIMEOUT="0"
#
#[Security]
#ETCD_CERT_FILE=""
#ETCD_KEY_FILE=""
#ETCD_CLIENT_CERT_AUTH="false"
#ETCD_TRUSTED_CA_FILE=""
#ETCD_AUTO_TLS="false"
#ETCD_PEER_CERT_FILE=""
#ETCD_PEER_KEY_FILE=""
#ETCD_PEER_CLIENT_CERT_AUTH="false"
#ETCD_PEER_TRUSTED_CA_FILE=""
#ETCD_PEER_AUTO_TLS="false"
#
#[Logging]
#ETCD_DEBUG="false"
#ETCD_LOG_PACKAGE_LEVELS=""
#ETCD_LOG_OUTPUT="default"
#
#[Unsafe]
#ETCD_FORCE_NEW_CLUSTER="false"
#
#[Version]
#ETCD_VERSION="false"
#ETCD_AUTO_COMPACTION_RETENTION="0"
#
#[Profiling]
#ETCD_ENABLE_PPROF="false"
#ETCD_METRICS="basic"
#
#[Auth]
#ETCD_AUTH_TOKEN="simple"
[root@node1 ~]#

coredns配置

节点1

[root@dns-1 ~]# cat /etc/coredns/Corefile
.:53 {
etcd {
stubzones
path /skydns
endpoint http://10.180.11.3:2379
upstream 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf 
fallthrough
}
#cache 160
forward . 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf
log
errors
}
[root@dns-1 ~]#

节点2

[root@dns-2 ~]# cat /etc/coredns/Corefile
.:53 {
etcd {
stubzones
path /skydns
endpoint http://10.180.11.3:2379
upstream 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf 
fallthrough
}
#cache 160
forward . 114.114.114.114:53 114.114.115.115:53 /etc/resolv.conf
log
errors
}
[root@dns-2 ~]#

coredns注册成系统服务

在两个节点将coredns注册成服务

[root@dns-1 ~]# cat /usr/lib/systemd/system/coredns.service
[Unit]
Description=CoreDNS DNS server
Documentation=https://coredns.io
After=network.target

[Service]
PermissionsStartOnly=true
LimitNOFILE=1048576
LimitNPROC=512
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
AmbientCapabilities=CAP_NET_BIND_SERVICE
NoNewPrivileges=true
User=root
ExecStart=/usr/sbin/coredns -quiet -conf /etc/coredns/Corefile
ExecReload=/bin/kill -SIGUSR1 $MAINPID
Restart=on-failure

[Install]
WantedBy=multi-user.target
[root@dns-1 ~]#

配置开机启动

systemctl enable etcd
systemctl enable coredns

配置etcd API 版本为3

[root@dns-1 ~]# cat .bash_profile
export ETCDCTL_API=3

启动etcd 和coredns

systemctl start etcd
systemctl start coredns

etcdctl 命令帮助

[root@dns-1 ~]# etcdctl -h
NAME:
etcdctl - A simple command line client for etcd3.

USAGE:
etcdctl

VERSION:
3.3.11

API VERSION:
3.3


COMMANDS:
get Gets the key or a range of keys
put Puts the given key into the store
del Removes the specified key or range of keys [key, range_end)
txn Txn processes all the requests in one transaction
compaction Compacts the event history in etcd
alarm disarm Disarms all alarms
alarm list Lists all alarms
defrag Defragments the storage of the etcd members with given endpoints
endpoint health Checks the healthiness of endpoints specified in `--endpoints` flag
endpoint status Prints out the status of endpoints specified in `--endpoints` flag
endpoint hashkv Prints the KV history hash for each endpoint in --endpoints
move-leader Transfers leadership to another etcd cluster member.
watch Watches events stream on keys or prefixes
version Prints the version of etcdctl
lease grant Creates leases
lease revoke Revokes leases
lease timetolive Get lease information
lease list List all active leases
lease keep-alive Keeps leases alive (renew)
member add Adds a member into the cluster
member remove Removes a member from the cluster
member update Updates a member in the cluster
member list Lists all members in the cluster
snapshot save Stores an etcd node backend snapshot to a given file
snapshot restore Restores an etcd member snapshot to an etcd directory
snapshot status Gets backend snapshot status of a given file
make-mirror Makes a mirror at the destination etcd cluster
migrate Migrates keys in a v2 store to a mvcc store
lock Acquires a named lock
elect Observes and participates in leader election
auth enable Enables authentication
auth disable Disables authentication
user add Adds a new user
user delete Deletes a user
user get Gets detailed information of a user
user list Lists all users
user passwd Changes password of user
user grant-role Grants a role to a user
user revoke-role Revokes a role from a user
role add Adds a new role
role delete Deletes a role
role get Gets detailed information of a role
role list Lists all roles
role grant-permission Grants a key to a role
role revoke-permission Revokes a key from a role
check perf Check the performance of the etcd cluster
help Help about any command

OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)

通过etcdctl设置key:

(1)添加删除etcd数据库中的A记录

添加记录命令

etcdctl put /skydns/com/test1/www/v4 '{"host":"1.1.2.3"}'

删除记录

[root@dns-1 ~]# etcdctl get / --prefix
/skydns/com/test1/www/v4
{"host":"1.1.2.3"}
/skydns/migu/test/www/v4
{"host":"1.1.1.1"}
/skydns/migu/test1/www/v4
{"host":"1.1.2.2"}
[root@dns-1 ~]# etcdctl del /skydns/com/ -p
Error: unknown shorthand flag: 'p' in -p
NAME:
del - Removes the specified key or range of keys [key, range_end)

USAGE:
etcdctl del [options] <key> [range_end]

OPTIONS:
--from-key[=false] delete keys that are greater than or equal to the given key using byte compare
--prefix[=false] delete keys with matching prefix
--prev-kv[=false] return deleted key-value pairs

GLOBAL OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)


[root@dns-1 ~]# etcdctl del /skydns/com/ --prefix
1
[root@dns-1 ~]# etcdctl get / --prefix
/skydns/migu/test/www/v4
{"host":"1.1.1.1"}
/skydns/migu/test1/www/v4
{"host":"1.1.2.2"}
[root@dns-1 ~]# etcdctl del / --prefix
2
[root@dns-1 ~]# etcdctl get / --prefix
[root@dns-1 ~]#

  非常奇怪:两个节点组成了集群,在一个节点上进行数据的增加、删除,其他节点本应同步进行,但在节点2删除数据后,节点1上的数据还在。此前集群状态是正常的,数据也是在一个节点上添加的;后来为了测试 snapshot restore 的功能,在做了 snapshot save 后删除了源数据,并进行了 snapshot restore,修改数据文件属主属组为etcd,并成功在两个节点启动etcd,测试数据已经恢复,所以当时并没有查看集群的同步状态。需要注意的是,etcd集群需要超过半数的节点存活,否则整个集群将不可用。

[root@dns-2 ~]# etcdctl get / --prefix
/skydns/com/test1/www/v4
{"host":"1.1.2.3"}
/skydns/migu/test/www/v4
{"host":"1.1.1.1"}
/skydns/migu/test1/www/v4
{"host":"1.1.2.2"}
[root@dns-2 ~]# etcdctl member list
8e9e05c52164694d, started, etcd_dns_node1, http://localhost:2380, http://10.180.11.3:2379
[root@dns-2 ~]# etcdctl endpoint status
127.0.0.1:2379, 8e9e05c52164694d, 3.3.11, 16 kB, true, 2, 4
[root@dns-2 ~]# etcdctl endpoint health
127.0.0.1:2379 is healthy: successfully committed proposal: took = 500.505µs
[root@dns-2 ~]# etcdctl endpoint health --write-out="table"
127.0.0.1:2379 is healthy: successfully committed proposal: took = 503.821µs
[root@dns-2 ~]# etcdctl endpoint health --write-out="table"
127.0.0.1:2379 is healthy: successfully committed proposal: took = 1.066859ms
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" endpoint health
127.0.0.1:2379 is healthy: successfully committed proposal: took = 492.012µs
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" endpoint status
+----------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+----------------+------------------+---------+---------+-----------+-----------+------------+
| 127.0.0.1:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+----------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2,10.180.11.3 endpoint status
Error: dial tcp: address 10.180.11.3: missing port in address
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 7 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health
10.180.11.3:2379 is healthy: successfully committed proposal: took = 529.95µs
10.180.11.2:2379 is healthy: successfully committed proposal: took = 472.66

那重启一下etcd看看会是什么情况

[root@dns-2 ~]# systemctl restart etcd
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health
10.180.11.2:2379 is healthy: successfully committed proposal: took = 513.726µs
10.180.11.3:2379 is healthy: successfully committed proposal: took = 613.431µs

[root@dns-1 ~]# systemctl restart etcd
[root@dns-1 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 9 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-1 ~]#

[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 7 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-2 ~]# etcdctl member list
8e9e05c52164694d, started, etcd_dns_node1, http://localhost:2380, http://10.180.11.3:2379

可以正常重启,但集群还是分裂的,尝试在节点2将节点1加入集群,因为两边数据不一致,估计不会成功

[root@dns-2 ~]# etcdctl member add -h
NAME:
member add - Adds a member into the cluster

USAGE:
etcdctl member add <memberName> [options]

OPTIONS:
--peer-urls="" comma separated peer URLs for the new member.

GLOBAL OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)

[root@dns-2 ~]# etcdctl member add http://10.180.11.2:2380
Error: member peer urls not provided.
[root@dns-2 ~]# etcdctl member add --peer-urls="http://10.180.11.2:2380"
Error: member name not provided.
[root@dns-2 ~]# etcdctl member add etcd-dns-cluster --peer-urls="http://10.180.11.2:2380"
Member 5d81d60b777579ed added to cluster cdf818194e3a8c32

ETCD_NAME="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER="etcd-dns-cluster=http://10.180.11.2:2380,etcd_dns_node1=http://localhost:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
ETCD_INITIAL_CLUSTER_STATE="existing"
[root@dns-2 ~]#
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 9 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | false | 3 | 7 |
+------------------+------------------+---------+---------+-----------+-----------+------------+

可以看出虽然dns-1已经添加到集群但 节点2的状态是异常的

[root@dns-2 ~]# etcdctl get / --prefix
Error: context deadline exceeded
[root@dns-2 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint health
10.180.11.2:2379 is healthy: successfully committed proposal: took = 552.875µs
10.180.11.3:2379 is unhealthy: failed to commit proposal: context deadline exceeded
Error: unhealthy cluster
[root@dns-2 ~]#
[root@dns-1 ~]# etcdctl get / --prefix
[root@dns-1 ~]#

尝试下关闭两个节点etcd,再逐个节点启动etcd

先启节点1

[root@dns-1 ~]# systemctl stop etcd
[root@dns-1 ~]# systemctl start etcd
[root@dns-1 ~]# etcdctl get / --prefix
[root@dns-1 ~]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
Failed to get the status of endpoint 10.180.11.3:2379 (context deadline exceeded)
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 5 | 13 |
+------------------+------------------+---------+---------+-----------+-----------+------------+

再启节点2,节点2etcd无法启动

[root@dns-2 ~]# systemctl start etcd
Job for etcd.service failed because a timeout was exceeded. See "systemctl status etcd.service" and "journalctl -xe" for details.

节点1使用之前创建的快照恢复到和节点2一样,并重启etcd

[root@dns-1 ~]# systemctl stop etcd
[root@dns-1 ~]# cd /home/etcd/
[root@dns-1 etcd]# ll
total 0
drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd
drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak
drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup
[root@dns-1 etcd]# mv data.etcd data.etcd.20200528.bak
[root@dns-1 etcd]# etcdctl snapshot restor -h
NAME:
snapshot restore - Restores an etcd member snapshot to an etcd directory

USAGE:
etcdctl snapshot restore <filename> [options]

OPTIONS:
--data-dir="" Path to the data directory
--initial-advertise-peer-urls="http://localhost:2380" List of this member's peer URLs to advertise to the rest of the cluster
--initial-cluster="default=http://localhost:2380" Initial cluster configuration for restore bootstrap
--initial-cluster-token="etcd-cluster" Initial cluster token for the etcd cluster during restore bootstrap
--name="default" Human-readable name for this member
--skip-hash-check[=false] Ignore snapshot integrity hash value (required if copied from data directory)
--wal-dir="" Path to the WAL directory (use --data-dir if none given)

GLOBAL OPTIONS:
--cacert="" verify certificates of TLS-enabled secure servers using this CA bundle
--cert="" identify secure client using this TLS certificate file
--command-timeout=5s timeout for short running command (excluding dial timeout)
--debug[=false] enable client-side debug logging
--dial-timeout=2s dial timeout for client connections
-d, --discovery-srv="" domain name to query for SRV records describing cluster endpoints
--endpoints=[127.0.0.1:2379] gRPC endpoints
--hex[=false] print byte strings as hex encoded strings
--insecure-discovery[=true] accept insecure SRV records describing cluster endpoints
--insecure-skip-tls-verify[=false] skip server certificate verification
--insecure-transport[=true] disable transport security for client connections
--keepalive-time=2s keepalive time for client connections
--keepalive-timeout=6s keepalive timeout for client connections
--key="" identify secure client using this TLS key file
--user="" username[:password] for authentication (prompt if password is not supplied)
-w, --write-out="simple" set the output format (fields, json, protobuf, simple, table)

从快照恢复节点1的数据

[root@dns-1 etcd]# etcdctl snapshot restor /home/etcd/etcd_backup/etcd_20200527113646.db --data-dir="/home/etcd/data.etcd"
2020-05-28 11:09:15.687417 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32
[root@dns-1 etcd]#
[root@dns-1 etcd]# ll
total 0
drwx------ 3 root root 20 May 28 11:09 data.etcd
drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd.20200528.bak
drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak
drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup
[root@dns-1 etcd]# chown etcd:etcd data.etcd
[root@dns-1 etcd]# ll
total 0
drwx------ 3 etcd etcd 20 May 28 11:09 data.etcd
drwx------ 3 etcd etcd 20 May 28 11:00 data.etcd.20200528.bak
drwx------ 3 etcd etcd 20 May 26 19:33 data.etcd.bak
drwxr-x--- 2 root root 36 May 27 11:36 etcd_backup
[root@dns-1 etcd]#
[root@dns-1 etcd]# systemctl start etcd        <<==========启动节点1 etcd  报错
Job for etcd.service failed because the control process exited with error code. See "systemctl status etcd.service" and "journalctl -xe" for details.
[root@dns-1 etcd]# journalctl -xe
-- The result is failed.
May 28 11:09:56 dns-1.novalocal systemd[1]: Unit etcd.service entered failed state.
May 28 11:09:56 dns-1.novalocal systemd[1]: etcd.service failed.
May 28 11:09:57 dns-1.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart.
May 28 11:09:57 dns-1.novalocal systemd[1]: start request repeated too quickly for etcd.service
May 28 11:09:57 dns-1.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
May 28 11:09:57 dns-1.novalocal systemd[1]: Unit etcd.service entered failed state.
May 28 11:09:57 dns-1.novalocal systemd[1]: etcd.service failed.
May 28 11:10:01 dns-1.novalocal systemd[1]: Started Session 5722 of user root.
-- Subject: Unit session-5722.scope has finished start-up
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit session-5722.scope has finished starting up.
--
-- The start-up result is done.
May 28 11:10:01 dns-1.novalocal systemd[1]: Starting Session 5722 of user root.
-- Subject: Unit session-5722.scope has begun start-up
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit session-5722.scope has begun starting up.
May 28 11:10:01 dns-1.novalocal CROND[18120]: (root) CMD (/usr/lib64/sa/sa1 1 1)
May 28 11:10:15 dns-1.novalocal sshd[18126]: Connection closed by 127.0.0.1 port 54794 [preauth]
[root@dns-1 etcd]#
[root@dns-1 etcd]# chown etcd:etcd data.etcd -R         <<==========修改数据文件权限
[root@dns-1 etcd]#
[root@dns-1 etcd]# systemctl start etcd            <<============启动节点1 etcd ,正常启动
[root@dns-1 etcd]#
[root@dns-1 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
Failed to get the status of endpoint 10.180.11.3:2379 (context deadline exceeded)
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-1 etcd]#

再启节点2

[root@dns-2 ~]# systemctl start etcd
Job for etcd.service failed because a timeout was exceeded. See "systemctl status etcd.service" and "journalctl -xe" for details.
**关掉node1,节点2 恢复快照重启并启动etcd**
[root@dns-1 etcd]# systemctl stop etcd
[root@dns-2 etcd]# rm -rf data.etcd
[root@dns-2 etcd]#
[root@dns-2 etcd]# ll
total 20
drwx------. 3 etcd etcd 20 May 26 19:14 data.etcd.bak
-rw-r-----. 1 root root 16416 May 27 11:45 etcd_20200527113646.db
[root@dns-2 etcd]#
[root@dns-2 etcd]# etcdctl snapshot restor /home/etcd/etcd_20200527113646.db --data-dir="/home/etcd/data.etcd"
2020-05-28 11:22:06.237276 I | etcdserver/membership: added member 8e9e05c52164694d [http://localhost:2380] to cluster cdf818194e3a8c32
[root@dns-2 etcd]#
[root@dns-2 etcd]# ll
total 20
drwx------. 3 root root 20 May 28 11:22 data.etcd
drwx------. 3 etcd etcd 20 May 26 19:14 data.etcd.bak
-rw-r-----. 1 root root 16416 May 27 11:45 etcd_20200527113646.db
[root@dns-2 etcd]# chown etcd:etcd data.etcd -R
[root@dns-2 etcd]# systemctl start etcd           <<==========节点2 etcd 正常启动

再启节点1

[root@dns-1 etcd]# systemctl start etcd      
[root@dns-1 etcd]#
[root@dns-1 etcd]# etcdctl --write-out="table" --endpoints=10.180.11.2:2379,10.180.11.3:2379 endpoint status
+------------------+------------------+---------+---------+-----------+-----------+------------+
| ENDPOINT | ID | VERSION | DB SIZE | IS LEADER | RAFT TERM | RAFT INDEX |
+------------------+------------------+---------+---------+-----------+-----------+------------+
| 10.180.11.2:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 3 | 6 |
| 10.180.11.3:2379 | 8e9e05c52164694d | 3.3.11 | 16 kB | true | 2 | 4 |
+------------------+------------------+---------+---------+-----------+-----------+------------+
[root@dns-1 etcd]#

etcd集群添加节点

[root@dns-1 ~]# cat /etc/etcd/etcd.conf |grep -v "#"
ETCD_DATA_DIR="/home/etcd/data.etcd"
ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
ETCD_NAME="etcd_dns_node2"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.2:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.2:2379"
ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380,etcd_dns_node3=http://10.180.11.17:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER_STATE="new"
[root@dns-1 ~]#
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl member add --peer-urls="http://10.180.11.17:2380"
Error: member name not provided.
[root@dns-1 ~]# etcdctl member add etcd_dns_node3 --peer-urls="http://10.180.11.17:2380"
Member 13db2c9d1758b274 added to cluster b45f8b203d965968

ETCD_NAME="etcd_dns_node3"
ETCD_INITIAL_CLUSTER="etcd_dns_node3=http://10.180.11.17:2380,etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.17:2380"
ETCD_INITIAL_CLUSTER_STATE="existing"
[root@dns-1 ~]#
[root@dns-1 ~]#
[root@dns-1 ~]# etcdctl member list
13db2c9d1758b274, unstarted, , http://10.180.11.17:2380,
2820e5fdaeb09ba6, started, etcd_dns_node1, http://10.180.11.3:2380, http://10.180.11.3:2379
523a2b4083effb13, started, etcd_dns_node2, http://10.180.11.2:2380, http://10.180.11.2:2379

启动新节点

[root@dns-3 ~]# systemctl start etcd
Job for etcd.service failed because the control process exited with error code. See "systemctl status etcd.service" and "journalctl -xe" for details.
[root@dns-3 ~]#
[root@dns-3 ~]#
[root@dns-3 ~]# journalctl -xe
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Starting Etcd Server...
-- Subject: Unit etcd.service has begun start-up
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has begun starting up.
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_ADVERTISE_CLIENT_URLS=http://10.180.11.17:2379
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_ADVERTISE_PEER_URLS=http://10.180.11.17:2380
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER=etcd_dns_node1=http://10.180.11.3:2380,etc
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER_STATE=existing
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_INITIAL_CLUSTER_TOKEN=etcd-dns-cluster
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized and used environment variable ETCD_LISTEN_PEER_URLS=http://0.0.0.0:2380
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_NAME, but unused: shadowed by corresponding flag
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_DATA_DIR, but unused: shadowed by corresponding flag
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: recognized environment variable ETCD_LISTEN_CLIENT_URLS, but unused: shadowed by corresponding flag
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: etcd Version: 3.3.11
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Git SHA: 2cf9e51
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Go Version: go1.10.3
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: Go OS/Arch: linux/amd64
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: setting maximum number of CPUs to 4, total number of available CPUs is 4
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: listening for peers on http://0.0.0.0:2380
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: listening for client requests on 0.0.0.0:2379
Jul 13 22:23:41 dns-3.novalocal etcd[12609]: cannot access data directory: mkdir /home/etcd: permission denied
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service: main process exited, code=exited, status=1/FAILURE
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service holdoff time over, scheduling restart.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: start request repeated too quickly for etcd.service
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Failed to start Etcd Server.
-- Subject: Unit etcd.service has failed
-- Defined-By: systemd
-- Support: http://lists.freedesktop.org/mailman/listinfo/systemd-devel
--
-- Unit etcd.service has failed.
--
-- The result is failed.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: Unit etcd.service entered failed state.
Jul 13 22:23:41 dns-3.novalocal systemd[1]: etcd.service failed.
[root@dns-3 ~]#
[root@dns-3 ~]# grep -v "#" /etc/etcd/etcd.conf
ETCD_DATA_DIR="/home/etcd/data.etcd"
ETCD_LISTEN_PEER_URLS="http://0.0.0.0:2380"
ETCD_LISTEN_CLIENT_URLS="http://0.0.0.0:2379"
ETCD_NAME="etcd_dns_node3"
ETCD_INITIAL_ADVERTISE_PEER_URLS="http://10.180.11.17:2380"
ETCD_ADVERTISE_CLIENT_URLS="http://10.180.11.17:2379"
ETCD_INITIAL_CLUSTER="etcd_dns_node1=http://10.180.11.3:2380,etcd_dns_node2=http://10.180.11.2:2380,etcd_dns_node3=http://10.180.11.17:2380"
ETCD_INITIAL_CLUSTER_TOKEN="etcd-dns-cluster"
ETCD_INITIAL_CLUSTER_STATE="existing"
[root@dns-3 ~]# mkdir -p /home/etcd/data.etcd
[root@dns-3 ~]# id etcd
uid=997(etcd) gid=994(etcd) groups=994(etcd)
[root@dns-3 ~]# chown etcd:etcd /home/etcd -R
[root@dns-3 ~]# systemctl start etcd
[root@dns-3 ~]# systemctl status etcd
● etcd.service - Etcd Server
Loaded: loaded (/usr/lib/systemd/system/etcd.service; disabled; vendor preset: disabled)
Active: active (running) since Mon 2020-07-13 22:25:46 CST; 18s ago
Main PID: 12635 (etcd)
CGroup: /system.slice/etcd.service
└─12635 /usr/bin/etcd --name=etcd_dns_node3 --data-dir=/home/etcd/data.etcd --listen-client-urls=http://0.0.0.0:2379

Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 2820e5fdaeb09ba6 [http://10.180.11.3:2380] to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 523a2b4083effb13 [http://10.180.11.2:2380] to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: set the initial cluster version to 3.3
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: enabled capabilities for version 3.3
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: added member 13db2c9d1758b274 [http://10.180.11.17:2380] to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: ready to serve client requests
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: published {Name:etcd_dns_node3 ClientURLs:[http://10.180.11.17:2379]} to cluster b45f8b203d965968
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: serving insecure client requests on [::]:2379, this is strongly discouraged!
Jul 13 22:25:46 dns-3.novalocal systemd[1]: Started Etcd Server.
Jul 13 22:25:46 dns-3.novalocal etcd[12635]: 13db2c9d1758b274 initialzed peer connection; fast-forwarding 8 ticks (election ticks 10) with... peer(s)
Hint: Some lines were ellipsized, use -l to show in full.
[root@dns-3 ~]# etcdctl member list
13db2c9d1758b274, started, etcd_dns_node3, http://10.180.11.17:2380, http://10.180.11.17:2379
2820e5fdaeb09ba6, started, etcd_dns_node1, http://10.180.11.3:2380, http://10.180.11.3:2379
523a2b4083effb13, started, etcd_dns_node2, http://10.180.11.2:2380, http://10.180.11.2:2379
[root@dns-3 ~]#

参考:

http://www.topgoer.com/数据库操作/go操作etcd/etcd介绍.html

https://coredns.io/

posted @ 2021-04-14 13:15  五月的麦田  阅读(1186)  评论(1)  编辑  收藏  举报