nomad 集群部署
Nomad 高可用搭建
0. 服务器初始化
部署前先清空防火墙规则;如果是在部署完成后才清空,则需要重启 docker、consul、nomad 服务(见下方被注释的命令)
# Flush the rules of every chain in the default (filter) iptables table.
iptables -F
# If the flush is done after deployment, restart the services afterwards:
# systemctl restart docker consul nomad
# Set vm.max_map_count on the running kernel, then read it back to confirm.
# NOTE(review): `sysctl -w` alone is not persisted across reboots.
sysctl -w vm.max_map_count=262144
sysctl -a | grep vm.max_map_count
1. 安装 consul
官方文档:https://learn.hashicorp.com/tutorials/consul/deployment-guide
1.1 准备部署目录
mkdir /tmp/consul/ -p && cd /tmp/consul/
1.2 下载并解压
# Consul release to install and the base URL of the HashiCorp release server.
export CONSUL_VERSION="1.11.2"
export CONSUL_URL="https://releases.hashicorp.com/consul"
# Fetch the Linux amd64 archive, the checksum list, and the detached signature
# of that list into the current directory.
curl --remote-name \
  "${CONSUL_URL}/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip"
curl --silent --remote-name \
  "${CONSUL_URL}/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_SHA256SUMS"
curl --silent --remote-name \
  "${CONSUL_URL}/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_SHA256SUMS.sig"
# Check the archive against its entry in the checksum list; unzip only runs
# when the check succeeds, so a corrupted download is never installed.
# NOTE(review): the .sig file is downloaded but not verified here; doing so
# needs gpg and the HashiCorp public key.
grep -F "consul_${CONSUL_VERSION}_linux_amd64.zip" "consul_${CONSUL_VERSION}_SHA256SUMS" \
  | sha256sum --check - \
  && unzip "consul_${CONSUL_VERSION}_linux_amd64.zip"
# Install the binary into /usr/bin owned by root, then print its version.
sudo chown root:root consul
sudo mv consul /usr/bin/
consul --version
1.3 开启自动补全
consul -autocomplete-install
complete -C /usr/bin/consul consul
1.4 创建其数据目录
sudo useradd --system --home /etc/consul.d --shell /bin/false consul
sudo mkdir --parents /opt/consul
sudo chown --recursive consul:consul /opt/consul
1.5 验证安装
consul
1.6 准备配置文件
# Create the configuration directory with sudo, like the neighbouring steps,
# so the procedure also works for a non-root operator.
sudo mkdir --parents /etc/consul.d/
# Write the agent configuration through `sudo tee`: with `cat > file` the
# redirection is opened by the calling shell, which cannot write under /etc
# unless it is already root.
# NOTE: bind_addr and retry_join below are hard-coded addresses; adjust them
# for each node.
sudo tee /etc/consul.d/consul.hcl >/dev/null <<EOF
datacenter = "dc1" #运行代理的数据中心。
data_dir = "/opt/consul" #代理用于存储状态的数据目录
bind_addr = "10.103.3.42" #应绑定到的内部群集通信的地址。默认情况下,这是"0.0.0.0",这意味着 Consul 将绑定到本地计算机上的所有地址,可不指定,当服务器有多个网卡可能会报错
retry_join = ["10.103.3.40"] #启动时要加入的另一个代理的地址,可不指定 会自动发现
client_addr = "0.0.0.0" #Consul 将客户端接口绑定到的地址,包括 HTTP 和 DNS 服务器。默认情况下。默认情况下,它是"127.0.0.1",仅允许环回连接
server = true #此标志用于控制代理是否处于服务器或客户端模式
bootstrap_expect = 3 #此标志提供数据中心中预期服务器的数量。不应提供此值,或者该值应在数据中心的所有服务器上保持一致
ui_config { # 启动ui 默认情况下,UI 绑定到 client_addr ,不建议都启用ui
enabled = true
}
EOF
# Hand the directory to the consul user and restrict the config file to
# owner read/write and group read.
sudo chown --recursive consul:consul /etc/consul.d
sudo chmod 640 /etc/consul.d/consul.hcl
1.7 配置 systemd
# Install the systemd unit for the Consul agent. The file is written through
# `sudo tee` because a plain `cat > file` redirection is opened by the calling
# shell and fails under /etc/systemd/system for a non-root operator.
# The heredoc delimiter is unquoted, so \$MAINPID is escaped to reach the unit
# file as a literal $MAINPID.
sudo tee /etc/systemd/system/consul.service >/dev/null <<EOF
[Unit]
Description="HashiCorp Consul - A service mesh solution"
Documentation=https://www.consul.io/
Requires=network-online.target
After=network-online.target
ConditionFileNotEmpty=/etc/consul.d/consul.hcl
[Service]
EnvironmentFile=-/etc/consul.d/consul.env
User=consul
Group=consul
ExecStart=/usr/bin/consul agent -config-dir=/etc/consul.d/
ExecReload=/bin/kill --signal HUP \$MAINPID
KillMode=process
KillSignal=SIGTERM
Restart=on-failure
LimitNOFILE=65536
[Install]
WantedBy=multi-user.target
EOF
1.8 启动服务
# Validate the configuration directory, then enable the unit at boot,
# (re)start it and show its status.
sudo consul validate /etc/consul.d/
sudo systemctl enable consul
sudo systemctl restart consul
sudo systemctl status consul
# Show the cluster members.
consul members
补充:consul-template 部署
# Download consul-template 0.27.2 for Linux amd64 into the current directory,
# unpack it and move the binary into /usr/bin.
# NOTE(review): the mv has no sudo, so this assumes a root shell.
wget https://releases.hashicorp.com/consul-template/0.27.2/consul-template_0.27.2_linux_amd64.zip
unzip consul-template_0.27.2_linux_amd64.zip
mv consul-template /usr/bin/
2. 部署 nomad 集群
2.1 准备部署目录
mkdir /tmp/nomad/ -p && cd /tmp/nomad/
2.2 下载并解压
# Nomad release to install; the version is interpolated into the download URL
# and the archive name.
export NOMAD_VERSION="1.2.6"
# Fetch the Linux amd64 archive into the current directory.
curl --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip
# Install Nomad: unpack, set root ownership, move the binary into /usr/bin and
# print its version.
unzip nomad_${NOMAD_VERSION}_linux_amd64.zip
sudo chown root:root nomad
sudo mv nomad /usr/bin/
nomad version
2.3 开启自动补全
nomad -autocomplete-install
complete -C /usr/bin/nomad nomad
2.4 创建其数据目录
sudo mkdir --parents /opt/nomad
sudo useradd --system --home /etc/nomad.d --shell /bin/false nomad
2.6 准备配置文件
# Create the Nomad configuration directory, accessible to root only.
sudo mkdir --parents /etc/nomad.d
sudo chmod 700 /etc/nomad.d
# The files are written through `sudo tee` instead of `sudo cat > file`: with
# the latter the redirection is opened by the unprivileged calling shell, not
# by sudo, so it fails inside the root-only directory created above.

# Settings shared by server and client agents.
sudo tee /etc/nomad.d/nomad.hcl >/dev/null <<EOF
datacenter = "dc1"
data_dir = "/opt/nomad"
bind_addr = "0.0.0.0"
EOF
# Server role; bootstrap_expect is set to 3.
sudo tee /etc/nomad.d/server.hcl >/dev/null <<EOF
server {
enabled = true #指定此代理是否应在服务器模式下运行
bootstrap_expect = 3 #群集中预期的服务器数。
}
EOF
# Client role, so the node can also run workloads.
sudo tee /etc/nomad.d/client.hcl >/dev/null <<EOF
client {
enabled = true # 指定此代理是否应在客户端模式下运行 主节点不指定 将没有工作节点功能
}
EOF
2.7 配置 systemd
# Install the systemd unit for the Nomad agent. `sudo tee` replaces
# `sudo cat > file`, whose redirection is opened by the calling shell rather
# than by sudo and therefore fails for a non-root operator.
# The heredoc delimiter is unquoted, so \$MAINPID is escaped to reach the unit
# file as a literal $MAINPID.
sudo tee /etc/systemd/system/nomad.service >/dev/null <<EOF
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/docs/
Wants=network-online.target
After=network-online.target
[Service]
# nomad server 应以nomad用户身份运行。Nomad 客户端应以 root 用户身份运行。
User=root
Group=root
ExecReload=/bin/kill -HUP \$MAINPID
ExecStart=/usr/bin/nomad agent -config /etc/nomad.d
KillMode=process
KillSignal=SIGINT
LimitNOFILE=65536
LimitNPROC=infinity
Restart=on-failure
RestartSec=2
TasksMax=infinity
OOMScoreAdjust=-1000
[Install]
WantedBy=multi-user.target
EOF
2.8 启动服务
# Enable the Nomad unit at boot, (re)start it and show its status.
sudo systemctl enable nomad
sudo systemctl restart nomad
sudo systemctl status nomad
# List the server members and the client nodes.
nomad server members
nomad node status
3. docker 安装
# Install via yum: add the Aliyun docker-ce repository and install the latest
# 19.03.x package.
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install docker-ce-19.03.* -y
# Prepare the daemon configuration. -p keeps the step from failing when
# /etc/docker already exists (for example when these commands are re-run).
mkdir -p /etc/docker
# daemon.json sets registry mirrors, the systemd cgroup driver, concurrent
# download/upload limits, log size/rotation, and live-restore (restarting
# docker does not restart the containers).
cat <<EOF > /etc/docker/daemon.json
{
"registry-mirrors": [
"https://registry.docker-cn.com",
"http://hub-mirror.c.163.com",
"https://docker.mirrors.ustc.edu.cn"
],
"exec-opts": ["native.cgroupdriver=systemd"],
"max-concurrent-downloads": 10,
"max-concurrent-uploads": 5,
"log-opts": {
"max-size": "300m",
"max-file": "2"
},
"live-restore": true
}
EOF
# On every node: enable Docker at boot and start it now.
systemctl daemon-reload && systemctl enable --now docker
docker version
4. 安装 dnsmasq
用于把 consul 域名的 DNS 查询转发给 Consul(127.0.0.1:8600),以便通过 DNS 进行服务发现
官方文档:https://learn.hashicorp.com/tutorials/consul/dns-forwarding
4.1 yum 安装启动
yum install dnsmasq -y
$ cat /etc/dnsmasq.conf
resolv-file=/etc/dnsmasq.d/resolv.dnsmasq.conf
all-servers
no-negcache
cache-size=50000
min-port=32768
max-port=60999
#addn-hosts=/etc/dnsmasq.d/customized_hosts
local-ttl=3600
log-queries=extra
#log-facility=/var/log/dnsmasq/dnsmasq.log
server=/consul/127.0.0.1#8600
cat /etc/dnsmasq.d/resolv.dnsmasq.conf
nameserver 119.29.29.29 # tencent dns
nameserver 223.5.5.5 # ali dns
nameserver 114.114.114.114 # 114
nameserver 8.8.8.8 # google dns
systemctl restart dnsmasq
systemctl status dnsmasq
4.2 测试
curl localhost:8500/v1/catalog/nodes # 获取 Node 字段的值
dig @127.0.0.1 -p 8600 dx-lt-yd-hebei-shijiazhuang-10-10-103-3-40.node.consul
nslookup dx-lt-yd-hebei-shijiazhuang-10-10-103-3-40.node.consul
补充
使用 ceph-csi 时需要的额外配置
# Append a docker plugin block with allow_privileged = true to the Nomad
# client configuration. `sudo tee -a` is used because a plain `>>` redirection
# is opened by the calling shell, which cannot write into /etc/nomad.d unless
# it is already root.
sudo tee -a /etc/nomad.d/client.hcl >/dev/null <<EOC
plugin "docker" {
config {
allow_privileged = true
}
}
EOC
# Restart Nomad so the new plugin configuration is read.
sudo systemctl restart nomad
# Load the rbd kernel module and confirm that it is loaded.
sudo modprobe rbd
sudo lsmod | grep rbd
nomad 更新流程
# Version to upgrade to; it is interpolated into the download URL and the
# archive name.
export NOMAD_VERSION="1.2.6"
# NOTE(review): the archive is downloaded into and unpacked in the current
# directory; no working directory is selected here.
curl --remote-name https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/nomad_${NOMAD_VERSION}_linux_amd64.zip
# Install Nomad: unpack, set root ownership and replace the binary in /usr/bin.
unzip nomad_${NOMAD_VERSION}_linux_amd64.zip
sudo chown root:root nomad
sudo mv nomad /usr/bin/
nomad version
# Restart the agent on the new binary and check the server members.
systemctl restart nomad
nomad server members