nomad 集群部署

Nomad 高可用搭建

0. 服务器初始化

部署前先清空防火墙规则;如果是在部署后才清空,则清空后需要重启 docker、consul、nomad 服务。

# Flush all iptables rules (the guide does this before deploying).
iptables -F
# If the rules are flushed after deployment instead, restart the services:
# systemctl restart docker consul nomad 

# Raise vm.max_map_count for the running kernel ...
sysctl -w vm.max_map_count=262144
# ... and persist it: `sysctl -w` only changes the live value, which is lost
# on the next reboot.
echo 'vm.max_map_count=262144' > /etc/sysctl.d/99-max-map-count.conf
# Show the value currently in effect.
sysctl vm.max_map_count

1. 安装 consul

官方文档:https://learn.hashicorp.com/tutorials/consul/deployment-guide

1.1 准备部署目录

# Create the scratch directory for the Consul download and switch into it.
mkdir -p /tmp/consul/ && cd /tmp/consul/

1.2 下载并解压

# Release to install and the base URL of the HashiCorp release server.
export CONSUL_VERSION="1.11.2"
export CONSUL_URL="https://releases.hashicorp.com/consul"

# Fetch the archive, its SHA256 checksum list and the signature of that list.
curl --remote-name \
  "${CONSUL_URL}/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_linux_amd64.zip"
curl --silent --remote-name \
  "${CONSUL_URL}/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_SHA256SUMS"
curl --silent --remote-name \
  "${CONSUL_URL}/${CONSUL_VERSION}/consul_${CONSUL_VERSION}_SHA256SUMS.sig"

# Verify the archive against the checksum list and unpack it only when the
# check passes. The list covers every platform, so pick the single entry for
# this archive. (The .sig file is not checked here; that needs gpg and
# HashiCorp's public key.)
grep -F -- "consul_${CONSUL_VERSION}_linux_amd64.zip" \
  "consul_${CONSUL_VERSION}_SHA256SUMS" \
  | sha256sum --check - \
  && unzip "consul_${CONSUL_VERSION}_linux_amd64.zip"

# Install the binary root-owned into /usr/bin and confirm it runs.
sudo chown root:root consul
sudo mv consul /usr/bin/
consul --version

1.3 开启自动补全

# Install Consul's shell completion (NOTE(review): presumably written to the
# current user's shell profile - confirm).
consul -autocomplete-install
# Register the completion for the current shell session as well.
complete -C /usr/bin/consul consul

1.4 创建其数据目录

# System account for the agent: home is the config directory, no login shell.
sudo useradd -r -d /etc/consul.d -s /bin/false consul
# Data directory, owned by that account.
sudo mkdir -p /opt/consul
sudo chown -R consul:consul /opt/consul

1.5 验证安装

# Run the binary with no arguments as a smoke test that it is installed and
# reachable on PATH.
consul

1.6 准备配置文件

# Create the configuration directory as root, like the rest of this section.
sudo mkdir -p /etc/consul.d/
# Write the server configuration through `sudo tee` so the file is created
# with root privileges even when the shell itself is unprivileged. The quoted
# delimiter keeps the text literal (no shell expansion).
# bind_addr and retry_join are host-specific: adjust them on every node.
sudo tee /etc/consul.d/consul.hcl >/dev/null <<'EOF'
datacenter = "dc1"              #运行代理的数据中心。
data_dir = "/opt/consul"        #代理用于存储状态的数据目录
bind_addr = "10.103.3.42"       #应绑定到的内部群集通信的地址。默认情况下,这是"0.0.0.0",这意味着 Consul 将绑定到本地计算机上的所有地址,可不指定,当服务器有多个网卡可能会报错
retry_join = ["10.103.3.40"]    #启动时要加入的另一个代理的地址,可不指定 会自动发现
client_addr = "0.0.0.0"         #Consul 将客户端接口绑定到的地址,包括 HTTP 和 DNS 服务器。默认情况下。默认情况下,它是"127.0.0.1",仅允许环回连接

server = true                   #此标志用于控制代理是否处于服务器或客户端模式
bootstrap_expect = 3            #此标志提供数据中心中预期服务器的数量。不应提供此值,或者该值应在数据中心的所有服务器上保持一致

ui_config {                     # 启动ui 默认情况下,UI 绑定到 client_addr ,不建议都启用ui
  enabled = true
}
EOF

# Hand the directory to the consul account and keep the config unreadable
# for other users.
sudo chown --recursive consul:consul /etc/consul.d
sudo chmod 640 /etc/consul.d/consul.hcl

1.7 配置 systemd

# Install the systemd unit for the Consul agent. `sudo tee` performs the
# write as root; the quoted delimiter keeps $MAINPID literal so systemd, not
# the shell, expands it.
sudo tee /etc/systemd/system/consul.service >/dev/null <<'EOF'
[Unit]
Description="HashiCorp Consul - A service mesh solution"
Documentation=https://www.consul.io/
Requires=network-online.target
After=network-online.target
ConditionFileNotEmpty=/etc/consul.d/consul.hcl

[Service]
EnvironmentFile=-/etc/consul.d/consul.env
User=consul
Group=consul
ExecStart=/usr/bin/consul agent -config-dir=/etc/consul.d/
ExecReload=/bin/kill --signal HUP $MAINPID
KillMode=process
KillSignal=SIGTERM
Restart=on-failure
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

# Make systemd re-read unit files so the new or changed unit is picked up.
sudo systemctl daemon-reload

1.8 启动服务

#检查您的配置文件是否有效 并启动
sudo consul validate /etc/consul.d/
sudo systemctl enable consul
sudo systemctl restart consul
sudo systemctl status consul

# 查看集群状态
consul members

补充:consul-template 部署

# Release of consul-template to install; change it here only.
export CONSUL_TEMPLATE_VERSION="0.27.2"
wget "https://releases.hashicorp.com/consul-template/${CONSUL_TEMPLATE_VERSION}/consul-template_${CONSUL_TEMPLATE_VERSION}_linux_amd64.zip"
unzip "consul-template_${CONSUL_TEMPLATE_VERSION}_linux_amd64.zip"
# Install root-owned into /usr/bin, the same way as the consul and nomad
# binaries in this guide.
sudo chown root:root consul-template
sudo mv consul-template /usr/bin/

2. 部署 nomad 集群

官方文档:https://learn.hashicorp.com/tutorials/nomad/production-deployment-guide-vm-with-consul?in=nomad/enterprise

2.1 准备部署目录

# Create the scratch directory for the Nomad download and switch into it.
mkdir -p /tmp/nomad/ && cd /tmp/nomad/

2.2 下载并解压

# Pick the Nomad release and fetch its linux/amd64 archive.
export NOMAD_VERSION="1.2.6"
nomad_zip="nomad_${NOMAD_VERSION}_linux_amd64.zip"
curl -O "https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/${nomad_zip}"
# Unpack the binary, make it root-owned and install it into /usr/bin.
unzip "${nomad_zip}"
sudo chown root:root nomad
sudo mv nomad /usr/bin/
nomad version

2.3 开启自动补全

# Install Nomad's shell completion (NOTE(review): presumably written to the
# current user's shell profile - confirm).
nomad -autocomplete-install
# Register the completion for the current shell session as well.
complete -C /usr/bin/nomad nomad

2.4 创建其数据目录

# Data directory for the agent.
sudo mkdir -p /opt/nomad
# System account: home is the config directory, no login shell.
sudo useradd -r -d /etc/nomad.d -s /bin/false nomad

2.6 准备配置文件

# Configuration directory, accessible to root only.
sudo mkdir --parents /etc/nomad.d
sudo chmod 700 /etc/nomad.d

# The files are written with `sudo tee` rather than `sudo cat > file`: a
# redirection is opened by the calling (unprivileged) shell, not by sudo, so
# it cannot create files inside the 0700 root-owned directory.

# Settings shared by server and client agents.
sudo tee /etc/nomad.d/nomad.hcl >/dev/null <<'EOF'
datacenter = "dc1"
data_dir = "/opt/nomad"
bind_addr = "0.0.0.0"
EOF

# Server role.
sudo tee /etc/nomad.d/server.hcl >/dev/null <<'EOF'
server {
  enabled = true   #指定此代理是否应在服务器模式下运行
  bootstrap_expect = 3 #群集中预期的服务器数。
}
EOF

# Client (worker) role.
sudo tee /etc/nomad.d/client.hcl >/dev/null <<'EOF'
client {
  enabled = true  # 指定此代理是否应在客户端模式下运行 主节点不指定 将没有工作节点功能
}
EOF

2.7 配置 systemd

# Install the systemd unit for the Nomad agent. `sudo tee` is used because
# with `sudo cat > file` the redirection is performed by the unprivileged
# shell and fails on /etc/systemd/system. The quoted delimiter keeps $MAINPID
# literal so systemd, not the shell, expands it.
sudo tee /etc/systemd/system/nomad.service >/dev/null <<'EOF'
[Unit]
Description=Nomad
Documentation=https://www.nomadproject.io/docs/
Wants=network-online.target
After=network-online.target

[Service]
#  nomad server 应以nomad用户身份运行。Nomad 客户端应以 root 用户身份运行。
User=root
Group=root

ExecReload=/bin/kill -HUP $MAINPID
ExecStart=/usr/bin/nomad agent -config /etc/nomad.d
KillMode=process
KillSignal=SIGINT
LimitNOFILE=65536
LimitNPROC=infinity
Restart=on-failure
RestartSec=2

TasksMax=infinity
OOMScoreAdjust=-1000

[Install]
WantedBy=multi-user.target
EOF

# Make systemd re-read unit files so the new or changed unit is picked up.
sudo systemctl daemon-reload

2.8 启动服务

# Enable the agent at boot, (re)start it now and show its status.
sudo systemctl enable nomad
sudo systemctl restart nomad
sudo systemctl status nomad

# List the server members and the client nodes.
nomad server members
nomad node status

3. docker 安装

# yum 安装
yum-config-manager --add-repo https://mirrors.aliyun.com/docker-ce/linux/centos/docker-ce.repo
yum install docker-ce-19.03.* -y

# 准备配置文件
mkdir /etc/docker
# docker 的最大下载数 日志保存大小 重启docker 不重启容器
cat  <<EOF > /etc/docker/daemon.json 
{
 "registry-mirrors": [
    "https://registry.docker-cn.com",
    "http://hub-mirror.c.163.com",
    "https://docker.mirrors.ustc.edu.cn"
  ],
 "exec-opts": ["native.cgroupdriver=systemd"],
 "max-concurrent-downloads": 10,
 "max-concurrent-uploads": 5,
 "log-opts": {
   "max-size": "300m",
   "max-file": "2"
 },
 "live-restore": true
}
EOF

#所有节点设置开机自启动Docker并起来
systemctl daemon-reload && systemctl enable --now docker
docker version

4. 安装 dnsmasq

用于把 .consul 域名的 DNS 查询转发给 Consul(127.0.0.1:8600),以便通过 DNS 做服务发现
官方文档:https://learn.hashicorp.com/tutorials/consul/dns-forwarding

4.1 yum 安装启动

yum install dnsmasq -y

# Keep one copy of the packaged configuration before replacing it.
[ -e /etc/dnsmasq.conf.bak ] || cp -a /etc/dnsmasq.conf /etc/dnsmasq.conf.bak

# Main configuration: upstream resolvers come from the resolv-file below, and
# queries for the "consul" domain are forwarded to 127.0.0.1 port 8600.
# NOTE(review): the packaged default file may carry a conf-dir=/etc/dnsmasq.d
# line; it is absent here, presumably because resolv.dnsmasq.conf lives in
# that directory and is not a dnsmasq option file - confirm.
cat <<'EOF' > /etc/dnsmasq.conf
resolv-file=/etc/dnsmasq.d/resolv.dnsmasq.conf
all-servers

no-negcache
cache-size=50000

min-port=32768
max-port=60999

#addn-hosts=/etc/dnsmasq.d/customized_hosts
local-ttl=3600

log-queries=extra
#log-facility=/var/log/dnsmasq/dnsmasq.log

server=/consul/127.0.0.1#8600
EOF

# Upstream resolvers used for everything outside the consul domain.
mkdir -p /etc/dnsmasq.d
cat <<'EOF' > /etc/dnsmasq.d/resolv.dnsmasq.conf
nameserver 119.29.29.29     # tencent dns
nameserver 223.5.5.5        # ali dns
nameserver 114.114.114.114  # 114
nameserver 8.8.8.8          # google dns
EOF

systemctl restart dnsmasq
systemctl status dnsmasq

4.2 测试

curl localhost:8500/v1/catalog/nodes  # read the value of the Node field
# Look the node up on port 8600 of 127.0.0.1, then through the system resolver.
# NOTE(review): the host name below is presumably a Node value from the
# author's environment - substitute one returned by the curl call above.
dig @127.0.0.1 -p 8600 dx-lt-yd-hebei-shijiazhuang-10-10-103-3-40.node.consul
nslookup dx-lt-yd-hebei-shijiazhuang-10-10-103-3-40.node.consul

补充

使用 ceph-csi 时需要追加的配置

# Append the docker plugin settings to the Nomad client configuration.
# `sudo tee -a` is used because the file is root-owned and a plain `>>`
# redirection would be opened by the unprivileged shell.
sudo tee -a /etc/nomad.d/client.hcl >/dev/null <<'EOC'
plugin "docker" {
  config {
    allow_privileged = true
  }
}
EOC

# Restart the agent so the appended configuration is loaded.
sudo systemctl restart nomad

# Load the rbd kernel module and confirm it is present.
sudo modprobe rbd
sudo lsmod | grep rbd

nomad 更新流程

# Select the release to switch to and fetch its linux/amd64 archive.
export NOMAD_VERSION="1.2.6"
nomad_zip="nomad_${NOMAD_VERSION}_linux_amd64.zip"
curl -O "https://releases.hashicorp.com/nomad/${NOMAD_VERSION}/${nomad_zip}"
# Replace the installed binary, then restart the agent on the new version.
unzip "${nomad_zip}"
sudo chown root:root nomad
sudo mv nomad /usr/bin/
nomad version
systemctl restart nomad

# Check that the servers are listed as cluster members again.
nomad server members
posted @ 2022-02-08 15:50  鸣昊  阅读(1113)  评论(4编辑  收藏  举报