使用 docker 的方式部署 NFS server 提供文件共享能力

构建 NFS server 镜像

参考项目：sjiveson/nfs-server-alpine，itsthenetwork/nfs-server-alpine

这个项目在 Github 算是比较久远了，作者也只是三年前更新了一下启动脚本的拼写检查，作者构建的镜像也停留在五年前了

我把作者的项目（sjiveson-nfs-server-alpine 目录下），以及下面我自己的修改，都上传到我自己的 Gitee 上面了，方便那些没有魔法的同志们：docker-nfs-server-alpine

准备 Dockerfile

# Base image: Alpine 3.20, pulled through the docker.m.daocloud.io registry mirror.
FROM docker.m.daocloud.io/alpine:3.20

# Locale-related environment variables for processes in the container.
ENV LANG="en_US.UTF-8"
ENV LANGUAGE="en_US:en"
ENV LC_ALL="en_US.UTF-8"

# Install the startup script as /usr/bin/nfsd and the shell profile for root.
COPY nfsd.sh /usr/bin/nfsd
COPY .bashrc /root/.bashrc

# Single layer that, in order:
#   1. points apk at the TUNA mirror for the v3.20 main and community repositories
#      (the first echo overwrites /etc/apk/repositories, the second appends);
#   2. installs nfs-utils, bash and iproute2;
#   3. removes the apk cache directory, /tmp, and the halt/poweroff/reboot binaries;
#   4. creates the rpc_pipefs and v4recovery directories under /var/lib/nfs;
#   5. appends rpc_pipefs and nfsd entries to /etc/fstab;
#   6. marks the startup script executable.
RUN echo "https://mirrors.tuna.tsinghua.edu.cn/alpine/v3.20/main" > /etc/apk/repositories && \
    echo "https://mirrors.tuna.tsinghua.edu.cn/alpine/v3.20/community" >> /etc/apk/repositories && \
    apk add --no-cache --update --verbose nfs-utils bash iproute2 && \
    rm -rf /var/cache/apk /tmp /sbin/halt /sbin/poweroff /sbin/reboot && \
    mkdir -p /var/lib/nfs/rpc_pipefs /var/lib/nfs/v4recovery && \
    echo "rpc_pipefs   /var/lib/nfs/rpc_pipefs   rpc_pipefs   defaults 0 0" >> /etc/fstab && \
    echo "nfsd         /proc/fs/nfsd             nfsd         defaults 0 0" >> /etc/fstab && \
    chmod +x /usr/bin/nfsd

# The startup script is the container's main process (exec form, no shell wrapper).
ENTRYPOINT ["/usr/bin/nfsd"]

准备 .bashrc 文件

# Shell profile for root inside the container: convenience aliases plus a
# prompt whose colors depend on the user and whose form depends on $TERM.

# --- Aliases -----------------------------------------------------------------
alias apk='apk --progress' \
      ll="ls -ltanh"

# Navigation and file-handling shortcuts.
alias hosts='cat /etc/hosts' \
      ..="cd .." \
      ...="cd ../.." \
      ....="cd ../../.." \
      untar="tar xvkf" \
      mv="mv -nv" \
      cp="cp -i"

# Address listing per IP family.
alias ip4="ip -4 addr" \
      ip6="ip -6 addr"

# --- Prompt color escapes (wrapped in \[ \] so bash ignores their width) ------
COL_YEL="\[\e[1;33m\]"
COL_GRA="\[\e[0;37m\]"
COL_WHI="\[\e[1;37m\]"
COL_GRE="\[\e[1;32m\]"
COL_RED="\[\e[1;31m\]"

# --- Prompt ------------------------------------------------------------------
# Non-root users get a green name and " >"; root gets a red name and " #".
if [[ ${UID} -ne 0 ]]; then
    _COL_USER=${COL_GRE}
    _p=" >"
else
    _COL_USER=${COL_RED}
    _p=" #"
fi
COLORIZED_PROMPT="${_COL_USER}\u${COL_WHI}@${COL_YEL}\h${COL_WHI}:\w${_p} \[\e[m\]"

# Start from the plain prompt and upgrade it for terminals known to this file:
# "linux" gets colors; *term/rxvt/screen get colors plus a window-title escape.
PS1="\u@\h:\w${_p} "
case "${TERM}" in
    linux )
        PS1="${COLORIZED_PROMPT}" ;;
    *term | rxvt | screen )
        PS1="${COLORIZED_PROMPT}\[\e]0;\u@\h:\w\007\]" ;;
esac

准备 nfsd.sh

针对作者的原脚本做了一些修改：原脚本没法针对多个 IP 网段做控制，权限也是固定写死的，对于一些特殊场景不太适用：

PUBLIC_OPTIONS：新增的变量，用来控制默认的 NFS 权限，没有定义时，默认的权限是 no_subtree_check,no_auth_nlm,insecure,no_root_squash，可以自己修改脚本

增加逻辑：一些特殊场景可以直接映射配置文件到 /etc/exports，只需要 SHARED_DIRECTORY 变量的值在 /etc/exports 文件中存在，就不会通过环境变量修改容器内的 /etc/exports 文件

--no-nfs-version 2 这个 NFS 启动参数取消了，当前的 NFS 版本已经不支持 v2 版本了，不去掉这个参数，启动 NFS 会报错 2: Unsupported version

#!/bin/bash

# Make sure we react to these signals by running stop() when we see them - for clean shutdown
# And then exiting
trap "stop; exit 0;" SIGTERM SIGINT

#######################################
# Shut the NFS services down cleanly, then exit the script.
# Invoked by the SIGTERM/SIGINT trap (e.g. on "docker stop").
# Unexports everything, sets the nfsd thread count to 0 and sends SIGTERM to
# any rpc.nfsd, rpc.mountd and rpcbind processes that pidof reports.
# Globals:   none
# Arguments: none
# Outputs:   progress messages on stdout
# Returns:   does not return; exits with the status of the final echo
#######################################
stop()
{
  # pidof prints PIDs separated by spaces, but the script sets IFS to newline
  # and tab once startup begins. Restore the default IFS for this function so
  # the PID list below is split into one argument per PID.
  local IFS=$' \t\n'
  local pids

  # We're here because we've seen SIGTERM, likely via a Docker stop command or similar
  # Let's shutdown cleanly
  echo "SIGTERM caught, terminating NFS process(es)..."
  /usr/sbin/exportfs -uav
  /usr/sbin/rpc.nfsd 0

  # rpcbind is included because it is started to work around an IPv6 bug.
  pids="$(pidof rpc.nfsd) $(pidof rpc.mountd) $(pidof rpcbind)"

  # Intentionally unquoted: each PID must become its own argument.
  # With no PIDs at all kill only prints a usage error, which is discarded.
  # shellcheck disable=SC2086
  kill -TERM ${pids} > /dev/null 2>&1
  echo "Terminated."
  exit
}

# Export options shared by every generated entry; override via PUBLIC_OPTIONS.
PUBLIC_OPTIONS=${PUBLIC_OPTIONS:-no_subtree_check,no_auth_nlm,insecure,no_root_squash}

# Check if the SHARED_DIRECTORY variable is empty
if [ -z "${SHARED_DIRECTORY:-}" ]; then
  echo "The SHARED_DIRECTORY environment variable is unset or null, exiting..."
  exit 1
fi

# If /etc/exports already mentions the shared directory (for example because a
# prepared file was bind-mounted over it), leave the file untouched.
# -F matches the path as a literal string rather than a regular expression,
# and -- protects against a value that starts with a dash.
if grep -qF -- "${SHARED_DIRECTORY}" /etc/exports; then
  echo "/etc/exports file exists,nothing to do"
else
  echo "Writing SHARED_DIRECTORY to /etc/exports file"

  # Client specification: PERMITTED, or '*' when it is unset or empty.
  if [ -z "${PERMITTED:-}" ]; then
    echo "The PERMITTED environment variable is unset or null, defaulting to '*'."
    echo "This means any client can mount."
    export_clients='*'
  else
    echo "The PERMITTED environment variable is set."
    echo "The permitted clients are: ${PERMITTED}."
    export_clients=${PERMITTED}
  fi

  # Read-only as soon as READ_ONLY is set at all, even to an empty string
  # (${VAR+y} expands to "y" whenever the variable is set).
  if [ -z "${READ_ONLY+y}" ]; then
    echo "The READ_ONLY environment variable is unset or null, defaulting to 'rw'."
    echo "Clients have read/write access."
    access_mode=rw
  else
    echo "The READ_ONLY environment variable is set."
    echo "Clients will have read-only access."
    access_mode=ro
  fi

  # Same convention for SYNC: set (even empty) selects sync, unset selects async.
  if [ -z "${SYNC+y}" ]; then
    echo "The SYNC environment variable is unset or null, defaulting to 'async' mode."
    echo "Writes will not be immediately written to disk."
    sync_mode=async
  else
    echo "The SYNC environment variable is set, using 'sync' mode."
    echo "Writes will be immediately written to disk."
    sync_mode=sync
  fi

  # Write the finished line in one step. Substituting the values directly,
  # instead of patching placeholders with sed, keeps values containing '/'
  # (CIDR such as 10.0.0.0/24), '&' or glob characters intact.
  printf '%s %s(%s,fsid=0,%s,%s)\n' \
    "${SHARED_DIRECTORY}" "${export_clients}" "${access_mode}" \
    "${sync_mode}" "${PUBLIC_OPTIONS}" > /etc/exports
fi

# Partially set 'unofficial Bash Strict Mode' as described here: http://redsymbol.net/articles/unofficial-bash-strict-mode/
# We don't set -e because the pidof command returns an exit code of 1 when the specified process is not found
# We expect this at times and don't want the script to be terminated when it occurs
set -uo pipefail
IFS=$'\n\t'

# --- Startup -----------------------------------------------------------------
# Record the PID of rpc.mountd; an empty value means NFS is not running yet.
pid=$(pidof rpc.mountd)

# Keep (re)starting the services until rpc.mountd shows up.
while [ -z "$pid" ]; do
  echo "Displaying /etc/exports contents:"
  cat /etc/exports
  echo ""

  # Normally only required if v3 will be used
  # But currently enabled to overcome an NFS bug around opening an IPv6 socket
  echo "Starting rpcbind..."
  /sbin/rpcbind -w
  echo "Displaying rpcbind status..."
  /sbin/rpcinfo

  # Only required if v3 will be used
  # /usr/sbin/rpc.idmapd
  # /usr/sbin/rpc.gssd -v
  # /usr/sbin/rpc.statd

  echo "Starting NFS in the background..."
  # NOTE(review): confirm against rpc.nfsd(8) whether "8" is consumed by
  # --debug or is taken as the number of nfsd threads.
  /usr/sbin/rpc.nfsd --debug 8 --no-udp --no-nfs-version 3
  echo "Exporting File System..."
  if /usr/sbin/exportfs -rv; then
    /usr/sbin/exportfs
  else
    # A rejected exports file will not fix itself on retry, so give up here.
    echo "Export validation failed, exiting..."
    exit 1
  fi
  echo "Starting Mountd in the background..."
  /usr/sbin/rpc.mountd --debug all --no-udp --no-nfs-version 3
  # --exports-file /etc/exports

  # Check if NFS is now running by recording its PID (empty if it is not):
  pid=$(pidof rpc.mountd)

  # If $pid is empty, startup failed; log the fact and sleep for 2s
  # before the loop tries again.
  if [ -z "$pid" ]; then
    echo "Startup of NFS failed, sleeping for 2s, then retrying..."
    sleep 2
  fi
done

echo "Startup successful."

# --- Watchdog ----------------------------------------------------------------
# Poll once per second; leave the loop as soon as rpc.mountd has disappeared.
while true; do
  pid=$(pidof rpc.mountd)

  # Breaking out ends this script (PID 1), so Docker observes the failure
  # and can handle it as configured.
  if [ -z "$pid" ]; then
    echo "NFS has failed, exiting, so Docker can restart the container..."
    break
  fi

  # Still running: give the CPU a rest
  sleep 1
done

sleep 1
exit 1

构建镜像

目前来说，alpine-3.20 容器内安装的 NFS server 是 2.6.4 版本。镜像的具体名称和 tag，大家自己安排就行，不关键，这个命名方式只是我的习惯而已

docker build -t nfs-server-2.6.4:alpine-3.20 .

特权模式

NFS 需要挂载 /proc/fs/nfsd 和 /var/lib/nfs/rpc_pipefs 这两个目录，不启用特权，会有如下报错，所以启用特权模式是无法避免的

mount: /var/lib/nfs/rpc_pipefs: permission denied.
       dmesg (1) may have more information after failed mount system call.
mount: /proc/fs/nfsd: permission denied.
       dmesg (1) may have more information after failed mount system call

如果完全开启 --privileged ，那只会更危险，这里采用 Linux capabilities 的 SYS_ADMIN 支持容器内部进行文件的挂载特权

docker

docker run --cap-add SYS_ADMIN

docker-compose

cap_add:
  - SYS_ADMIN
  - SETPCAP

docker run 的方式

环境变量方式

SHARED_DIRECTORY 变量是必须传参的，定义的是 NFS 的共享目录名称
SYNC= 表示采用 sync 的模式，启动时不带 -e SYNC= 表示采用 async
- sync 模式:
  - 写入过程：每次客户端进行写操作时，数据会立即写入到服务器的磁盘上，确保数据在操作完成时已经安全地存储在服务器端。
  - 优点：数据可靠性高，确保数据不会因为服务器宕机或网络问题而丢失。每次写入操作都会被确认已经完成，数据一致性较好。
  - 缺点：由于需要等待磁盘操作完成，写入速度较慢，性能相对较低，特别是在频繁的写入操作中。
- async 模式:
  - 写入过程：客户端的写操作会立即返回成功，但数据并不立即写入到服务器的磁盘，而是被缓存起来，稍后批量写入。服务器会在空闲时处理这些写入请求。
  - 优点：写入性能较高，因为客户端不需要等待每个写操作被真正写入磁盘，减少了延迟，适合对性能要求较高的场景。
  - 缺点：如果服务器在数据实际写入磁盘之前宕机，可能导致数据丢失，数据可靠性相对较低。
- 适用场景
  - sync：适合对数据可靠性要求高的场景，例如数据库或需要确保数据不会丢失的关键任务。
  - async：适合对性能要求较高，但数据偶尔丢失可以接受的场景，例如临时数据或对性能有更高要求的应用。
READ_ONLY= 表示共享目录采用 ro 只读模式
PERMITTED="10.11.99.*"：表示仅允许 IP 地址以 10.11.99 开头的主机挂载文件共享，不指定表示所有 IP 地址都可以挂载文件共享

docker run -d \
--name nfs-server \
--cap-add SYS_ADMIN \
-p 20490:2049 \
-v /appdata:/appdata \
-e SHARED_DIRECTORY=/appdata \
-e SYNC= \
nfs-server-2.6.4:alpine-3.20

配置文件挂载方式

自己准备 exports 配置文件，适合一些权限细化管理，访问地址细化管理的场景

docker run -d \
--name nfs-server \
--cap-add SYS_ADMIN \
-p 20490:2049 \
-v /nfsshare:/nfsshare \
-v /path/to/exports:/etc/exports \
-e SHARED_DIRECTORY=/nfsshare \
nfs-server-2.6.4:alpine-3.20

docker-compose 的方式

# Compose definition for a single NFS server container that uses a
# bind-mounted exports file.
services:
  nfs-server:
    container_name: nfs-server
    hostname: nfs-server
    image: nfs-server-2.6.4:alpine-3.20
    restart: always
    # Grants only the SYS_ADMIN capability rather than running fully privileged.
    cap_add:
      - SYS_ADMIN
    # Host port 20490 is mapped to the container's NFS port 2049.
    ports:
      - "20490:2049"
    networks:
      - nfs-net
    environment:
      # Required by the startup script; must name the exported directory.
      - SHARED_DIRECTORY=/nfsshare
    # NOTE(review): whether deploy.resources limits are applied outside swarm
    # mode depends on the Compose version in use - confirm.
    deploy:
      resources:
        limits:
          cpus: '1'
          memory: 2000M
    volumes:
      # Host directory that is shared over NFS.
      - /nfsshare:/nfsshare
      # Prepared exports file mounted over the container's /etc/exports.
      - ./exports:/etc/exports

# Dedicated bridge network for the service.
networks:
  nfs-net:
    driver: bridge

启动容器

docker-compose up -d

本地挂载 NFS

配置文件默认启用了 fsid=0，因此从客户端挂载时无需指定文件夹名称

外部映射的端口不是 2049，挂载时，需要指定外部映射端口

mount -t nfs -o port=20490,mountport=20490 10.11.12.101:/ /some/where/here

题外话

什么是 Capabilities

从 Linux 2.2开始，Linux 将传统上与超级用户相关的特权划分为不同的单元，称为 Capabilities，可以独立启用和禁用

常见的 Capabilities 列表

CAP_CHOWN
- 允许更改文件的所有者（chown）。
CAP_DAC_OVERRIDE
- 绕过文件读取、写入和执行权限检查。
CAP_DAC_READ_SEARCH
- 绕过文件的读权限检查，以及目录的读取和执行（遍历）权限检查。
CAP_FOWNER
- 绕过通常要求进程的文件系统 UID 与文件所有者 UID 一致的权限检查（如 chmod、utime 等操作）。
CAP_FSETID
- 文件的 setuid 和 setgid 位在某些情况下不会被清除。
CAP_KILL
- 可以向任何进程发送信号，而不仅仅是属于自己的进程。
CAP_SETGID
- 允许设置进程的组 ID（setgid）。
CAP_SETUID
- 允许设置进程的用户 ID（setuid）。
CAP_NET_BIND_SERVICE
- 允许绑定到小于 1024 的端口号（如 HTTP 使用的 80 端口）。
CAP_NET_RAW
- 允许使用原始套接字和一些网络操作（如 ping）。
CAP_SYS_CHROOT
- 允许使用 chroot 来更改根文件系统。
CAP_SYS_ADMIN
- 非常强大的权限，允许执行很多系统管理操作（包括挂载和卸载文件系统、设置主机名等；加载内核模块由 CAP_SYS_MODULE 控制）。
CAP_SYS_BOOT
- 允许重启或关闭系统。
CAP_SYS_MODULE
- 允许加载和卸载内核模块。
CAP_SYS_NICE
- 允许更改进程优先级。
CAP_SYS_PACCT
- 允许打开或关闭进程会计功能。
CAP_SYS_PTRACE
- 允许追踪和调试其他进程。
CAP_SYS_RAWIO
- 允许直接执行 I/O 操作。
CAP_SYS_RESOURCE
- 允许增加资源限制（如内存锁定、设置超高的进程优先级等）。
CAP_SYS_TIME
- 允许更改系统时间。
CAP_SYS_TTY_CONFIG
- 允许配置 TTY 设备。
CAP_AUDIT_CONTROL
- 允许开启、关闭、配置审计系统。
CAP_AUDIT_WRITE
- 允许将记录写入审计日志。
CAP_MKNOD
- 允许使用 mknod 创建特殊文件（设备文件）。
CAP_LEASE
- 允许对文件设置租约（lease）。
CAP_LINUX_IMMUTABLE
- 允许设置 / 清除文件的不可修改标志（immutable）。
CAP_IPC_LOCK
- 允许锁定共享内存。
CAP_IPC_OWNER
- 绕过某些 IPC 权限检查。
CAP_SYSLOG
- 允许对 kernel syslog 进行操作（控制系统日志行为）。
CAP_WAKE_ALARM
- 允许唤醒系统（通过唤醒事件）。
CAP_BLOCK_SUSPEND
- 允许阻止系统进入挂起状态。
CAP_PERFMON
- 允许执行性能监控和观测任务。
CAP_BPF
- 允许加载、卸载和查看 BPF 程序。
CAP_CHECKPOINT_RESTORE
- 允许进行进程的检查点和恢复操作。

以上这些翻译，取自 ChatGPT

获取 Capabilities 列表

方法一

grep "#define CAP_" /usr/include/linux/capability.h

可以看到 Capabilities 对应的编码

#define CAP_CHOWN            0
#define CAP_DAC_OVERRIDE     1
#define CAP_DAC_READ_SEARCH  2
#define CAP_FOWNER           3
#define CAP_FSETID           4
#define CAP_KILL             5
#define CAP_SETGID           6
#define CAP_SETUID           7
#define CAP_SETPCAP          8
#define CAP_LINUX_IMMUTABLE  9
#define CAP_NET_BIND_SERVICE 10
#define CAP_NET_BROADCAST    11
#define CAP_NET_ADMIN        12
#define CAP_NET_RAW          13
#define CAP_IPC_LOCK         14
#define CAP_IPC_OWNER        15
#define CAP_SYS_MODULE       16
#define CAP_SYS_RAWIO        17
#define CAP_SYS_CHROOT       18
#define CAP_SYS_PTRACE       19
#define CAP_SYS_PACCT        20
#define CAP_SYS_ADMIN        21
#define CAP_SYS_BOOT         22
#define CAP_SYS_NICE         23
#define CAP_SYS_RESOURCE     24
#define CAP_SYS_TIME         25
#define CAP_SYS_TTY_CONFIG   26
#define CAP_MKNOD            27
#define CAP_LEASE            28
#define CAP_AUDIT_WRITE      29
#define CAP_AUDIT_CONTROL    30
#define CAP_SETFCAP          31
#define CAP_MAC_OVERRIDE     32
#define CAP_MAC_ADMIN        33
#define CAP_SYSLOG           34
#define CAP_WAKE_ALARM            35
#define CAP_BLOCK_SUSPEND    36
#define CAP_LAST_CAP         CAP_BLOCK_SUSPEND
#define CAP_TO_INDEX(x)     ((x) >> 5)        /* 1 << 5 == bits in __u32 */
#define CAP_TO_MASK(x)      (1 << ((x) & 31)) /* mask for indexed __u32 */

方法二

capsh --print

它不会给出完整的 Capabilities 列表，但可以展示当前系统可用的 Capabilities

Current: = cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,35,36,37+ep
Bounding set =cap_chown,cap_dac_override,cap_dac_read_search,cap_fowner,cap_fsetid,cap_kill,cap_setgid,cap_setuid,cap_setpcap,cap_linux_immutable,cap_net_bind_service,cap_net_broadcast,cap_net_admin,cap_net_raw,cap_ipc_lock,cap_ipc_owner,cap_sys_module,cap_sys_rawio,cap_sys_chroot,cap_sys_ptrace,cap_sys_pacct,cap_sys_admin,cap_sys_boot,cap_sys_nice,cap_sys_resource,cap_sys_time,cap_sys_tty_config,cap_mknod,cap_lease,cap_audit_write,cap_audit_control,cap_setfcap,cap_mac_override,cap_mac_admin,cap_syslog,35,36,37
Securebits: 00/0x0/1'b0
 secure-noroot: no (unlocked)
 secure-no-suid-fixup: no (unlocked)
 secure-keep-caps: no (unlocked)
uid=0(root)
gid=0(root)
groups=0(root)

方法三

官方的 man 手册

posted @ 2024-10-24 22:24 月巴左耳东阅读(1522) 评论(0) 收藏举报

刷新页面返回顶部

月巴左耳东

以梦为马|越骑越傻

使用 docker 的方式部署 NFS server 提供文件共享能力

构建 NFS server 镜像

准备 Dockerfile

准备 .bashrc 文件

准备 nfsd.sh

构建镜像

特权模式

docker

docker-compose

docker run 的方式

环境变量方式

配置文件挂载方式

docker-compose 的方式

本地挂载 NFS

题外话

什么是 Capabilities

常见的 Capabilities 列表

获取 Capabilities 列表

方法一

方法二

方法三