Akemi

DRBD + Pacemaker for Highly Available NFS

2025/08/26

Environment preparation


Management network: 10.163.2.0/24 (DHCP)
Storage network: 172.16.0.0/24
OS version: CentOS 8.5.2111
The switch is a plain L2 device, which keeps things simple.
In real production you would use two L3 switches, two links, and bond two NICs on each host.
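As a rough illustration of that production note, here is a minimal NIC-bonding sketch with nmcli; the interface names (eth1/eth2), the bond name, and the active-backup mode are assumptions for illustration, not part of this lab:

# create an active-backup bond carrying the storage IP, then enslave two NICs
nmcli connection add type bond con-name bond0 ifname bond0 bond.options "mode=active-backup,miimon=100" ipv4.method manual ipv4.addresses 172.16.0.1/24
nmcli connection add type ethernet slave-type bond con-name bond0-port1 ifname eth1 master bond0
nmcli connection add type ethernet slave-type bond con-name bond0-port2 ifname eth2 master bond0
nmcli connection up bond0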

# Replace the yum repos with the vault mirrors
rm -rf /etc/yum.repos.d/*
curl -o /etc/yum.repos.d/CentOS-Base.repo https://mirrors.aliyun.com/repo/Centos-vault-8.5.2111.repo

# Add the HighAvailability repo
cat > /etc/yum.repos.d/HighAvailability.repo << 'EOF'
[HighAvailability]
name=CentOS-8 - HighAvailability
baseurl=http://vault.centos.org/8.5.2111/HighAvailability/$basearch/os/
gpgcheck=0
enabled=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-centosofficial
EOF
yum makecache
yum update

cat > /etc/hosts << EOF
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
10.163.2.100 pcs1
10.163.2.106 pcs2
10.163.2.102 pcs3
172.16.0.1 drbd1
172.16.0.2 drbd2
172.16.0.3 drbd3
EOF

scp /etc/hosts pcs2:/etc/
scp /etc/hosts pcs3:/etc/

# Configure the storage NIC (run the matching line on its node)
nmcli connection add con-name eth1 ifname eth1 ipv4.addresses 172.16.0.1/24 ipv4.method manual type ethernet   # on pcs1
nmcli connection add con-name eth1 ifname eth1 ipv4.addresses 172.16.0.2/24 ipv4.method manual type ethernet   # on pcs2
nmcli connection add con-name eth1 ifname eth1 ipv4.addresses 172.16.0.3/24 ipv4.method manual type ethernet   # on pcs3

# Set the hostname (run the matching line on each node)
hostnamectl set-hostname drbd1 && bash
hostnamectl set-hostname drbd2 && bash
hostnamectl set-hostname drbd3 && bash
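Before moving on, it is worth confirming that name resolution and the storage network work from every node; a quick check using the names defined in /etc/hosts above:

# run on each node; both peers should answer over the 172.16.0.0/24 network
ping -c 2 drbd2
ping -c 2 drbd3
getent hosts pcs1 pcs2 pcs3 drbd1 drbd2 drbd3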

Deploying DRBD

Run the following on every node:

yum group install -y "Development Tools"
yum -y install wget vim net-tools lvm2 epel-release
yum -y install drbd

# Download and build the DRBD kernel module
wget https://pkg.linbit.com/downloads/drbd/9.0/drbd-9.0.32-1.tar.gz
tar xf drbd-9.0.32-1.tar.gz
cd drbd-9.0.32-1/
make && make install

# Load the module
modprobe drbd
lsmod | grep -i drbd

# Load the module automatically at boot
echo drbd > /etc/modules-load.d/drbd.conf

# Edit the global configuration (/etc/drbd.d/global_common.conf)
common {
...
    options {
        auto-promote yes;
    }
...
    net {
        protocol C;
    }
}

# Create the LVM volume
pvcreate /dev/sdb
vgcreate nfs /dev/sdb
lvcreate -n nfs -L 10G nfs

# Add the resource configuration file
echo "resource nfs {
    meta-disk internal;
    device /dev/drbd1;
    net {
        verify-alg sha256;
    }
    on drbd1 {
        node-id 0;
        address 172.16.0.1:7788;
        disk /dev/nfs/nfs;
    }
    on drbd2 {
        node-id 1;
        address 172.16.0.2:7788;
        disk /dev/nfs/nfs;
    }
    on drbd3 {
        node-id 2;
        address 172.16.0.3:7788;
        disk /dev/nfs/nfs;
    }
    # With 3 nodes, explicit connection blocks are required
    connection {
        host drbd1 port 7788;
        host drbd2 port 7788;
    }
    connection {
        host drbd1 port 7788;
        host drbd3 port 7788;
    }
    connection {
        host drbd2 port 7788;
        host drbd3 port 7788;
    }
}
" > /etc/drbd.d/nfs-drbd.res

# Create the DRBD resource
drbdadm create-md nfs
drbdadm up nfs
systemctl enable drbd --now

# Open the DRBD port in the firewall
firewall-cmd --permanent --add-port=7788/tcp
firewall-cmd --reload

# Promote one node to primary
drbdadm primary nfs --force

# Format the device (also helps the initial sync finish faster)
mkfs.xfs /dev/drbd1

# Once the sync has completed
drbdadm status
nfs role:Primary
  disk:UpToDate
  drbd2 role:Secondary
    peer-disk:UpToDate
  drbd3 role:Secondary
    peer-disk:UpToDate
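While the initial sync is still running, progress can be followed live; a small sketch, assuming the watch utility (procps-ng) is installed:

# refresh the resource status every second until all peers report UpToDate
watch -n1 drbdadm status nfs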


# Test-mount the device and write some data
mkdir /mnt/nfs
mount /dev/drbd1 /mnt/nfs
dd if=/dev/zero of=/mnt/nfs/test-file.img bs=10M count=50

At this point, if drbd1 fails, you have to promote another node and mount its /dev/drbd1 by hand (or with a script). Letting Pacemaker take over the resources removes that manual step.
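For reference, the manual failover that Pacemaker automates below looks roughly like this; a sketch assuming drbd1 was the primary and drbd2 takes over:

# on the old primary (if still reachable): release the device
umount /mnt/nfs
drbdadm secondary nfs

# on the new primary: promote and mount
drbdadm primary nfs
mkdir -p /mnt/nfs
mount /dev/drbd1 /mnt/nfs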

Deploying Pacemaker

# The epel repo interferes with installing Pacemaker, so remove it first
rm -rf /etc/yum.repos.d/epel*
yum -y install pcs pacemaker corosync fence-agents-all
systemctl enable pcsd --now

firewall-cmd --permanent --add-service=high-availability
firewall-cmd --reload

# Cluster authentication
echo 123456 | passwd --stdin hacluster
pcs host auth pcs1 pcs2 pcs3 -u hacluster -p 123456

# Build the cluster
pcs cluster setup mycluster pcs1 pcs2 pcs3
pcs cluster start --all
pcs cluster enable --all

pcs property set stonith-enabled=false

# Verify the cluster state
pcs cluster status
Cluster Status:
 Cluster Summary:
   * Stack: corosync
   * Current DC: pcs3 (version 2.1.0-8.el8-7c3f660707) - partition with quorum
   * Last updated: Mon Aug 25 20:25:17 2025
   * Last change:  Mon Aug 25 20:24:48 2025 by hacluster via crmd on pcs3
   * 3 nodes configured
   * 0 resource instances configured
 Node List:
   * Online: [ pcs1 pcs2 pcs3 ]

PCSD Status:
  pcs1: Online
  pcs2: Online
  pcs3: Online

# Quorum tuning for 3 nodes; edit on every node
vim /etc/corosync/corosync.conf
quorum {
    provider: corosync_votequorum
    wait_for_all: 1
    last_man_standing: 1
}

pcs cluster sync
pcs cluster reload corosync
systemctl restart pacemaker.service
systemctl restart pcsd
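After the reload and restarts, the new quorum options can be confirmed from any node; both WaitForAll and LastManStanding should show up under "Flags":

corosync-quorumtool -s
pcs quorum status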

Handing resources over to Pacemaker

Taking over DRBD

# Configure the resources against a local CIB file (-f <file>) instead of the live cluster.
# First dump the current CIB into a working file:
pcs cluster cib drbd_cfg

# Once all of the resources below have been defined, the file is applied with:
pcs cluster cib-push drbd_cfg --config

# pcs's built-in resource list does not include a DRBD OCF agent
pcs resource list | grep drbd
service:drbd - systemd unit file for drbd
...

# Reinstall epel and install the extra DRBD integration package
yum -y remove epel-release && yum -y install epel-release
yum -y install drbd-pacemaker

pcs resource list | grep drbd | grep ocf
ocf:linbit:drbd - Manages a DRBD device as a Master/Slave resource
ocf:linbit:drbd-attr - import DRBD state change events as transient node

# Check usage and parameters
pcs resource describe ocf:linbit:drbd

# Create the resource
pcs -f drbd_cfg resource create nfs-drbd ocf:linbit:drbd drbd_resource=nfs \
op monitor interval="29s" role="Master" \
op monitor interval="31s" role="Slave" \
op start interval="0s" timeout="240s" \
op stop interval="0s" timeout="100s"
The Master role is health-checked every 29 seconds.
The Slave role is health-checked every 31 seconds.
Start times out after 240 seconds, stop after 100 seconds.

# Create a promotable resource
# This allows instances of the resource to run in different roles on different nodes
pcs -f drbd_cfg resource promotable nfs-drbd \
promoted-max=1 \
promoted-node-max=1 \
clone-max=3 \
clone-node-max=1 \
notify=true
Only one node may hold the Master role at a time.
A single node can run at most one master instance.
There are 3 resource instances in total.
A single node can run at most one resource instance.
When the resource state changes, Pacemaker notifies every instance.
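At any point before pushing, the shadow CIB can be inspected to see what has been defined so far; a quick look, using the pcs 0.10 syntax shipped with CentOS 8:

# show only the resources defined in the working file
pcs -f drbd_cfg resource config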

# Have Pacemaker manage the filesystem
pcs -f drbd_cfg resource create nfsfs ocf:heartbeat:Filesystem \
device='/dev/drbd1' directory='/mnt/nfs' fstype='xfs'

# Add a constraint:
# nfsfs must be colocated with the promotable resource nfs-drbd-clone, on the node holding the Master role
pcs -f drbd_cfg constraint colocation add \
nfsfs with nfs-drbd-clone INFINITY \
with-rsc-role=Master

# Check the status
pcs resource cleanup nfs-drbd
pcs resource cleanup nfs-drbd-clone
pcs resource refresh
pcs resource status
  * Clone Set: nfs-drbd-clone [nfs-drbd] (promotable):
    * Masters: [ pcs2 ]
    * Slaves: [ pcs1 pcs3 ]
  * nfsfs         (ocf::heartbeat:Filesystem):     Started pcs2

# Shutdown test
Now power off pcs2 with systemctl poweroff:
pcs resource status
  * Clone Set: nfs-drbd-clone [nfs-drbd] (promotable):
    * Masters: [ pcs3 ]
    * Slaves: [ pcs1 ]
    * Stopped: [ pcs2 ]
  * nfsfs         (ocf::heartbeat:Filesystem):     Started pcs3

The Master role has moved to pcs3, and nfsfs has moved with it.

Creating the NFS resource

yum -y install nfs-utils
systemctl enable nfs-server.service --now
firewall-cmd --add-service=nfs --permanent
firewall-cmd --add-service=rpc-bind --permanent
firewall-cmd --add-service=mountd --permanent
firewall-cmd --add-port=30000-30050/tcp --permanent
firewall-cmd --add-port=30000-30050/udp --permanent
firewall-cmd --reload

# Create the nfs-server resource
pcs -f drbd_cfg resource create nfs-server systemd:nfs-server \
op monitor interval='20s' op start interval='0s' timeout='20s' \
op stop interval='0s' timeout='15s'

# Constraints: keep nfs-server and nfsfs on the same node, and start nfs-server after nfsfs
pcs -f drbd_cfg constraint colocation add nfs-server with nfsfs
pcs -f drbd_cfg constraint order nfsfs then nfs-server

# Manually start nfs-server on the current primary node
systemctl start nfs-server
pcs resource status
  * Clone Set: nfs-drbd-clone [nfs-drbd] (promotable):
    * Masters: [ pcs2 ]
    * Slaves: [ pcs1 pcs3 ]
  * nfsfs         (ocf::heartbeat:Filesystem):     Started pcs2
  * nfs-server    (systemd:nfs-server):            Started pcs2

You can test failover at this point as well; I tried it and it worked fine. One way to do it non-destructively is sketched below.
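A gentler failover test than powering a node off is to put the current master into standby and back; a short sketch, assuming pcs2 currently holds the resources:

# drain pcs2; Pacemaker promotes another node and moves nfsfs/nfs-server along
pcs node standby pcs2
pcs resource status

# return pcs2 to normal operation
pcs node unstandby pcs2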

Configuring the export and VIP resources

# Configure the NFS export
pcs -f drbd_cfg resource create exportsdir ocf:heartbeat:exportfs \
directory='/mnt/nfs' fsid='1' \
clientspec='10.163.2.0/24' unlock_on_stop='1' \
options='rw,sync'

# Colocate the export with nfs-server, and start it after nfs-server
pcs -f drbd_cfg constraint colocation add exportsdir with nfs-server
pcs -f drbd_cfg constraint order nfs-server then exportsdir

# Configure the VIP
pcs -f drbd_cfg resource create nfsClusterIP ocf:heartbeat:IPaddr2 \
ip=10.163.2.150 cidr_netmask=24

# Colocate the VIP with the export, and start it after the export
pcs -f drbd_cfg constraint colocation add nfsClusterIP with exportsdir
pcs -f drbd_cfg constraint order exportsdir then nfsClusterIP

# Apply the configuration
pcs cluster cib-push drbd_cfg --config
pcs resource status
  * Clone Set: nfs-drbd-clone [nfs-drbd] (promotable):
    * Masters: [ pcs2 ]
    * Slaves: [ pcs1 pcs3 ]
  * nfsfs         (ocf::heartbeat:Filesystem):     Started pcs2
  * nfs-server    (systemd:nfs-server):            Started pcs2
  * exportsdir    (ocf::heartbeat:exportfs):       Stopped
  * nfsClusterIP  (ocf::heartbeat:IPaddr2):        Stopped

# Prefer running nfs-drbd-clone on pcs1 (optional)
pcs -f drbd_cfg constraint location nfs-drbd-clone \
prefers pcs1=INFINITY pcs2=50

# Manually start the new resources on the current primary node
pcs resource debug-start exportsdir
pcs resource debug-start nfsClusterIP
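At this point the export can also be verified from a client on the management network; a quick check, where /mnt/test is an arbitrary mount point on the client:

# list the exports behind the VIP and mount the share
showmount -e 10.163.2.150
mkdir -p /mnt/test
mount -t nfs 10.163.2.150:/mnt/nfs /mnt/test
df -h /mnt/test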

Constraint summary

pcs -f drbd_cfg constraint colocation add \
nfsfs with nfs-drbd-clone INFINITY \
with-rsc-role=Master

pcs constraint order start nfsfs then start nfs-server
pcs constraint order start nfs-server then start exportsdir
pcs constraint order start exportsdir then start nfsClusterIP

pcs constraint colocation add nfs-server with nfsfs
pcs constraint colocation add exportsdir with nfs-server
pcs constraint colocation add nfsClusterIP with exportsdir
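The constraints that actually land in the CIB can be reviewed at any time; --full also prints the constraint IDs, which are needed if one ever has to be removed:

pcs constraint --full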

Testing resource migration

# For this test, allow resources to keep running without quorum and leave fencing disabled
pcs property set no-quorum-policy=ignore
pcs property set stonith-enabled=false

pcs resource status
  * Clone Set: nfs-drbd-clone [nfs-drbd] (promotable):
    * Masters: [ pcs3 ]
    * Slaves: [ pcs1 pcs2 ]
  * nfsfs         (ocf::heartbeat:Filesystem):     Started pcs3
  * nfs-server    (systemd:nfs-server):            Started pcs3
  * exportsdir    (ocf::heartbeat:exportfs):       Started pcs3
  * nfsClusterIP  (ocf::heartbeat:IPaddr2):        Started pcs3

# Power off pcs3; the resources immediately fail over to pcs1
pcs resource status
  * Clone Set: nfs-drbd-clone [nfs-drbd] (promotable):
    * Masters: [ pcs1 ]
    * Slaves: [ pcs2 ]
    * Stopped: [ pcs3 ]
  * nfsfs         (ocf::heartbeat:Filesystem):     Started pcs1
  * nfs-server    (systemd:nfs-server):            Started pcs1
  * exportsdir    (ocf::heartbeat:exportfs):       Started pcs1
  * nfsClusterIP  (ocf::heartbeat:IPaddr2):        Started pcs1

# Wait about 10 seconds, then power off pcs1 as well
pcs resource status
  * Clone Set: nfs-drbd-clone [nfs-drbd] (promotable):
    * Slaves: [ pcs2 ]
    * Stopped: [ pcs1 pcs3 ]
  * nfsfs         (ocf::heartbeat:Filesystem):     Started pcs2
  * nfs-server    (systemd:nfs-server):            Started pcs2
  * exportsdir    (ocf::heartbeat:exportfs):       Started pcs2
  * nfsClusterIP  (ocf::heartbeat:IPaddr2):        Started pcs2

As you can see, pcs2 keeps everything running even as an isolated single node.
Pinging the VIP across the two failovers, only 8 packets were lost.
Since the VIP is the last resource to start, once it responds the services in front of it can be assumed ready. A simple client-side check is sketched below.
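A rough way to measure the interruption from an NFS client rather than with ICMP is to keep appending timestamps to a file on the share; a minimal sketch, assuming the share is mounted at /mnt/test on the client:

# one timestamped line per second; missing seconds mark the failover window
while true; do date +%T >> /mnt/test/heartbeat.log; sleep 1; done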
