HDFS HA测试环境搭建及故障切换测试

一、HDFS HA测试环境搭建

1、集群规划

HDFS HA高可用集群
主机名 IP 安装的软件 执行的进程
bd-dev01 10.176.158.41 jdk、zookeeper QuorumPeerMain
bd-dev02 10.176.158.42 jdk、zookeeper QuorumPeerMain
bd-dev03 10.176.158.43 jdk、zookeeper QuorumPeerMain
bd-dev05 10.176.158.45 jdk、Hadoop NameNode、DFSZKFailoverController(zkfc)
bd-dev06 10.176.158.46 jdk、Hadoop NameNode、DFSZKFailoverController(zkfc)
bd-dev07 10.176.158.47 jdk、Hadoop DataNode、JournalNode
bd-dev08 10.176.158.48 jdk、Hadoop DataNode、JournalNode
bd-dev09 10.176.158.49 jdk、Hadoop DataNode、JournalNode

2、主机基础配置

设置hosts、关闭防火墙、关闭SELinux、配置SSH免密登录。

3、安装jdk

#解压

tar -xvf jdk-8u131-linux-x64.tar.gz

mv jdk1.8.0_131 /usr/local/jdk1.8

#设置环境变量
vim /etc/profile

JAVA_HOME=/usr/local/jdk1.8/
JAVA_BIN=/usr/local/jdk1.8/bin
JRE_HOME=/usr/local/jdk1.8/jre
PATH=$PATH:/usr/local/jdk1.8/bin:/usr/local/jdk1.8/jre/bin
CLASSPATH=/usr/local/jdk1.8/jre/lib:/usr/local/jdk1.8/lib:/usr/local/jdk1.8/jre/lib/charsets.jar
4、安装zookeeper
#下载
[root@sl-opencron src]# wget https://mirrors.tuna.tsinghua.edu.cn/apache/zookeeper/zookeeper-3.4.14/zookeeper-3.4.14.tar.gz

[root@sl-opencron src]# tar -xvf zookeeper-3.4.14.tar.gz

#解压后的目录mv 到/usr/local/下
[root@sl-opencron src]# mv zookeeper-3.4.14 /usr/local/zookeeper

 #Step7.1:配置zookeeper

cd /usr/local/zookeeper/conf/

#将zoo_sample.cfg(样板配置文件)命名为zoo.cfg
mv zoo_sample.cfg  zoo.cfg

#修改配置文件
[root@sl-opencron conf]# vim zoo.cfg

***********
***********

#路径可自定义
dataDir=/data/zookeeper 

server.1=10.176.158.41:2888:3888
server.2=10.176.158.42:2888:3888
server.3=10.176.158.43:2888:3888

#生成myid文件

mkdir /data/zookeeper

cd /data/zookeeper

touch myid

echo "1" >> myid

说明:bd-dev01的myid是1,bd-dev02的myid是2,bd-dev03的myid是3(与zoo.cfg中的server.N编号一一对应)

#启动zookeeper集群

说明:分别在bd-dev01、bd-dev02、bd-dev03上执行

cd /usr/local/zookeeper/bin

./zkServer.sh start

cd /usr/local/zookeeper/bin

./zkServer.sh start

5、安装hdfs

#下载
wget http://www-eu.apache.org/dist/hadoop/common/hadoop-2.9.2/hadoop-2.9.2.tar.gz

#解压
tar -xvf hadoop-2.9.2.tar.gz

#解压后的目录移动到/usr/local/
 mv hadoop-2.9.2 /usr/local/hadoop

#创建几个目录,在所有的Hadoop节点执行
[root@hadooop-master hadoop]# mkdir /sda1/hdfs/tmp /sda1/hdfs/data /sda1/hdfs/name /sda1/hdfs/journal

#配置Hadoop

vim core-site.xml
core-site.xml
<configuration> <property> <name>fs.defaultFS</name> <value>hdfs://ns1</value> </property> <property> <name>hadoop.tmp.dir</name> <value>/sda1/hdfs/tmp</value> </property> <property> <name>ha.zookeeper.quorum</name> <value>10.176.158.41:2181,10.176.158.42:2181,10.176.158.43:2181</value> </property> </configuration>

vim hdfs-site.xml

hdfs-site.xml

<configuration>
 
<!-- 指定hdfs的nameservice为ns1,需要和core-site.xml中的保持一致 -->
        <property>
                <name>dfs.nameservices</name>
                <value>ns1</value>
        </property>
        <!-- ns1下面有两个NameNode,分别是nn1,nn2 -->
        <property>
                <name>dfs.ha.namenodes.ns1</name>
                <value>nn1,nn2</value>
        </property>
        <!-- nn1的RPC通信地址 -->
        <property>
                <name>dfs.namenode.rpc-address.ns1.nn1</name>
                <value>bd-dev05:9000</value>
        </property>
        <!-- nn1的http通信地址 -->
        <property>
                <name>dfs.namenode.http-address.ns1.nn1</name>
                <value>bd-dev05:50070</value>
        </property>
        <!-- nn2的RPC通信地址 -->
        <property>
                <name>dfs.namenode.rpc-address.ns1.nn2</name>
                <value>bd-dev06:9000</value>
        </property>
        <!-- nn2的http通信地址 -->
        <property>
                <name>dfs.namenode.http-address.ns1.nn2</name>
                <value>bd-dev06:50070</value>
        </property>
        <!-- 指定NameNode的元数据在JournalNode上的存放位置 -->
        <property>
                <name>dfs.namenode.shared.edits.dir</name>
                <value>qjournal://bd-dev07:8485;bd-dev08:8485;bd-dev09:8485/ns1</value>
        </property>
        <!-- 指定JournalNode在本地磁盘存放数据的位置 -->
        <property>
                <name>dfs.journalnode.edits.dir</name>
                <value>/sda1/hdfs/journal</value>
        </property>
        <!-- 开启NameNode失败自动切换 -->
        <property>
                <name>dfs.ha.automatic-failover.enabled</name>
                <value>true</value>
        </property>
        <!-- 配置失败自动切换的实现方式 -->
        <property>
                <name>dfs.client.failover.proxy.provider.ns1</name>
                <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
        </property>
        <!-- 配置隔离机制方法。多个机制用换行分隔,即每个机制占用一行 -->
        <property>
                <name>dfs.ha.fencing.methods</name>
                <value>
                        sshfence
                        shell(/bin/true)
                </value>
        </property>
        <!-- 使用sshfence隔离机制时需要ssh免密登录 -->
        <property>
                <name>dfs.ha.fencing.ssh.private-key-files</name>
                <value>/root/.ssh/id_rsa</value>
        </property>
        <!-- 配置sshfence隔离机制超时时间 -->
        <property>
                <name>dfs.ha.fencing.ssh.connect-timeout</name>
                <value>30000</value>
        </property>
        <!-- 配置namenode目录 -->
        <property>
                <name>dfs.namenode.name.dir</name>
                <value>/sda1/hdfs/name</value>
         </property>
    <!-- ##指定datanode软件存放文件块的本地目录 -->
         <property>
               <name>dfs.datanode.data.dir</name>
               <value>/sda1/hdfs/data</value>
         </property>
</configuration>
vim slaves

bd-dev07
bd-dev08
bd-dev09

#复制到每一台

scp -r /usr/local/hadoop bd-dev06:/usr/local

scp -r /usr/local/hadoop bd-dev07:/usr/local

scp -r /usr/local/hadoop bd-dev08:/usr/local

scp -r /usr/local/hadoop bd-dev09:/usr/local

#启动journalnode

说明:分别在bd-dev07 bd-dev08 bd-dev09启动

cd /usr/local/hadoop/sbin/

./hadoop-daemon.sh start journalnode

#格式化HDFS

说明:在bd-dev05操作

cd /usr/local/hadoop/bin/

./hdfs namenode -format

#格式化ZKFC

说明:在bd-dev05操作

cd /usr/local/hadoop/bin/

./hdfs zkfc -formatZK

#启动HDFS

说明:在bd-dev05操作

cd /usr/local/hadoop/sbin/

./start-dfs.sh

#启动bd-dev06的namenode(在bd-dev06操作)

cd /usr/local/hadoop/

./bin/hdfs namenode -bootstrapStandby

sbin/hadoop-daemon.sh start namenode

启动说明

1 # 第一次启动的时候请严格按照上面的步骤【第一次涉及格式化问题】
2 # 第二次以及之后,步骤为: 启动zookeeper、HDFS

#查看进程

[root@bd-dev05 hadoop-2.9.2]# jps
113989 Jps
49621 DFSZKFailoverController
115356 NameNode

二、高可用切换测试

说明:初始bd-dev05的状态是active、bd-dev06的状态是standby

#将bd-dev05的namenode kill掉

[yun@bd-dev05 ~]$ jps

49621 DFSZKFailoverController

49191 NameNode

115001 Jps

[yun@bd-dev05 ~]$ kill 49191

[yun@bd-dev05 ~]$ jps

49621 DFSZKFailoverController

115001 Jps

说明:将bd-dev05的namenode kill掉后hdfs仍可使用,bd-dev06自动切换成active;bd-dev05的namenode重新启动后变为standby。
posted @ 2020-04-24 16:48  凌枫恋  阅读(469)  评论(0)  编辑  收藏  举报