# 集群规划
NN DN JN ZK ZKFC RM
master 1 1 1
slave1 1 1 1 1 1
slave2 1 1 1 1
slave3 1 1 1
1、服务器时间同步:
yum install ntp;
ntpdate -u s1a.time.edu.cn
date
网络、hosts、防火墙关闭
2、JDK1.8+
3、Hadoop解压安装;
4、免密钥:
master --> slave1,slave2,slave3
slave1 --> master,slave2,slave3
注意:NN之间必须免密钥(*****)
$ ssh-keygen -t rsa (回车四次)
$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
master公钥文件拷贝至slave1,再把master的公钥内容追加至slave1的authorized_keys
5、配置文件:
$ vi /etc/profile
export JAVA_HOME=
export PATH=$PATH:${JAVA_HOME}/bin
export HADOOP_HOME=
export PATH=$PATH:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin
hadoop-env.sh >> JAVA_HOME
core-site.xml >> hadoop.tmp.dir #自定义:/var/hadoop/tmp
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>slave1:2181,slave2:2181,slave3:2181</value> <!-- 与集群规划一致:ZK部署在slave1/slave2/slave3 -->
</property>
hdfs-site.xml (*****) --> 官方文档
# dfs.nameservices - the logical name for this new nameservice
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
# dfs.ha.namenodes.[nameservice ID] - unique identifiers for each NameNode in the nameservice
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
# dfs.namenode.rpc-address.[nameservice ID].[name node ID] - the fully-qualified RPC address for each NameNode to listen on
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>master:8020</value> <!-- nn1部署在master -->
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>slave1:8020</value> <!-- nn2部署在slave1 -->
</property>
# dfs.namenode.http-address.[nameservice ID].[name node ID] - the fully-qualified HTTP address for each NameNode to listen on
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>master:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>slave1:50070</value>
</property>
# dfs.namenode.shared.edits.dir - the URI which identifies the group of JNs where the NameNodes will write/read edits
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master:8485;slave1:8485;slave2:8485/mycluster</value> <!-- JN部署在master/slave1/slave2,与第7步一致 -->
</property>
# dfs.client.failover.proxy.provider.[nameservice ID] - the Java class that HDFS clients use to contact the Active NameNode
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/exampleuser/.ssh/id_rsa</value>
</property>
# dfs.journalnode.edits.dir
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/path/to/journal/node/local/data</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
yarn-site.xml:
<configuration>
<!-- 开启RM高可用 -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- 指定RM的cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>mycluster</value>
</property>
<!-- 指定RM的名字 -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- 分别指定RM的地址 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>slave2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>slave3</value> <!-- rm1与rm2必须是不同主机,按集群规划RM部署在slave2/slave3 -->
</property>
<!-- 指定zk集群地址 -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>slave1:2181,slave2:2181,slave3:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
slaves # 指定DN
6、同步配置文件(服务器)
7、启动JN
三台master,slave1,slave2分别执行:hadoop-daemon.sh start journalnode
8、格式化NN
master --> 执行hdfs namenode -format;(hadoop namenode -format已废弃,统一使用hdfs命令)
9、同步namenode:
其他没有格式化的NN上执行:slave1 --> hdfs namenode -bootstrapStandby
10、启动ZK集群:
三台ZK 分别执行:zkServer.sh start
11、格式化ZK
在master执行:hdfs zkfc -formatZK;
12、启动:start-dfs.sh
13、启动YARN:在slave2执行start-yarn.sh;另一台RM(slave3)需手动执行:yarn-daemon.sh start resourcemanager