hadoop HA+Federation(高可用联邦)搭建配置(二)
hadoop HA+Federation(高可用联邦)搭建配置(二)
标签(空格分隔): hadoop
core-site.xml
<?xml version="1.0" encoding="utf-8"?>
<!--
  core-site.xml for the HA + Federation cluster.
  Note the change to the root element: instead of a bare <configuration> it now
  declares the XInclude namespace so the mount table file can be included below.
-->
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
  <!-- Pull in the extra federation configuration file (the ViewFs mount table). -->
  <xi:include href="/app/hadoop/etc/hadoop/mountTable.xml" />

  <property>
    <!-- Default file system: the name used here ("flashhadoop") is referenced by the keys in mountTable.xml. -->
    <name>fs.defaultFS</name>
    <value>viewfs://flashhadoop/</value>
  </property>

  <!-- Hadoop data storage directory. -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/app/hadoop/tmp</value>
  </property>

  <property>
    <!--
      Per the original author, this item was moved here from hdfs-site.xml.
      NOTE(review): the hdfs-site.xml shown with this file still defines the same key
      with a different path (/data1/data/flashhadoop/journal/data); confirm which
      one is intended and keep a single definition.
    -->
    <name>dfs.journalnode.edits.dir</name>
    <value>/data1/data/flashhadoop/journalnode/data</value>
  </property>

  <!-- ZooKeeper address. "XXX" is a placeholder left by the original author; replace it with the real host(s). -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>XXX:2181</value>
  </property>
</configuration>
mountTable.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- ViewFs mount table "flashhadoop": maps client-visible paths onto the federated nameservices. -->
<configuration>
  <property>
    <!--
      Mount /usr so that it is managed by the NameNodes of namespace1. The other HA
      clusters in the federation can still read and write this directory, but they
      must then address it with the fully qualified path.
      NOTE(review): unlike the /tmp link below, this target has no path component;
      confirm that mapping to the root of namespace1 is what is wanted.
    -->
    <name>fs.viewfs.mounttable.flashhadoop.link./usr</name>
    <value>hdfs://namespace1</value>
  </property>

  <property>
    <!-- /home is served by namespace2 (target likewise given without a path component). -->
    <name>fs.viewfs.mounttable.flashhadoop.link./home</name>
    <value>hdfs://namespace2</value>
  </property>

  <property>
    <!-- /tmp: many components that depend on HDFS may use this directory. -->
    <name>fs.viewfs.mounttable.flashhadoop.link./tmp</name>
    <value>hdfs://namespace1/tmp</value>
  </property>
</configuration>
hdfs-site.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- HDFS HA configuration -->
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <property>
    <!--
      Whitelist: only the DataNodes listed in the named file may connect to the NameNode.
      dfs.hosts must be the full path of a file with one host per line; the host
      list cannot be written inline in the value. Create the file with:
        VECS00001
        VECS00002
        VECS00004
        VECS00005
    -->
    <name>dfs.hosts</name>
    <value>/app/hadoop/etc/hadoop/hosts.allow</value>
  </property>

  <property>
    <!-- Blacklist: file listing DataNodes that may NOT connect to the NameNode (one per line). Empty means nothing is excluded. -->
    <name>dfs.hosts.exclude</name>
    <value></value>
  </property>

  <property>
    <!--
      Logical names of the cluster namespaces; several may be configured, but they
      must correspond to the mount table configuration (mountTable.xml; the original
      note calls it cmt.xml).
    -->
    <name>dfs.nameservices</name>
    <value>namespace1,namespace2</value>
  </property>

  <property>
    <!--
      Unique identifiers of all NameNodes in the namespace; they tell the cluster
      which NameNodes exist. Per the original author, a single cluster can currently
      be configured with at most two NameNodes.
    -->
    <name>dfs.ha.namenodes.namespace1</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.namespace2</name>
    <value>nn1,nn2</value>
  </property>

  <!-- ===================== namespace1 ======================= -->
  <property>
    <name>dfs.namenode.rpc-address.namespace1.nn1</name>
    <value>VECS00001:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.namespace1.nn1</name>
    <value>VECS00001:50070</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.namespace1.nn2</name>
    <value>VECS00002:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.namespace1.nn2</name>
    <value>VECS00002:50070</value>
  </property>

  <!-- ===================== namespace2 ======================= -->
  <property>
    <name>dfs.namenode.rpc-address.namespace2.nn1</name>
    <value>VECS00004:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.namespace2.nn1</name>
    <value>VECS00004:50070</value>
  </property>
  <property>
    <!-- Host name corrected from "VECS0005" to match the 5-digit naming of the other nodes. -->
    <name>dfs.namenode.rpc-address.namespace2.nn2</name>
    <value>VECS00005:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.namespace2.nn2</name>
    <value>VECS00005:50070</value>
  </property>

  <property>
    <!--
      JournalNode URLs. The active NameNode writes its edit log to the local
      directory configured on these JournalNodes, i.e. dfs.journalnode.edits.dir.
    -->
    <name>dfs.namenode.shared.edits.dir</name>
    <!--
      Mind the trailing "namespace1": on nodes that belong to the namespace1 cluster
      the URI must end in namespace1, on nodes of the namespace2 cluster it must end
      in namespace2. Be sure to set this on every NameNode node.
    -->
    <value>qjournal://VECS00001:8485;VECS00002:8485;VECS00003:8485;VECS00004:8485;VECS00005:8485/namespace1</value>
  </property>

  <!--
    Directory in which a JournalNode stores edit logs and other state.
    NOTE(review): core-site.xml in this setup defines the same key with
    /data1/data/flashhadoop/journalnode/data; confirm which path is intended and
    keep a single definition.
  -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data1/data/flashhadoop/journal/data</value>
  </property>

  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.namespace1</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.namespace2</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

  <!-- Isolation (fencing) mechanism for the NameNodes; one method per line. -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>
sshfence
shell(/bin/true)
</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/vagrant/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>

  <property>
    <name>dfs.namenode.name.dir</name>
    <!--
      Location(s) of the NameNode directory, comma-separated when there are several.
      With several directories each one holds the same data, giving a redundant copy.
    -->
    <value>file:///data1/data/flashHadoop/namenode/,file:///data2/data/flashHadoop/namenode/</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <!-- Location(s) of the DataNode directories, comma-separated; per the original author, directories that do not exist are ignored. -->
    <value>file:///data1/HDATA/dfs/local,
file:///data2/HDATA/dfs/local,
file:///data3/HDATA/dfs/local,
file:///data4/HDATA/dfs/local,
file:///data5/HDATA/dfs/local,
file:///data6/HDATA/dfs/local,
file:///data7/HDATA/dfs/local,
file:///data8/HDATA/dfs/local,
file:///data9/HDATA/dfs/local,
file:///data10/HDATA/dfs/local,
file:///data11/HDATA/dfs/local,
file:///data12/HDATA/dfs/local</value>
  </property>
</configuration>
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Auxiliary shuffle service run by each NodeManager for MapReduce. -->
<property>
  <name>yarn.nodemanager.aux-services</name>
  <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
  <value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>

<!-- Log aggregation. -->
<property>
  <name>yarn.log-aggregation-enable</name>
  <value>true</value>
</property>
<property>
  <description>Where to aggregate logs to.</description>
  <!--
    The authority must be a nameservice declared in hdfs-site.xml (namespace1 or
    namespace2); "flashHadoop" is not one of them. namespace1 is used because the
    ViewFs mount table links /tmp to hdfs://namespace1/tmp.
  -->
  <name>yarn.nodemanager.remote-app-log-dir</name>
  <value>hdfs://namespace1/tmp/logs</value>
</property>
<property>
  <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
  <value>logs</value>
</property>

<property>
  <description>Classpath for typical applications.</description>
  <name>yarn.application.classpath</name>
  <value>
$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
$HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
$HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
$HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/common/*,
$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/*,
$HADOOP_COMMON_HOME/share/hadoop/hdfs/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/*,
$HADOOP_COMMON_HOME/share/hadoop/mapreduce/lib/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/*,
$HADOOP_COMMON_HOME/share/hadoop/yarn/lib/*
</value>
</property>
<!-- ResourceManager configuration: HA pair rm1/rm2 with automatic failover. -->
<property>
  <name>yarn.resourcemanager.connect.retry-interval.ms</name>
  <value>2000</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.cluster-id</name>
  <value>Yarn_Cluster</value>
</property>
<property>
  <name>yarn.resourcemanager.ha.rm-ids</name>
  <value>rm1,rm2</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm1</name>
  <value>VECS00001</value>
</property>
<property>
  <name>yarn.resourcemanager.hostname.rm2</name>
  <value>VECS00002</value>
</property>

<!-- Scheduler implementation: CapacityScheduler. -->
<property>
  <name>yarn.resourcemanager.scheduler.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<!-- End of CapacityScheduler section. -->

<property>
  <name>yarn.resourcemanager.recovery.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
  <value>5000</value>
</property>

<!-- File listing the YARN NodeManagers to take out of service. -->
<property>
  <name>yarn.resourcemanager.nodes.exclude-path</name>
  <value>/app/hadoop/etc/hadoop/yarn.exclude</value>
  <final>true</final>
</property>

<!-- ZKRMStateStore configuration: RM state is kept in ZooKeeper. -->
<property>
  <name>yarn.resourcemanager.store.class</name>
  <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
  <name>yarn.resourcemanager.zk-address</name>
  <value>VECS00002:2181,VECS00001:2181,VECS00047:2181,VECS01118:2181,VECS01119:2181</value>
</property>
<property>
  <name>yarn.resourcemanager.zk.state-store.address</name>
  <value>VECS00002:2181,VECS00001:2181,VECS00047:2181,VECS01118:2181,VECS01119:2181</value>
</property>
<!-- Applications manager interface: clients submit application operations to the RM through this address. -->
<property>
  <name>yarn.resourcemanager.address.rm1</name>
  <value>VECS00001:23140</value>
</property>
<property>
  <name>yarn.resourcemanager.address.rm2</name>
  <value>VECS00002:23140</value>
</property>

<!-- Scheduler interface: address used to request resources from the RM. -->
<property>
  <name>yarn.resourcemanager.scheduler.address.rm1</name>
  <value>VECS00001:23130</value>
</property>
<property>
  <name>yarn.resourcemanager.scheduler.address.rm2</name>
  <value>VECS00002:23130</value>
</property>

<!-- RM admin interface. -->
<property>
  <name>yarn.resourcemanager.admin.address.rm1</name>
  <value>VECS00001:23141</value>
</property>
<property>
  <name>yarn.resourcemanager.admin.address.rm2</name>
  <value>VECS00002:23141</value>
</property>

<!-- RM resource-tracker interface: NodeManagers report heartbeats to the RM and pick up tasks here. -->
<property>
  <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
  <value>VECS00001:23125</value>
</property>
<property>
  <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
  <value>VECS00002:23125</value>
</property>

<!-- RM web application interface (HTTP and HTTPS). -->
<property>
  <name>yarn.resourcemanager.webapp.address.rm1</name>
  <value>VECS00001:8088</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.address.rm2</name>
  <value>VECS00002:8088</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.https.address.rm1</name>
  <value>VECS00001:23189</value>
</property>
<property>
  <name>yarn.resourcemanager.webapp.https.address.rm2</name>
  <value>VECS00002:23189</value>
</property>

<!-- Log server URL (same host and port as mapreduce.jobhistory.webapp.address in mapred-site.xml) and web proxy address. -->
<property>
  <name>yarn.log.server.url</name>
  <value>http://VECS00003:19888/jobhistory/logs</value>
</property>
<property>
  <name>yarn.web-proxy.address</name>
  <value>VECS00003:54315</value>
</property>
<!-- NodeManager configuration. -->
<property>
  <description>Address where the localizer IPC is.</description>
  <name>yarn.nodemanager.localizer.address</name>
  <value>0.0.0.0:23344</value>
</property>
<property>
  <description>NM Webapp address.</description>
  <name>yarn.nodemanager.webapp.address</name>
  <value>0.0.0.0:8042</value>
</property>
<!-- yarn.nodemanager.aux-services is already defined near the top of this file; the duplicate definition that used to be here was removed. -->
<property>
  <name>yarn.nodemanager.local-dirs</name>
  <value>file:///data12/HDATA/yarn/local,
file:///data11/HDATA/yarn/local,
file:///data10/HDATA/yarn/local,
file:///data9/HDATA/yarn/local,
file:///data8/HDATA/yarn/local,
file:///data7/HDATA/yarn/local,
file:///data6/HDATA/yarn/local,
file:///data5/HDATA/yarn/local,
file:///data4/HDATA/yarn/local,
file:///data3/HDATA/yarn/local,
file:///data2/HDATA/yarn/local,
file:///data1/HDATA/yarn/local</value>
</property>
<property>
  <name>yarn.nodemanager.log-dirs</name>
  <value>file:///data12/HDATA/yarn/logs,
file:///data11/HDATA/yarn/logs,
file:///data10/HDATA/yarn/logs,
file:///data9/HDATA/yarn/logs,
file:///data8/HDATA/yarn/logs,
file:///data7/HDATA/yarn/logs,
file:///data6/HDATA/yarn/logs,
file:///data5/HDATA/yarn/logs,
file:///data4/HDATA/yarn/logs,
file:///data3/HDATA/yarn/logs,
file:///data2/HDATA/yarn/logs,
file:///data1/HDATA/yarn/logs</value>
</property>
<property>
  <name>yarn.nodemanager.delete.debug-delay-sec</name>
  <value>1200</value>
</property>
<property>
  <name>mapreduce.shuffle.port</name>
  <value>23080</value>
</property>
<property>
  <name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
  <value>true</value>
</property>
<!-- Tuning: resources each NodeManager offers to containers. -->
<property>
  <name>yarn.nodemanager.resource.memory-mb</name>
  <value>102400</value>
</property>
<property>
  <name>yarn.nodemanager.resource.cpu-vcores</name>
  <value>28</value>
</property>

<!-- Tuning: YARN container allocation sizes. -->
<property>
  <name>yarn.scheduler.minimum-allocation-mb</name>
  <value>2048</value>
</property>
<property>
  <name>yarn.scheduler.maximum-allocation-mb</name>
  <value>8192</value>
</property>
<!--
  NOTE(review): increment-allocation-mb and the yarn.scheduler.fair.* key below look
  FairScheduler-specific, while this file selects CapacityScheduler; confirm they
  are still needed.
-->
<property>
  <name>yarn.scheduler.increment-allocation-mb</name>
  <value>512</value>
</property>
<!-- Defined once; the original listed this property twice with the same value. -->
<property>
  <name>yarn.scheduler.fair.allow-undeclared-pools</name>
  <value>false</value>
</property>

<!-- Container memory enforcement: both the virtual- and physical-memory checks are switched off. -->
<property>
  <name>yarn.nodemanager.vmem-check-enabled</name>
  <value>false</value>
</property>
<property>
  <name>yarn.nodemanager.pmem-check-enabled</name>
  <value>false</value>
</property>
<property>
  <name>yarn.nodemanager.vmem-pmem-ratio</name>
  <value>4</value>
  <description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
</property>

<!-- Retention of aggregated logs, in seconds (1209600 s = 14 days). -->
<property>
  <name>yarn.log-aggregation.retain-seconds</name>
  <value>1209600</value>
</property>
<!-- Newly added feature: node labels. -->
<property>
  <name>yarn.node-labels.enabled</name>
  <value>true</value>
</property>
<property>
  <!--
    The authority must be a nameservice declared in hdfs-site.xml (namespace1 or
    namespace2); "flashHadoop" is not one of them, so namespace1 is used here.
  -->
  <name>yarn.node-labels.fs-store.root-dir</name>
  <value>hdfs://namespace1/yarn/yarn-node-labels/</value>
</property>
<!-- Timeline server. -->
<property>
  <name>yarn.timeline-service.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.timeline-service.generic-application-history.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.timeline-service.http-cross-origin.enabled</name>
  <value>true</value>
</property>
<property>
  <name>yarn.timeline-service.hostname</name>
  <value>VECS00001</value>
</property>
<property>
  <name>yarn.timeline-service.handler-thread-count</name>
  <value>10</value>
</property>
<!-- LevelDB-backed timeline store and state store locations, both under /app/hadoop/tmp. -->
<property>
  <name>yarn.timeline-service.leveldb-timeline-store.path</name>
  <value>/app/hadoop/tmp/yarn/timeline/</value>
</property>
<property>
  <name>yarn.timeline-service.leveldb-state-store.path</name>
  <value>/app/hadoop/tmp/yarn/timeline/timeline-state-store.ldb</value>
</property>

<!-- ResourceManager adjustments: thread counts. -->
<property>
  <name>yarn.resourcemanager.client.thread-count</name>
  <value>100</value>
</property>
<property>
  <name>yarn.resourcemanager.amlauncher.thread-count</name>
  <value>100</value>
</property>
<property>
  <name>yarn.resourcemanager.scheduler.client.thread-count</name>
  <value>100</value>
</property>
</configuration>
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Run MapReduce on YARN; the JobHistory server lives on VECS00003. -->
<property>
  <name>mapreduce.framework.name</name>
  <value>yarn</value>
</property>
<property>
  <name>mapreduce.jobhistory.address</name>
  <value>VECS00003:10020</value>
</property>
<property>
  <name>mapreduce.jobhistory.webapp.address</name>
  <value>VECS00003:19888</value>
</property>
<property>
  <!--
    NOTE(review): core-site.xml sets fs.defaultFS to the ViewFs mount table, and
    mountTable.xml defines links for /usr, /home and /tmp only; confirm that
    /user resolves.
  -->
  <name>yarn.app.mapreduce.am.staging-dir</name>
  <value>/user</value>
</property>

<!-- MapReduce tuning: container memory, JVM options and vcores per task. -->
<property>
  <name>mapreduce.map.memory.mb</name>
  <value>2048</value>
</property>
<property>
  <name>mapreduce.map.java.opts</name>
  <value>-Xmx1536m -Dfile.encoding=UTF-8</value>
</property>
<property>
  <name>mapreduce.reduce.memory.mb</name>
  <value>6144</value>
</property>
<property>
  <name>mapreduce.reduce.java.opts</name>
  <value>-Xmx4608m -Dfile.encoding=UTF-8</value>
</property>
<property>
  <name>mapreduce.map.cpu.vcores</name>
  <value>1</value>
</property>
<property>
  <name>mapreduce.reduce.cpu.vcores</name>
  <value>2</value>
</property>
<property>
  <name>mapreduce.cluster.local.dir</name>
  <value>file:///data8/HDATA/mapred/local,
file:///data7/HDATA/mapred/local,
file:///data6/HDATA/mapred/local,
file:///data5/HDATA/mapred/local,
file:///data4/HDATA/mapred/local,
file:///data3/HDATA/mapred/local,
file:///data2/HDATA/mapred/local,
file:///data1/HDATA/mapred/local</value>
</property>

<!-- Map, shuffle and reduce tuning. -->
<property>
  <!-- Original author's note: 30*10 = io.sort.mb. -->
  <name>mapreduce.task.io.sort.mb</name>
  <value>300</value>
</property>

<!-- JobHistory retention and cache size. -->
<property>
  <name>mapreduce.jobhistory.max-age-ms</name>
  <value>1296000000</value>
  <source>mapred-default.xml</source>
</property>
<property>
  <name>mapreduce.jobhistory.joblist.cache.size</name>
  <value>200000</value>
  <source>mapred-default.xml</source>
</property>
</configuration>
hdfs-site.xml namespace1 && namespace2 !!!
在 hdfs-site.xml 文件中的 dfs.namenode.shared.edits.dir 配置项:
当配置文件所在节点处于namespace1集群时,此处值末尾部分为namespace1,当处于namespace2集群时,则为namespace2.
启动过程
零,格式化zkfc
在namespace1 nn1(VECS00001) && namespace2(VECS00004) nn1 do:
sudo su - hdfs
hdfs zkfc -formatZK
一,格式化namenode
在namespace1 nn1(VECS00001) && namespace2(VECS00004) nn1 do: (必须指定 clusterid)
hdfs namenode -format -clusterid XXXXXXX
二,启动格式化过后的nn
在namespace1 nn1(VECS00001) && namespace2(VECS00004) nn1 do:
sudo su - hdfs
hadoop-daemons.sh start namenode
三,standby nn 同步active 元数据
在namespace1 nn2(VECS00002) && namespace2(VECS00005) nn2 do:
sudo su - hdfs
hdfs namenode -bootstrapStandby
hadoop-daemons.sh start namenode
同步完后 并启动
四,启动zkfc
在namespace1 nn1 nn2 && namespace2 nn1 nn2 do:
sudo su - hdfs
hadoop-daemons.sh start zkfc
启动集群所有datanode
hadoop-daemon.sh start datanode
五,启动yarn 集群
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start nodemanager
yarn-daemon.sh start proxyserver
mr-jobhistory-daemon.sh start historyserver