YARN HA 配置文件设置

Hadoop 2.7.4 + HBase 1.2.6 + ZooKeeper 3.4.10 配置

本文为上述配置的一部分,为方便阅读,故设为独立页面

 

参考:http://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html

 

1:配置

首先创建local临时目录:/home/mytestzk/hadoop-2.7.4/tmp/yarn

  • yarn-site.xml

特别注意:对以下节点yarn.resourcemanager.ha.id,需要在每个ResourceManager上修改对应的值,但在其它机器上不需要此节点,比如在master机器上设置为rm1, 在slave1机器上设置为rm2

<configuration>
    <!--
      yarn-site.xml for a two-ResourceManager HA cluster:
        rm1 runs on host "master", rm2 runs on host "slave1",
        ZooKeeper ensemble is master/slave1/slave2 on port 2181.
      The file is the same on every host except for
      yarn.resourcemanager.ha.id (see the note on that property below).
    -->

    <!-- Auxiliary shuffle service run inside each NodeManager. -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!--
      The class key must embed the aux-service name exactly as declared above
      ("mapreduce_shuffle", with an underscore). The older dotted form
      "mapreduce.shuffle" does not match that service name.
    -->
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>

    <!-- Interval (ms) between attempts to reconnect to the ResourceManager after contact is lost. -->
    <property>
        <name>yarn.resourcemanager.connect.retry-interval.ms</name>
        <value>2000</value>
    </property>
    <!-- Enable ResourceManager HA (default is false). -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>

    <!-- Logical IDs of the ResourceManagers taking part in HA. -->
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <!-- ZooKeeper ensemble (host:port list). -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>

    <!-- Enable automatic failover between the ResourceManagers. -->
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- Host name of each ResourceManager, keyed by its rm-id. -->
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>master</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>slave1</value>
    </property>

    <!--
      PER-HOST VALUE: set to rm1 on master and to rm2 on slave1.
      Hosts that do not run a ResourceManager should omit this property.
    -->
    <property>
        <name>yarn.resourcemanager.ha.id</name>
        <value>rm1</value>
        <description>Identifies this RM in the ensemble. Optional, but if set, every RM must have its own distinct ID in its config.</description>
    </property>

    <!-- Enable ResourceManager state recovery after restart or failover. -->
    <property>
        <name>yarn.resourcemanager.recovery.enabled</name>
        <value>true</value>
    </property>

    <!-- ZooKeeper connection settings for the RM state store. -->
    <!-- NOTE(review): this looks like an older key name; yarn.resourcemanager.zk-address
         below carries the same value. Confirm whether 2.7.x still reads this key. -->
    <property>
        <name>yarn.resourcemanager.zk-state-store.address</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
    <!-- Persist RM state in ZooKeeper. -->
    <property>
        <name>yarn.resourcemanager.store.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
    </property>
    <property>
        <name>yarn.resourcemanager.zk-address</name>
        <value>master:2181,slave1:2181,slave2:2181</value>
    </property>
    <!-- Cluster name; must be identical on both ResourceManagers. -->
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>appcluster-yarn</value>
    </property>

    <!-- How long (ms) the MapReduce ApplicationMaster waits when it has lost its scheduler connection. -->
    <property>
        <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
        <value>5000</value>
    </property>

    <!-- Service addresses for rm1 (host "master"). -->
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>master:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>master:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>master:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm1</name>
        <value>master:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>master:8088</value>
    </property>
    <!-- NOTE(review): confirm this key is recognised by the Hadoop version in use. -->
    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm1</name>
        <value>master:23142</value>
    </property>

    <!-- Service addresses for rm2 (host "slave1"); same ports as rm1. -->
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>slave1:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>slave1:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>slave1:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address.rm2</name>
        <value>slave1:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>slave1:8088</value>
    </property>
    <!-- NOTE(review): confirm this key is recognised by the Hadoop version in use. -->
    <property>
        <name>yarn.resourcemanager.ha.admin.address.rm2</name>
        <value>slave1:23142</value>
    </property>

    <!-- NodeManager settings. The local directory must exist before YARN is started. -->
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/home/mytestzk/hadoop-2.7.4/tmp/yarn</value>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/home/mytestzk/hadoop-2.7.4/log</value>
    </property>
    <!-- Port the shuffle handler listens on. -->
    <property>
        <name>mapreduce.shuffle.port</name>
        <value>23080</value>
    </property>

    <!-- Class clients use to fail over to the active ResourceManager. -->
    <property>
        <name>yarn.client.failover-proxy-provider</name>
        <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
        <value>/yarn-leader-election</value>
        <description>Optional setting. The default value is /yarn-leader-election</description>
    </property>
</configuration>

 

2:启动YARN

正常启动HDFS后,因为配置了master及slave1为ResourceManager,所以需要分别在master 及 slave1上执行命令(注:需要在每个ResourceManager上执行启动命令):

sbin/start-yarn.sh

 

3:查看状态:

从命令行查看状态:bin/yarn rmadmin -getServiceState rm1

或在浏览器中查看:master:8088, 及slave1:8088

4: 验证HA

将active的resourcemanager 进程杀掉 kill -9 {进程id},然后再查看yarn状态变化,可以看到原来的standby将变为active

 

5:HDFS HA 及YARN HA 命令总结

 

| | ZooKeeper(在每个zookeeper server) | HDFS HA(在任一个NameNode) | YARN HA(在每个ResourceManager) |
|---|---|---|---|
| 启动 | bin/zkServer.sh start | sbin/start-dfs.sh | sbin/start-yarn.sh |
| 停止 | bin/zkServer.sh stop | sbin/stop-dfs.sh | sbin/stop-yarn.sh |
| 查看状态 | bin/zkServer.sh status | | bin/yarn rmadmin -getServiceState rm1 |
| | | master:50070 | master:8088 |
| | | slave1:50070 | slave1:8088 |

posted @ 2017-12-26 16:10  黎明踏浪号  阅读(1030)  评论(0编辑  收藏  举报