hadoop生态搭建(3节点)-04.hadoop配置
如果之前没有安装 jdk 和 zookeeper,请先按下面的步骤安装;已经安装的请直接跳过这两步。
# https://www.oracle.com/technetwork/java/javase/downloads/java-archive-javase8-2177648.html
# ==================================================================安装 jdk
mkdir -p /usr/java
tar -zxvf ~/jdk-8u111-linux-x64.tar.gz -C /usr/java
rm -r ~/jdk-8u111-linux-x64.tar.gz
# http://archive.apache.org/dist/zookeeper/
# ==================================================================安装 zookeeper
# zookeeper集群搭建要至少3台服务器,服务器都要部署zookeeper
tar -zxvf ~/zookeeper-3.4.12.tar.gz -C /usr/local
rm -r ~/zookeeper-3.4.12.tar.gz
# http://archive.apache.org/dist/hadoop/core/hadoop-2.7.6/
# ==================================================================安装 hadoop
tar -zxvf ~/hadoop-2.7.6.tar.gz -C /usr/local
rm -r ~/hadoop-2.7.6.tar.gz
# 配置环境变量
# ==================================================================node1 node2 node3
vi /etc/profile
# 在export PATH USER LOGNAME MAIL HOSTNAME HISTSIZE HISTCONTROL下添加
export JAVA_HOME=/usr/java/jdk1.8.0_111
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.12
export HADOOP_HOME=/usr/local/hadoop-2.7.6
export PATH=$PATH:$JAVA_HOME/bin:$JAVA_HOME/jre/bin:$ZOOKEEPER_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
# ==================================================================node1
# 使环境变量生效
source /etc/profile
# 查看配置结果
echo $HADOOP_HOME
# hadoop 配置 hadoop-env.sh
vi $HADOOP_HOME/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_111
#export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib:$HADOOP_HOME/lib/native"
export HADOOP_OPTS="-Djava.library.path=/usr/local/hadoop-2.7.6/lib:/usr/local/hadoop-2.7.6/lib/native"
export HADOOP_PID_DIR=/usr/local/hadoop-2.7.6/pids
# export HADOOP_OPTS="$HADOOP_OPTS -Duser.timezone=GMT+08"
# hadoop 配置 mapred-env.sh
vi $HADOOP_HOME/etc/hadoop/mapred-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_111
export HADOOP_MAPRED_PID_DIR=/usr/local/hadoop-2.7.6/pids
# hadoop 配置 yarn-env.sh:将 yarn-env.sh 中的 JAVA_HEAP_MAX 改为 -Xmx3072m(即 3G)
vi $HADOOP_HOME/etc/hadoop/yarn-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_111
JAVA_HEAP_MAX=-Xmx3072m
# YARN_OPTS="$YARN_OPTS -Duser.timezone=GMT+08"
# 配置 slaves,每行一个主机名
vi $HADOOP_HOME/etc/hadoop/slaves
node1
node2
node3
# 配置文件中对应需创建的文件夹
mkdir $HADOOP_HOME/tmp
mkdir $HADOOP_HOME/dfs
mkdir $HADOOP_HOME/dfs/name
mkdir $HADOOP_HOME/dfs/data
mkdir $HADOOP_HOME/logs
mkdir $HADOOP_HOME/journal
mkdir $HADOOP_HOME/journal/data
mkdir $HADOOP_HOME/yarn
mkdir $HADOOP_HOME/yarn/local
mkdir $HADOOP_HOME/yarn/logs
# core-site.xml
vi $HADOOP_HOME/etc/hadoop/core-site.xml
<configuration> <!-- 指定hdfs的nameservice为cluster --> <property> <name>fs.defaultFS</name> <value>hdfs://appcluster</value> </property> <!-- 指定zookeeper地址--> <property> <name>ha.zookeeper.quorum</name> <value>node1:2181,node2:2181,node3:2181</value> </property> <property> <name>ha.zookeeper.session-timeout.ms</name> <value>2000</value> </property> <!-- 故障检查时间 --> <property> <name>ha.failover-controller.cli-check.rpc-timeout.ms</name> <value>60000</value> </property> <!-- ipc通讯超时时间 --> <property> <name>ipc.client.connect.timeout</name> <value>20000</value> </property> <!-- 指定hadoop临时目录 --> <property> <name>hadoop.tmp.dir</name> <value>/usr/local/hadoop-2.7.6/tmp</value> </property> <property> <name>hadoop.security.authorization</name> <value>true</value> </property> <property> <name>hadoop.security.authentication</name> <value>simple</value> </property> <property> <name>hadoop.proxyuser.super.hosts</name> <value>*</value> </property> <property> <name>hadoop.proxyuser.super.groups</name> <value>*</value> </property> <property> <name>hadoop.proxyuser.root.groups</name> <value>*</value> </property> <property> <name>hadoop.proxyuser.root.hosts</name> <value>*</value> </property> <property> <name>hadoop.native.lib</name> <value>true</value> <description>Should native hadoop libraries, if present, be used.</description> </property> </configuration>
# hdfs-site.xml
vi $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<configuration> <!-- 指定namenode元数据存放路径;如果机器上有多块硬盘,推荐配置多个路径用逗号分隔 --> <property> <name>dfs.namenode.name.dir</name> <value>/usr/local/hadoop-2.7.6/dfs/name</value> </property> <!-- 指定datanode数据存储地址 --> <property> <name>dfs.datanode.data.dir</name> <value>/usr/local/hadoop-2.7.6/dfs/data</value> </property> <!-- 指定数据冗余份数,默认3份 --> <property> <name>dfs.replication</name> <value>2</value> </property> <!--指定hdfs的nameservice为cluster,需要和core-site.xml中的保持一致 --> <property> <name>dfs.nameservices</name> <value>appcluster</value> </property> <!-- cluster下面有两个NameNode,分别是nn1,nn2 --> <property> <name>dfs.ha.namenodes.appcluster</name> <value>nn1,nn2</value> </property> <!-- nn1、nn2的RPC通信地址 --> <property> <name>dfs.namenode.rpc-address.appcluster.nn1</name> <value>node1:8020</value> </property> <property> <name>dfs.namenode.rpc-address.appcluster.nn2</name> <value>node2:8020</value> </property> <!-- nn1、nn2的http通信地址 --> <property> <name>dfs.namenode.http-address.appcluster.nn1</name> <value>node1:50070</value> </property> <property> <name>dfs.namenode.http-address.appcluster.nn2</name> <value>node2:50070</value> </property> <property> <name>dfs.namenode.servicerpc-address.appcluster.nn1</name> <value>node1:53310</value> </property> <property> <name>dfs.namenode.servicerpc-address.appcluster.nn2</name> <value>node2:53310</value> </property> <!-- 指定NameNode的元数据在JournalNode上的存放位置 --> <property> <name>dfs.namenode.shared.edits.dir</name> <value>qjournal://node1:8485;node2:8485;node3:8485/appcluster</value> </property> <!-- 指定JournalNode在本地磁盘存放数据的位置 --> <property> <name>dfs.journalnode.edits.dir</name> <value>/usr/local/hadoop-2.7.6/journal/data</value> </property> <!-- 开启NameNode失败自动切换 --> <property> <name>dfs.ha.automatic-failover.enabled.appcluster</name> <value>true</value> </property> <!-- 配置失败自动切换实现方式 --> <property> <name>dfs.client.failover.proxy.provider.appcluster</name> <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value> </property> <!-- 
配置隔离机制方法,多个机制用换行分隔,即每个机制占用一行 -->
<property>
  <name>dfs.ha.fencing.methods</name>
  <value>
    sshfence
    shell(/bin/true)
  </value>
</property>
<property> <name>dfs.ha.fencing.ssh.private-key-files</name> <value>/root/.ssh/id_rsa</value> </property>
<property> <name>dfs.ha.fencing.ssh.connect-timeout</name> <value>30000</value> </property>
<property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property>
<property> <name>dfs.permissions.enabled</name> <value>false</value> </property>
<property> <name>dfs.permissions</name> <value>false</value> </property>
<!-- 提供web访问hdfs的权限 -->
<property> <name>dfs.webhdfs.enabled</name> <value>true</value> </property>
</configuration>
# mapred-site.xml
cp $HADOOP_HOME/etc/hadoop/mapred-site.xml.template $HADOOP_HOME/etc/hadoop/mapred-site.xml
vi $HADOOP_HOME/etc/hadoop/mapred-site.xml
<configuration> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> </configuration>
# yarn-site.xml
vi $HADOOP_HOME/etc/hadoop/yarn-site.xml
<configuration> <property> <name>dfs.ha.automatic-failover.enabled.appcluster</name> <value>true</value> </property> <!--rm失联后重新链接的时间--> <property> <name>yarn.resourcemanager.connect.retry-interval.ms</name> <value>2000</value> </property> <!--开启resourcemanagerHA,默认为false--> <property> <name>yarn.resourcemanager.ha.enabled</name> <value>true</value> </property> <!--配置resourcemanager--> <property> <name>yarn.resourcemanager.ha.rm-ids</name> <value>rm1,rm2</value> </property> <property> <name>ha.zookeeper.quorum</name> <value>node1:2181,node2:2181,node3:2181</value> </property> <!--开启故障自动切换--> <property> <name>yarn.resourcemanager.ha.automatic-failover.enabled</name> <value>true</value> </property> <property> <name>yarn.resourcemanager.hostname.rm1</name> <value>node1</value> </property> <property> <name>yarn.resourcemanager.hostname.rm2</name> <value>node2</value> </property> <!-- 在node1上配置rm1,在node2上配置rm2, 注意:一般都喜欢把配置好的文件远程复制到其它机器上,但这个在YARN的另一个机器上一定要修改 --> <property> <name>yarn.resourcemanager.ha.id</name> <value>rm1</value> <description>If we want to launch more than one RM in single node,we need this configuration</description> </property> <!--开启自动恢复功能--> <property> <name>yarn.resourcemanager.recovery.enabled</name> <value>true</value> </property> <!--配置与zookeeper的连接地址--> <property> <name>yarn.resourcemanager.zk-state-store.address</name> <value>node1:2181,node2:2181,node3:2181</value> </property> <property> <name>yarn.resourcemanager.store.class</name> <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value> </property> <property> <name>yarn.resourcemanager.zk-address</name> <value>node1:2181,node2:2181,node3:2181</value> </property> <property> <name>yarn.resourcemanager.cluster-id</name> <value>appcluster-yarn</value> </property> <!--schelduler失联等待连接时间--> <property> <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name> <value>5000</value> </property> <!--配置rm1,rm2--> <property> 
<name>yarn.resourcemanager.address.rm1</name> <value>node1:8032</value> </property>
<property> <name>yarn.resourcemanager.address.rm2</name> <value>node2:8032</value> </property>
<property> <name>yarn.resourcemanager.scheduler.address.rm1</name> <value>node1:8030</value> </property>
<property> <name>yarn.resourcemanager.scheduler.address.rm2</name> <value>node2:8030</value> </property>
<property> <name>yarn.resourcemanager.webapp.address.rm1</name> <value>node1:8088</value> </property>
<property> <name>yarn.resourcemanager.webapp.address.rm2</name> <value>node2:8088</value> </property>
<property> <name>yarn.resourcemanager.resource-tracker.address.rm1</name> <value>node1:8031</value> </property>
<property> <name>yarn.resourcemanager.resource-tracker.address.rm2</name> <value>node2:8031</value> </property>
<property> <name>yarn.resourcemanager.admin.address.rm1</name> <value>node1:8033</value> </property>
<property> <name>yarn.resourcemanager.admin.address.rm2</name> <value>node2:8033</value> </property>
<property> <name>yarn.resourcemanager.ha.admin.address.rm1</name> <value>node1:23142</value> </property>
<property> <name>yarn.resourcemanager.ha.admin.address.rm2</name> <value>node2:23142</value> </property>
<property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property>
<property> <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property>
<property> <name>yarn.nodemanager.local-dirs</name> <value>/usr/local/hadoop-2.7.6/yarn/local</value> </property>
<property> <name>yarn.nodemanager.log-dirs</name> <value>/usr/local/hadoop-2.7.6/yarn/logs</value> </property>
<property> <name>mapreduce.shuffle.port</name> <value>23080</value> </property>
<!--故障处理类-->
<property> <name>yarn.client.failover-proxy-provider</name> <value>org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider</value> </property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name> <value>/yarn-leader-election</value> <description>Optional setting. The default value is /yarn-leader-election</description> </property>
<property> <name>yarn.nodemanager.resource.memory-mb</name> <value>2048</value> </property>
<property> <name>yarn.scheduler.maximum-allocation-mb</name> <value>2048</value> </property>
<property> <name>yarn.nodemanager.resource.cpu-vcores</name> <value>1</value> </property>
</configuration>
# 修改日志的路径
vi $HADOOP_HOME/etc/hadoop/log4j.properties
hadoop.log.dir=/usr/local/hadoop-2.7.6/logs
# ==================================================================node1
scp -r $HADOOP_HOME node2:/usr/local/
scp -r $HADOOP_HOME node3:/usr/local/
# ==================================================================node2 node3
# 使环境变量生效
source /etc/profile
# 查看配置结果
echo $HADOOP_HOME
# ==================================================================node2
vi $HADOOP_HOME/etc/hadoop/yarn-site.xml
<property> <name>yarn.resourcemanager.ha.id</name> <value>rm2</value> </property>
shutdown -h now
# 快照 hadoop集群前
# ==================================================================初次启动
# 1.启动zookeeper集群所有节点
# ==================================================================node1 node2 node3
zkServer.sh start
zkServer.sh status
# zkServer.sh stop
# $ZOOKEEPER_HOME/bin/zkServer.sh start
# $ZOOKEEPER_HOME/bin/zkServer.sh status
# $ZOOKEEPER_HOME/bin/zkServer.sh stop
# 2.格式化Zookeeper文件系统,在active的namenode上运行
# ==================================================================node1
$HADOOP_HOME/bin/hdfs zkfc -formatZK
# 验证:$ZOOKEEPER_HOME/bin/zkCli.sh
# ls /
# ls /hadoop-ha
# 3.启动JournalNode集群
# ==================================================================node1 node2 node3
$HADOOP_HOME/sbin/hadoop-daemon.sh start journalnode
# jps
# QuorumPeerMain
# Jps
# JournalNode
# 4.格式化集群的 NameNode
# ==================================================================node1
$HADOOP_HOME/bin/hdfs namenode -format
# 如果不是首次format的话还是把NameNode和DataNode存放数据地址下的数据手动删除一下,否则会造成NameNode ID和DataNode ID不一致,
# rm -rf $HADOOP_HOME/dfs/name/* && rm -rf $HADOOP_HOME/dfs/data/*
# $HADOOP_HOME/bin/hdfs namenode -format
# (如果是HDFS联盟,即有多个HDFS集群同时工作,则用hdfs namenode -format -clusterId [clusterID])
# 5.启动 NameNode
# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
# 6.把NameNode的数据从node1同步到node2中,在standby的namenode格式化namenode,同步NameNode的数据
# ==================================================================node2
$HADOOP_HOME/bin/hdfs namenode -bootstrapStandby
$HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
# 7.启动所有的DataNode
# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemons.sh start datanode
# 8.启动Yarn
# ==================================================================node1
$HADOOP_HOME/sbin/start-yarn.sh
# 9.在node1,node2启动ZooKeeperFailoverController(这里不用在node3中启动,因为node3这个节点是纯粹的DataNode)
# ==================================================================node1 node2
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
# 10.验证HA的故障自动转移是否好用
# kill 进程号
# $HADOOP_HOME/sbin/hadoop-daemon.sh start namenode
# 启动后在各个节点查看进程运行情况
# node1
jps
#2850 NodeManager
#2963 DFSZKFailoverController
#3027 Jps
#2374 NameNode
#2679 ResourceManager
#2523 DataNode
#2126 QuorumPeerMain
#2255 JournalNode
# node2
jps
#2209 JournalNode
#2674 DataNode
#2850 NodeManager
#2134 QuorumPeerMain
#2424 ResourceManager
#2973 DFSZKFailoverController
#3021 Jps
#2318 NameNode
# node3
jps
#2546 Jps
#2131 QuorumPeerMain
#2278 DataNode
#2200 JournalNode
#2446 NodeManager
# http://node1:50070/dfshealth.html#tab-overview
# http://node2:50070/dfshealth.html#tab-overview
# http://node1:8088/cluster/nodes
# ==================================================================停止集群
# ==================================================================node1
# stop已经启动的进程
$HADOOP_HOME/sbin/stop-all.sh
# ==================================================================node1 node2 node3
# 停止 zookeeper
zkServer.sh stop
# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc
# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh stop resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc
# ==================================================================再次顺序启动方式
# ==================================================================node1 node2 node3
# 启动 zookeeper
zkServer.sh start
zkServer.sh status
# ==================================================================node1
# 启动hadoop所有进程
$HADOOP_HOME/sbin/start-all.sh
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
hadoop dfsadmin -safemode get
# 命令强制离开
# hadoop dfsadmin -safemode leave
# ==================================================================停止集群
# ==================================================================node1
# stop已经启动的进程
$HADOOP_HOME/sbin/stop-all.sh
# ==================================================================node1 node2 node3
# 停止 zookeeper
zkServer.sh stop
# ==================================================================node1
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc
# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh stop resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh stop zkfc
shutdown -h now
# 快照 hadoop集群
# 先不急着进行simple 认证
# ==================================================================simple 认证
vi $HADOOP_HOME/etc/hadoop/core-site.xml
<property> <name>hadoop.http.filter.initializers</name> <value>org.apache.hadoop.security.AuthenticationFilterInitializer</value> </property> <property> <name>hadoop.http.authentication.type</name> <value>simple</value> </property> <property> <name>hadoop.http.authentication.token.validity</name> <value>3600</value> </property> <property> <name>hadoop.http.authentication.signature.secret.file</name> <value>/usr/local/hadoop-2.7.6/hadoop-http-auth-signature-secret</value> </property> <property> <name>hadoop.http.authentication.cookie.domain</name> <value></value> </property> <property> <name>hadoop.http.authentication.simple.anonymous.allowed</name> <value>false</value> </property>
# 在上述配置的目录 $HADOOP_HOME 下生成文件hadoop-http-auth-signature-secret
echo "hadoop" > $HADOOP_HOME/hadoop-http-auth-signature-secret
scp -r $HADOOP_HOME/etc/hadoop/core-site.xml node2:$HADOOP_HOME/etc/hadoop/core-site.xml
scp -r $HADOOP_HOME/etc/hadoop/core-site.xml node3:$HADOOP_HOME/etc/hadoop/core-site.xml
scp -r $HADOOP_HOME/hadoop-http-auth-signature-secret node2:$HADOOP_HOME/hadoop-http-auth-signature-secret
scp -r $HADOOP_HOME/hadoop-http-auth-signature-secret node3:$HADOOP_HOME/hadoop-http-auth-signature-secret
# ==================================================================再次顺序启动方式
# ==================================================================node1 node2 node3
# 启动 zookeeper
zkServer.sh start
zkServer.sh status
# ==================================================================node1
# 启动hadoop所有进程
$HADOOP_HOME/sbin/start-all.sh
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
# ==================================================================node2
$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
$HADOOP_HOME/sbin/hadoop-daemon.sh start zkfc
hadoop dfsadmin -safemode get
# 命令强制离开
# hadoop dfsadmin -safemode leave
# 网页访问
# http://node1:50070?user.name=hadoop
# http://node2:50070?user.name=hadoop
# http://node1:8088/cluster/nodes?user.name=hadoop
shutdown -h now
# 快照 hadoop_simple认证
# ==================================================================Kerberos 认证
# 后续整理好后开放