部署hadoop

上一次安装好虚拟机

接下来开始部署Hadoop

首先分配一下角色

node1:Namenode、Datanode、ResourceManager、NodeManager、HistoryServer、WebProxyServer、QuorumPeerMain
node2:Datanode、NodeManager、QuorumPeerMain
node3:Datanode、NodeManager、QuorumPeerMain

调整虚拟机内存

node1设置4GB或以上内存

node2和node3设置2GB或以上内存

 

Hadoop集群部署

下载Hadoop安装包、解压、配置软链接

# 1. 下载
wget http://archive.apache.org/dist/hadoop/common/hadoop-3.3.0/hadoop-3.3.0.tar.gz

# 2. 解压
# 请确保目录/export/server存在
tar -zxvf hadoop-3.3.0.tar.gz -C /export/server/

# 3. 构建软链接
ln -s /export/server/hadoop-3.3.0 /export/server/hadoop

修改配置文件:hadoop-env.sh

cd 进入到/export/server/hadoop/etc/hadoop,文件夹中,配置文件都在这里

修改hadoop-env.sh文件

此文件是配置一些Hadoop用到的环境变量

这些是临时变量,在Hadoop运行时有用

如果要永久生效,需要写到/etc/profile中

# 在文件开头加入: # 配置Java安装路径 export JAVA_HOME=/export/server/jdk # 配置Hadoop安装路径 export HADOOP_HOME=/export/server/hadoop # Hadoop hdfs配置文件路径 export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop # Hadoop YARN配置文件路径 export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop # Hadoop YARN 日志文件夹 export YARN_LOG_DIR=$HADOOP_HOME/logs/yarn # Hadoop hdfs 日志文件夹 export HADOOP_LOG_DIR=$HADOOP_HOME/logs/hdfs # Hadoop的使用启动用户配置 export HDFS_NAMENODE_USER=root export HDFS_DATANODE_USER=root export HDFS_SECONDARYNAMENODE_USER=root export YARN_RESOURCEMANAGER_USER=root export YARN_NODEMANAGER_USER=root export YARN_PROXYSERVER_USER=root

 

修改配置文件:core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:8020</value>
    <description></description>
  </property>

  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
    <description></description>
  </property>
</configuration>

  配置:hdfs-site.xml文件

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
    <property>
        <name>dfs.datanode.data.dir.perm</name>
        <value>700</value>
    </property>

  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/nn</value>
    <description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>
  </property>

  <property>
    <name>dfs.namenode.hosts</name>
    <value>node1,node2,node3</value>
    <description>List of permitted DataNodes.</description>
  </property>

  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
    <description></description>
  </property>


  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
    <description></description>
  </property>

  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/dn</value>
  </property>
</configuration>

  配置:mapred-env.sh文件

# 在文件的开头加入如下环境变量设置
export JAVA_HOME=/export/server/jdk
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA

  配置:mapred-site.xml文件

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description></description>
  </property>

  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
    <description></description>
  </property>


  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
    <description></description>
  </property>


  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/data/mr-history/tmp</value>
    <description></description>
  </property>


  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/data/mr-history/done</value>
    <description></description>
  </property>
<property>
  <name>yarn.app.mapreduce.am.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
  <name>mapreduce.map.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
<property>
  <name>mapreduce.reduce.env</name>
  <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
</property>
</configuration>

  配置:yarn-env.sh文件

# 在文件的开头加入如下环境变量设置
export JAVA_HOME=/export/server/jdk
export HADOOP_HOME=/export/server/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_LOG_DIR=$HADOOP_HOME/logs/yarn
export HADOOP_LOG_DIR=$HADOOP_HOME/logs/hdfs

配置:yarn-site.xml文件

<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>

<!-- Site specific YARN configuration properties -->
<property>
    <name>yarn.log.server.url</name>
    <value>http://node1:19888/jobhistory/logs</value>
    <description></description>
</property>

  <property>
    <name>yarn.web-proxy.address</name>
    <value>node1:8089</value>
    <description>proxy server hostname and port</description>
  </property>


  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>Configuration to enable or disable log aggregation</description>
  </property>

  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/tmp/logs</value>
    <description>Configuration to enable or disable log aggregation</description>
  </property>


<!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
    <description></description>
  </property>

  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    <description></description>
  </property>

  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/data/nm-local</value>
    <description>Comma-separated list of paths on the local filesystem where intermediate data is written.</description>
  </property>


  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/data/nm-log</value>
    <description>Comma-separated list of paths on the local filesystem where logs are written.</description>
  </property>


  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>10800</value>
    <description>Default time (in seconds) to retain log files on the NodeManager Only applicable if log-aggregation is disabled.</description>
  </property>



  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>Shuffle service that needs to be set for Map Reduce applications.</description>
  </property>
</configuration>

修改workers文件

# 全部内容如下
node1
node2
node3

分发hadoop到其它机器

# 在node1执行
cd /export/server

scp -r hadoop-3.3.0 node2:`pwd`/
scp -r hadoop-3.3.0 node2:`pwd`/

在node2、node3执行

# 创建软链接
ln -s /export/server/hadoop-3.3.0 /export/server/hadoop

 

创建所需目录

mkdir -p /data/nn
mkdir -p /data/dn
mkdir -p /data/nm-log
mkdir -p /data/nm-local

配置环境变量在node1、node2、node3修改/etc/profile

export HADOOP_HOME=/export/server/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

格式化NameNode,在node1执行

hadoop namenode -format

启动hadoop的hdfs集群,在node1执行即可

start-dfs.sh

# 如需停止可以执行
stop-dfs.sh

 

 

 

 

 

 

posted @ 2024-07-13 21:43  财神给你送元宝  阅读(8)  评论(0编辑  收藏  举报