部署hadoop
上一篇中我们已经安装好了虚拟机。
接下来开始部署Hadoop
首先分配一下角色
node1:Namenode、Datanode、ResourceManager、NodeManager、HistoryServer、WebProxyServer、QuorumPeerMain
node2:Datanode、NodeManager、QuorumPeerMain
node3:Datanode、NodeManager、QuorumPeerMain
调整虚拟机内存
node1设置4GB或以上内存
node2和node3设置2GB或以上内存
Hadoop集群部署
下载Hadoop安装包、解压、配置软链接
# 1. Download the Hadoop 3.3.0 release tarball
wget http://archive.apache.org/dist/hadoop/common/hadoop-3.3.0/hadoop-3.3.0.tar.gz
# 2. Extract — create the target directory first so tar cannot fail on a missing path
mkdir -p /export/server
tar -zxvf hadoop-3.3.0.tar.gz -C /export/server/
# 3. Create a version-independent symlink
ln -s /export/server/hadoop-3.3.0 /export/server/hadoop
修改配置文件:hadoop-env.sh
cd 进入 /export/server/hadoop/etc/hadoop 文件夹,配置文件都在这里
修改hadoop-env.sh文件
此文件是配置一些Hadoop用到的环境变量
这些是临时变量,在Hadoop运行时有用
如果要永久生效,需要写到/etc/profile中
# Add the following at the top of hadoop-env.sh.
# Java installation path
export JAVA_HOME=/export/server/jdk
# Hadoop installation path
export HADOOP_HOME=/export/server/hadoop
# Hadoop HDFS configuration directory
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Hadoop YARN configuration directory
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
# Hadoop YARN log directory
export YARN_LOG_DIR=$HADOOP_HOME/logs/yarn
# Hadoop HDFS log directory
export HADOOP_LOG_DIR=$HADOOP_HOME/logs/hdfs
# OS user that each Hadoop daemon starts as
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export YARN_PROXYSERVER_USER=root
修改配置文件:
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!-- Default filesystem: the HDFS NameNode RPC endpoint on node1 -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://node1:8020</value>
    <description></description>
  </property>
  <!-- I/O buffer size in bytes -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
    <description></description>
  </property>
</configuration>
配置:
hdfs-site.xml
文件
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!-- Permission bits for DataNode data directories -->
  <property>
    <name>dfs.datanode.data.dir.perm</name>
    <value>700</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data/nn</value>
    <description>Path on the local filesystem where the NameNode stores the namespace and transactions logs persistently.</description>
  </property>
  <property>
    <name>dfs.namenode.hosts</name>
    <value>node1,node2,node3</value>
    <description>List of permitted DataNodes.</description>
  </property>
  <!-- HDFS block size: 256 MB (268435456 bytes) -->
  <property>
    <name>dfs.blocksize</name>
    <value>268435456</value>
    <description></description>
  </property>
  <!-- Number of NameNode RPC handler threads -->
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>100</value>
    <description></description>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data/dn</value>
  </property>
</configuration>
配置:
mapred-env.sh
文件
# Add the following environment variables at the top of mapred-env.sh.
export JAVA_HOME=/export/server/jdk
# JobHistoryServer JVM heap size in MB
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
# Root log level and appender for MapReduce daemons
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
配置:
mapred-site.xml
文件
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!-- Run MapReduce jobs on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description></description>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
    <description></description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
    <description></description>
  </property>
  <property>
    <name>mapreduce.jobhistory.intermediate-done-dir</name>
    <value>/data/mr-history/tmp</value>
    <description></description>
  </property>
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>/data/mr-history/done</value>
    <description></description>
  </property>
  <!-- Make the AM, map and reduce tasks see the Hadoop installation -->
  <property>
    <name>yarn.app.mapreduce.am.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
  <property>
    <name>mapreduce.map.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
  <property>
    <name>mapreduce.reduce.env</name>
    <value>HADOOP_MAPRED_HOME=$HADOOP_HOME</value>
  </property>
</configuration>
配置:
yarn-env.sh
文件
# 在文件的开头加入如下环境变量设置
export JAVA_HOME=/export/server/jdk
export HADOOP_HOME=/export/server/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
export YARN_LOG_DIR=$HADOOP_HOME/logs/yarn
export HADOOP_LOG_DIR=$HADOOP_HOME/logs/hdfs
配置:
yarn-site.xml
文件
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.log.server.url</name>
    <value>http://node1:19888/jobhistory/logs</value>
    <description></description>
  </property>
  <property>
    <name>yarn.web-proxy.address</name>
    <value>node1:8089</value>
    <description>proxy server hostname and port</description>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>Configuration to enable or disable log aggregation</description>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/tmp/logs</value>
    <description>Configuration to enable or disable log aggregation</description>
  </property>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>node1</value>
    <description></description>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
    <description></description>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/data/nm-local</value>
    <description>Comma-separated list of paths on the local filesystem where intermediate data is written.</description>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/data/nm-log</value>
    <description>Comma-separated list of paths on the local filesystem where logs are written.</description>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>10800</value>
    <description>Default time (in seconds) to retain log files on the NodeManager Only applicable if log-aggregation is disabled.</description>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>Shuffle service that needs to be set for Map Reduce applications.</description>
  </property>
</configuration>
修改workers文件
# 全部内容如下
node1
node2
node3
分发hadoop到其它机器
# Run on node1: copy the Hadoop installation to the other nodes.
# NOTE: the second scp must target node3, otherwise node3 never receives Hadoop.
cd /export/server
scp -r hadoop-3.3.0 node2:"$(pwd)"/
scp -r hadoop-3.3.0 node3:"$(pwd)"/
在node2、node3执行
# Create the version-independent symlink (run on node2 and node3)
ln -s /export/server/hadoop-3.3.0 /export/server/hadoop
创建所需目录
# Create the data directories referenced by hdfs-site.xml / yarn-site.xml
# (nn: NameNode metadata, dn: DataNode blocks, nm-log: NodeManager logs)
mkdir -p /data/nn
mkdir -p /data/dn
mkdir -p /data/nm-log
mkdir -p /data/nm-local
配置环境变量
在node1、node2、node3修改/etc/profile
export HADOOP_HOME=/export/server/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
格式化NameNode,在node1执行
# "hadoop namenode -format" is deprecated; use the hdfs command instead
hdfs namenode -format
启动hadoop的hdfs集群,在node1执行即可
# Start the HDFS daemons across the whole cluster (run on node1 only)
start-dfs.sh
# To stop the cluster, run:
stop-dfs.sh
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· Manus重磅发布:全球首款通用AI代理技术深度解析与实战指南
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 【自荐】一款简洁、开源的在线白板工具 Drawnix