hadoop完全分布式部署示例
部署规划
IP | host | namenode | datanode | ResourceManager | NodeManager |
---|---|---|---|---|---|
127.0.0.1 | Hadoop01 | * | * | ||
127.0.0.2 | Hadoop02 | * | * | ||
127.0.0.3 | Hadoop03 | * | * | * | * |
目录规划
/data/bigdata/hadoop-3.3.5
/data/bigdata/store/hadoop
/data/bigdata/store/journal/data
/data/bigdata/logs/hadoop
1、添加用户
groupadd hadoop
groupadd supergroup
useradd -m -g hadoop hdfs
useradd -m -g hadoop -G supergroup yarn
2、设置添加的用户密码
3、设置host地址
127.0.0.1 Hadoop01
127.0.0.2 Hadoop02
127.0.0.3 Hadoop03
3、设置hdfs、yarn账号3台机器互相免密
ssh-keygen -t rsa
ssh-copy-id -i ~/.ssh/id_rsa.pub Hadoop01
ssh-copy-id -i ~/.ssh/id_rsa.pub Hadoop02
ssh-copy-id -i ~/.ssh/id_rsa.pub Hadoop03
4、上传文件到目录
cd /data/bigdata
5、新建hadoop使用的目录
mkdir -p /data/bigdata/store/hadoop
mkdir -p /data/bigdata/store/journal/data
mkdir -p /data/bigdata/logs/hadoop
6、设置目录权限
chown -R root:hadoop /data/bigdata
chmod -R g+w /data/bigdata
7、设置环境变量
export HADOOP_HOME=/data/bigdata/hadoop-3.3.5
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile
8、设置配置文件:core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://Hadoop03:8020</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/bigdata/store/hadoop</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>Hadoop01,Hadoop02,Hadoop03</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
</configuration>
9、设置配置文件:hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export HADOOP_HOME=/data/bigdata/hadoop-3.3.5
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_HEAPSIZE_MAX=4g
export HADOOP_HEAPSIZE_MIN=512m
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
export HADOOP_LOG_DIR=/data/bigdata/logs/hadoop
export HDFS_NAMENODE_USER=hdfs
export HDFS_DATANODE_USER=hdfs
export HDFS_JOURNALNODE_USER=hdfs
export HDFS_ZKFC_USER=hdfs
10、设置配置文件:hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>0.0.0.0:50070</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/bigdata/store/journal/data</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>true</value>
</property>
</configuration>
11、设置配置文件:mapred-site.xml
<configuration>
<!-- 设置MR程序默认运行模式,yarn集群模式,local本地模式 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- MR程序历史服务地址 -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>Hadoop01:10020</value>
</property>
<!-- MR程序历史服务web端地址 -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>Hadoop01:19888</value>
</property>
<!-- yarn环境变量 -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<!-- map环境变量 -->
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<!-- reduce环境变量 -->
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<!-- 分配给map容器的内存大小 -->
<property>
<name>mapreduce.map.memory.mb</name>
<value>8000</value>
</property>
<!-- 分配给reduce容器的内存大小 -->
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>8000</value>
</property>
<property>
<name>mapreduce.job.counters.max</name>
<value>500</value>
</property>
</configuration>
12、设置配置文件:yarn-env.sh
追加配置:
export YARN_RESOURCEMANAGER_USER=yarn
export YARN_NODEMANAGER_USER=yarn
13、设置配置文件:yarn-site.xml
<configuration>
<!-- 设置YARN集群主角色运行节点rm1-->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>Hadoop03</value>
</property>
<!--ResourceManager的Web页面访问地址-->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>${yarn.resourcemanager.hostname}:8088</value>
</property>
<!--启用ResouerceManager重启的功能,默认为false-->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!--启用资源抢占功能,默认为false-->
<property>
<name>yarn.resourcemanager.scheduler.monitor.enable</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--启用NodeManager重启的功能,默认为false-->
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>0.0.0.0:45454</value>
</property>
<property>
<name>yarn.nodemanager.recovery.supervised</name>
<value>true</value>
</property>
<!-- 是否将对容器实施物理内存限制 -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- 是否将对容器实施虚拟内存限制 -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- 开启日志聚集 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/app-logs</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
</property>
<!-- 设置yarn历史服务器地址 -->
<property>
<name>yarn.log.server.url</name>
<value>http://Hadoop01:19888/jobhistory/logs</value>
</property>
<!-- 设置yarn历史日志保存时间 7天 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- 容器资源分配 -->
<!-- 容器可分配总核数 -->
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>10</value>
</property>
<!-- 容器可分配总物理内存MB -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>25600</value>
</property>
<!-- 单个容器最小分配内存MB -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
</property>
<!-- 单个容器最大分配内存MB -->
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>8224</value>
</property>
<!-- 单个容器最大分配核数 -->
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>2</value>
</property>
</configuration>
14、设置worker节点配置文件:workers
Hadoop01
Hadoop02
Hadoop03
15、格式化namenode
bin/hdfs namenode -format
16、启动hdfs
sbin/start-dfs.sh
sbin/stop-dfs.sh
17、启动yarn
sbin/start-yarn.sh
sbin/stop-yarn.sh
18、单独启动journalnode
hdfs --daemon start journalnode
18、启动yarn历史服务器
sbin/mr-jobhistory-daemon.sh start historyserver
19、启动停止hdfs、yarn
/data/bigdata/hadoop-3.3.5/sbin/stop-dfs.sh
/data/bigdata/hadoop-3.3.5/sbin/start-dfs.sh
/data/bigdata/hadoop-3.3.5/sbin/stop-yarn.sh
/data/bigdata/hadoop-3.3.5/sbin/start-yarn.sh
20、节点状态查看
bin/hdfs haadmin -getServiceState namenode1
bin/hdfs haadmin -failover -forceactive namenode2 namenode1
21、相关命令
启动jobhistory
sbin/mr-jobhistory-daemon.sh start historyserver
#HDFS同步用户组
bin/hdfs dfsadmin -refreshUserToGroupsMappings
bin/hdfs haadmin -getServiceState namenode2
bin/hdfs namenode -initializeSharedEdits
#备namenode
bin/hdfs namenode -bootstrapStandby
hdfs getconf -journalNodes
hdfs dfsadmin -refreshUserToGroupsMappings
hdfs dfs -chown hdfs:hadoop /apps
hdfs dfs -chown hdfs:hadoop /apps/tez-0.10.2
hdfs dfs -chmod 775 /apps/tez-0.10.2
hdfs dfs -put /data/tez.tar.gz /apps/tez-0.10.2
hdfs dfs -ls /apps/tez-0.10.2
hdfs dfs -chmod a+w /apps/tez-0.10.2/tez.tar.gz
hdfs dfs -ls /apps/tez-0.10.2
hdfs dfs -chmod 733 /tmp/hive
hdfs dfs -mkdir /spark2-history
hdfs dfs -chown spark:hadoop /spark2-history
bin/hdfs zkfc -formatZK
bin/hdfs --config "/data/bigdata/hadoop-3.3.5/etc/hadoop" --hostnames "CQA-L0668036 CQA-L0668037 CQA-L0668038" --workers --daemon start journalnode