hadoop完全分布式部署示例

部署规划

IP        host      namenode  datanode  ResourceManager  NodeManager
127.0.0.1 Hadoop01            *                          *
127.0.0.2 Hadoop02            *                          *
127.0.0.3 Hadoop03  *         *         *                *

目录规划

/data/bigdata/hadoop-3.3.5
/data/bigdata/store/hadoop
/data/bigdata/store/journal/data
/data/bigdata/logs/hadoop

1、添加用户

groupadd hadoop
groupadd supergroup
useradd -m -g hadoop hdfs
useradd -m -g hadoop -G supergroup yarn

2、设置添加的用户密码

3、设置host地址

127.0.0.1 Hadoop01
127.0.0.2 Hadoop02
127.0.0.3 Hadoop03

3、设置hdfs、yarn账号3台机器互相免密

ssh-keygen -t rsa

ssh-copy-id -i ~/.ssh/id_rsa.pub Hadoop01
ssh-copy-id -i ~/.ssh/id_rsa.pub Hadoop02
ssh-copy-id -i ~/.ssh/id_rsa.pub Hadoop03

4、上传文件到目录

cd /data/bigdata

5、新建hadoop使用的目录

mkdir -p /data/bigdata/store/hadoop
mkdir -p /data/bigdata/store/journal/data
mkdir -p /data/bigdata/logs/hadoop

6、设置目录权限

chown -R root:hadoop /data/bigdata
chmod -R g+w /data/bigdata

7、设置环境变量

export HADOOP_HOME=/data/bigdata/hadoop-3.3.5
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source /etc/profile

8、设置配置文件:core-site.xml

<configuration>
	<property>
        <name>fs.defaultFS</name>
        <value>hdfs://Hadoop03:8020</value>
	</property>
    <property>
		<name>hadoop.tmp.dir</name>
		<value>/data/bigdata/store/hadoop</value>
	</property>
	<property>
		<name>hadoop.proxyuser.hive.hosts</name>
		<value>Hadoop01,Hadoop02,Hadoop03</value>
	</property>
	<property>
		<name>hadoop.proxyuser.hive.groups</name>
		<value>*</value>
	</property>
</configuration>

9、设置配置文件:hadoop-env.sh

export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
export HADOOP_HOME=/data/bigdata/hadoop-3.3.5
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_HEAPSIZE_MAX=4g
export HADOOP_HEAPSIZE_MIN=512m
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
export HADOOP_LOG_DIR=/data/bigdata/logs/hadoop

export HDFS_NAMENODE_USER=hdfs
export HDFS_DATANODE_USER=hdfs
export HDFS_JOURNALNODE_USER=hdfs
export HDFS_ZKFC_USER=hdfs

10、设置配置文件:hdfs-site.xml

<configuration>
   <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>0.0.0.0:50070</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/data/bigdata/store/journal/data</value>
    </property>
    <property>
        <name>dfs.permissions.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.namenode.acls.enabled</name>
        <value>true</value>
    </property>
</configuration>

11、设置配置文件:mapred-site.xml

<configuration>
	<!-- 设置MR程序默认运行模式,yarn集群模式,local本地模式 -->
	<property>
		<name>mapreduce.framework.name</name>
		<value>yarn</value>
	</property>

	<!-- MR程序历史服务地址 -->
	<property>
		<name>mapreduce.jobhistory.address</name>
		<value>Hadoop01:10020</value>
	</property>

	<!-- MR程序历史服务web端地址 -->
	<property>
		<name>mapreduce.jobhistory.webapp.address</name>
		<value>Hadoop01:19888</value>
	</property>

	<!-- yarn环境变量 -->
	<property>
		<name>yarn.app.mapreduce.am.env</name>
		<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
	</property>

	<!-- map环境变量 -->
	<property>
		<name>mapreduce.map.env</name>
		<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
	</property>

	<!-- reduce环境变量 -->
	<property>
		<name>mapreduce.reduce.env</name>
		<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
	</property>

	<!-- 分配给map容器的内存大小 -->
	<property>
		<name>mapreduce.map.memory.mb</name>
		<value>8000</value>
	</property>

	<!-- 分配给reduce容器的内存大小 -->
	<property>
		<name>mapreduce.reduce.memory.mb</name>
		<value>8000</value>
	</property>	
	
	<property>
		<name>mapreduce.job.counters.max</name>
		<value>500</value>
	</property>
</configuration>

12、设置配置文件:yarn-env.sh

追加配置:
export YARN_RESOURCEMANAGER_USER=yarn
export YARN_NODEMANAGER_USER=yarn

13、设置配置文件:yarn-site.xml

<configuration>
	<!-- 设置YARN集群主角色运行节点rm1-->
	<property>
		<name>yarn.resourcemanager.hostname</name>
		<value>Hadoop03</value>
	</property>
	
	<!--ResourceManager的Web页面访问地址-->
	<property>
		<name>yarn.resourcemanager.webapp.address</name>
		<value>${yarn.resourcemanager.hostname}:8088</value>
	</property>

	<!--启用ResouerceManager重启的功能,默认为false-->
	<property>
		<name>yarn.resourcemanager.recovery.enabled</name>
		<value>true</value>
	</property>

	<!--启用资源抢占功能,默认为false-->
	<property>
		<name>yarn.resourcemanager.scheduler.monitor.enable</name>
		<value>false</value>
	</property>
	
	<property>
		<name>yarn.nodemanager.aux-services</name>
		<value>mapreduce_shuffle</value>
	</property>

	<!--启用NodeManager重启的功能,默认为false-->
	<property>
		<name>yarn.nodemanager.recovery.enabled</name>
		<value>true</value>
	</property>

	<property>
		<name>yarn.nodemanager.address</name>
		<value>0.0.0.0:45454</value>
	</property>

	<property>
		<name>yarn.nodemanager.recovery.supervised</name>
		<value>true</value>
	</property>

	<!-- 是否将对容器实施物理内存限制 -->
	<property>
		<name>yarn.nodemanager.pmem-check-enabled</name>
		<value>false</value>
	</property>

	<!-- 是否将对容器实施虚拟内存限制 -->
	<property>
		<name>yarn.nodemanager.vmem-check-enabled</name>
		<value>false</value>
	</property>

<!-- 开启日志聚集 -->
	<property>
		<name>yarn.log-aggregation-enable</name>
		<value>true</value>
	</property>

	<property>
		<name>yarn.nodemanager.remote-app-log-dir</name>
		<value>/app-logs</value>
	</property>

	<property>
		<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
		<value>logs</value>
	</property>

	<!-- 设置yarn历史服务器地址 -->
	<property>
		<name>yarn.log.server.url</name>
		<value>http://Hadoop01:19888/jobhistory/logs</value>
	</property>

	<!-- 设置yarn历史日志保存时间 7天 -->
	<property>
		<name>yarn.log-aggregation.retain-seconds</name>
		<value>604800</value>
	</property>

	<!-- 容器资源分配 -->
	<!-- 容器可分配总核数 -->
	<property>
		<name>yarn.nodemanager.resource.cpu-vcores</name>
		<value>10</value>
	</property>
	<!-- 容器可分配总物理内存MB -->
	<property>
		<name>yarn.nodemanager.resource.memory-mb</name>
		<value>25600</value>
	</property>
	<!-- 单个容器最小分配内存MB -->
	<property>
		<name>yarn.scheduler.minimum-allocation-mb</name>
		<value>512</value>
	</property>
	<!-- 单个容器最大分配内存MB -->
	<property>
		<name>yarn.scheduler.maximum-allocation-mb</name>
		<value>8224</value>
	</property>
	<!-- 单个容器最大分配核数 -->
	<property>
		<name>yarn.scheduler.maximum-allocation-vcores</name>
		<value>2</value>
	</property>
</configuration>

14、设置worker节点配置文件:workers

Hadoop01
Hadoop02
Hadoop03

15、格式化namenode

bin/hdfs namenode -format

16、启动hdfs

sbin/start-dfs.sh
sbin/stop-dfs.sh

17、启动yarn

sbin/start-yarn.sh
sbin/stop-yarn.sh

18、单独启动journalnode

hdfs --daemon start journalnode

18、启动yarn历史服务器

sbin/mr-jobhistory-daemon.sh start historyserver

19、启动停止hdfs、yarn

/data/bigdata/hadoop-3.3.5/sbin/stop-dfs.sh

/data/bigdata/hadoop-3.3.5/sbin/start-dfs.sh

/data/bigdata/hadoop-3.3.5/sbin/stop-yarn.sh

/data/bigdata/hadoop-3.3.5/sbin/start-yarn.sh

20、节点状态查看

bin/hdfs haadmin -getServiceState namenode1

bin/hdfs haadmin -failover --forceactive namenode2 namenode1

21、相关命令、

启动jobhistory
sbin/mr-jobhistory-daemon.sh start historyserver
#HDFS同步用户组
bin/hdfs dfsadmin -refreshUserToGroupsMappings
bin/hdfs haadmin -getServiceState namenode2
bin/hdfs namenode -initializeSharedEdits
#备namenode
bin/hdfs  namenode -bootstrapStandby
hdfs getconf -journalNodes
hdfs dfsadmin -refreshUserToGroupsMappings
hdfs dfs -chown hdfs:hadoop /apps
hdfs dfs -chown hdfs:hadoop /apps/tez-0.10.2
hdfs dfs -chmod 775 /apps/tez-0.10.2
hdfs dfs -put /data/tez.tar.gz /apps/tez-0.10.2
hdfs dfs -ls /apps/tez-0.10.2
hdfs dfs -chmod a+w /apps/tez-0.10.2/tez.tar.gz
hdfs dfs -ls /apps/tez-0.10.2
hdfs dfs -chmod 733 /tmp/hive
hdfs dfs -mkdir /spark2-history
hdfs dfs -chown spark:hadoop /spark2-history

bin/hdfs zkfc -formatZK
bin/hdfs --config "/data/bigdata/hadoop-3.3.5/etc/hadoop" --hostnames "Hadoop01 Hadoop02 Hadoop03" --workers --daemon start journalnode
posted @   rbcd  阅读(13)  评论(0编辑  收藏  举报
编辑推荐:
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· Linux系列:如何用 C#调用 C方法造成内存泄露
· AI与.NET技术实操系列(二):开始使用ML.NET
阅读排行:
· 被坑几百块钱后,我竟然真的恢复了删除的微信聊天记录!
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 园子的第一款AI主题卫衣上架——"HELLO! HOW CAN I ASSIST YOU TODAY
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
点击右上角即可分享
微信分享提示