Hadoop 3.3.5 Single-Node Installation Steps
1. Download the JDK and Hadoop [omitted]
2. Extract the archives [omitted]
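A minimal sketch of this step, assuming the two archives were downloaded to /home/fanqi and /home/hadoop (the archive file names below are assumptions; use whatever was actually downloaded). The resulting directories /home/fanqi/jdk8u372-b07 and /home/hadoop/hadoop-3.3.5 are the paths used in the steps that follow.
tar -zxvf /home/fanqi/OpenJDK8U-jdk_x64_linux_hotspot_8u372b07.tar.gz -C /home/fanqi
tar -zxvf /home/hadoop/hadoop-3.3.5.tar.gz -C /home/hadoop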
3. Create the directories where Hadoop stores its data
mkdir -p /home/hadoop/tmp /home/hadoop/hdfs/data /home/hadoop/hdfs/name
4. Configure the Java environment and HADOOP_HOME
vim /etc/profile
Add the following lines:
JAVA_HOME=/home/fanqi/jdk8u372-b07
HADOOP_HOME=/home/hadoop/hadoop-3.3.5
PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export JAVA_HOME PATH CLASSPATH HADOOP_HOME
5. Reload the environment variables
source /etc/profile
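To confirm the variables took effect, both commands below should print version information (hadoop version should report 3.3.5):
java -version
hadoop version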
6. Configure the hostname and the hosts file
hostnamectl set-hostname {hostName}
vim /etc/hosts
192.168.0.240 {hostName}
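A quick check that the name resolves to the address just added (replace {hostName} with the actual hostname):
getent hosts {hostName}
ping -c 1 {hostName}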
7. Add the following settings to hadoop-env.sh
vim /home/hadoop/hadoop-3.3.5/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/home/fanqi/jdk8u372-b07
export HADOOP_HOME=/home/hadoop/hadoop-3.3.5
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
8. Edit core-site.xml
vim /home/hadoop/hadoop-3.3.5/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Default file system to use; Hadoop supports file, HDFS, GFS, Alibaba/Amazon cloud storage, and other file systems -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://dotnet-debian:9000</value>
</property>
<!-- Local path where Hadoop stores its data -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
</property>
<!-- User identity for the HDFS web UI -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<!-- Proxy user settings for Hive integration -->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<!-- How long the file system trash is retained (in minutes) -->
<property>
<name>fs.trash.interval</name>
<value>1440</value>
</property>
<!-- I/O buffer size for file access -->
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
</configuration>
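Note: dotnet-debian in fs.defaultFS (and in the configuration files below) is this machine's hostname; it must match the name set in step 6 and resolve through /etc/hosts, otherwise the NameNode will not be reachable at hdfs://dotnet-debian:9000.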
9. Edit hdfs-site.xml
vim /home/hadoop/hadoop-3.3.5/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
<description>Replication factor; the default is 3 and it should not exceed the number of DataNodes</description>
</property>
<!-- Host and port where the SecondaryNameNode process runs -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>dotnet-debian:9870</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.http.address</name>
<value>0.0.0.0:50070</value>
<description>Bind to 0.0.0.0 rather than the local loopback address so that port 50070 on this machine can be reached from external networks</description>
</property>
</configuration>
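Because dfs.http.address is overridden to 0.0.0.0:50070, the NameNode web UI listens on port 50070 instead of the Hadoop 3.x default 9870; this is the port used in the final step.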
10. Edit mapred-site.xml
vim /home/hadoop/hadoop-3.3.5/etc/hadoop/mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<!-- Default execution mode for MR jobs: yarn for cluster mode, local for local mode -->
<property>
<name>mapreduce.framework.name</name>
<value>local</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=${HADOOP_HOME}</value>
</property>
<property>
<name>mapred.job.tracker</name>
<value>dotnet-debian:9001</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>1536</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx1024M</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>3072</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2560M</value>
</property>
<!-- MapReduce JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>dotnet-debian:10020</value>
</property>
<!-- MapReduce JobHistory server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>dotnet-debian:19888</value>
</property>
</configuration>
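Note: with mapreduce.framework.name set to local, MapReduce jobs run inside the client JVM and bypass YARN; change the value to yarn if jobs should be submitted to the YARN cluster configured in the next step.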
11. Edit yarn-site.xml
vim /home/hadoop/hadoop-3.3.5/etc/hadoop/yarn-site.xml
<?xml version="1.0"?>
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Host where the ResourceManager (the YARN master) runs -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>dotnet-debian</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Whether to enforce physical memory limits on containers -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Whether to enforce virtual memory limits on containers -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- YARN log server (JobHistory) URL -->
<property>
<name>yarn.log.server.url</name>
<value>http://dotnet-debian:19888/jobhistory/logs</value>
</property>
<!-- How long aggregated logs are retained: 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
12. Edit the workers file [omitted]
No changes are needed for a single-node (non-cluster) setup
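For reference, on a single node the workers file normally contains just one line, either the default localhost or this machine's hostname as set in step 6, e.g.:
dotnet-debian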
13. Add the following variables at the top of start-dfs.sh and stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
14. Add the following variables at the top of start-yarn.sh and stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HDFS_DATANODE_SECURE_USER=yarn
YARN_NODEMANAGER_USER=root
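Note: these user variables largely duplicate the exports already added to hadoop-env.sh in step 7; since the start/stop scripts read hadoop-env.sh, either location should be sufficient, and defining them in both places does no harm.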
15. Passwordless SSH between cluster nodes
# 1. Generate the key pair; just press Enter at every prompt
ssh-keygen -t rsa
ssh-keygen -t dsa
# 2. Append the public key to this machine's set of authorized keys
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
# 3. Set the permissions so the key can be used
chmod 600 ~/.ssh/authorized_keys
chmod 700 ~/.ssh
# 4. Copy the id_rsa.pub file generated above to every slave machine:
scp ~/.ssh/id_rsa.pub xxx@ip:/home/xxx/.ssh
# 5. Log in to each slave machine and run the commands from steps 2 and 3
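Once the keys are in place, logging in should no longer prompt for a password; a quick test on the local machine (and on each slave, replacing {hostName} accordingly) is:
ssh {hostName}
exit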
16. Start Hadoop
The NameNode must be formatted the first time Hadoop is started
/home/hadoop/hadoop-3.3.5/bin/hdfs namenode -format
- Start the daemons one by one (the approach used in real production environments)
/home/hadoop/hadoop-3.3.5/sbin/hadoop-daemon.sh start|stop namenode|datanode|journalnode|secondarynamenode
/home/hadoop/hadoop-3.3.5/sbin/yarn-daemon.sh start|stop resourcemanager|nodemanager
- Start HDFS and YARN separately
/home/hadoop/hadoop-3.3.5/sbin/start-dfs.sh
/home/hadoop/hadoop-3.3.5/sbin/start-yarn.sh
- Start everything at once
/home/hadoop/hadoop-3.3.5/sbin/start-all.sh
start-all.sh simply calls the sbin/start-dfs.sh and sbin/start-yarn.sh scripts in turn
17. Use the jps command to check that all daemons started successfully
jps
6416 Jps
6275 NodeManager
5796 DataNode
4758 ResourceManager
5686 NameNode
5994 SecondaryNameNode
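As an optional smoke test, assuming all daemons are up, a small round trip through HDFS should succeed (/tmp/smoke-test is just an arbitrary example path):
hdfs dfs -mkdir -p /tmp/smoke-test
hdfs dfs -put /etc/hosts /tmp/smoke-test/
hdfs dfs -ls /tmp/smoke-test
hdfs dfs -cat /tmp/smoke-test/hosts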
18. Open the web UI
http://{ip}:50070
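If YARN started as well, the ResourceManager web UI should also be reachable on its default port 8088 (assuming it was not changed in yarn-site.xml):
http://{ip}:8088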