1. Hadoop (HDFS) Environment Setup

Hadoop distribution used: CDH
CDH download repository: https://archive.cloudera.com/cdh5/cdh/5/
Hadoop version: hadoop-2.6.0-cdh5.16.2
Download Hadoop: wget https://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.16.2.tar.gz

2. Prerequisites

Java 1.8+
ssh (start-dfs.sh / stop-dfs.sh log in to each node over ssh, so for a single-node setup passwordless ssh to localhost is enough, as sketched below)
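A minimal sketch for checking the prerequisites and enabling passwordless ssh to localhost (skip the key generation if a key already exists):
java -version                                # should report 1.8 or newer
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa     # generate a key pair with an empty passphrase
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost date                           # should run without prompting for a password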

3. Create Directories

mkdir -p /usr/local/hadoop2.6 /usr/local/hadoop2.6/data
cd /usr/local/hadoop2.6

4. Download Hadoop

wget https://archive.cloudera.com/cdh5/cdh/5/hadoop-2.6.0-cdh5.16.2.tar.gz

5. Extract

tar -zvxf hadoop-2.6.0-cdh5.16.2.tar.gz

Common directories inside the Hadoop package:
    bin: Hadoop client executables (hadoop, hdfs, yarn, mapred, ...)
    etc/hadoop: Hadoop configuration files
    sbin: scripts for starting and stopping the Hadoop daemons
    share: jars, documentation, and example programs

6. Set Environment Variables

vim /etc/profile
export HADOOP_HOME=/usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2
export PATH=$HADOOP_HOME/bin:$PATH

source /etc/profile
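A quick sanity check that the environment variables took effect (the exact version string depends on your tarball):
echo $HADOOP_HOME   # should print /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2
which hadoop        # should resolve to $HADOOP_HOME/bin/hadoop
hadoop version      # should report Hadoop 2.6.0-cdh5.16.2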

7. Edit the Hadoop Configuration Files

vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/hadoop-env.sh   # repeat for yarn-env.sh and mapred-env.sh in the same directory
In each of the three files, change export JAVA_HOME=${JAVA_HOME} to export JAVA_HOME=/usr/local/java1.8/jdk1.8.0_241 (point it at your own JDK installation path).
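Instead of editing the three files by hand, an explicit JAVA_HOME line can simply be appended to each of them; this is only a sketch, assuming the JDK path above, so adjust it to your own installation:
cd /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop
for f in hadoop-env.sh yarn-env.sh mapred-env.sh; do
    echo 'export JAVA_HOME=/usr/local/java1.8/jdk1.8.0_241' >> "$f"   # a later assignment overrides any earlier default when the script is sourced
done
grep 'JAVA_HOME' hadoop-env.sh yarn-env.sh mapred-env.sh              # confirm the line is present in all three files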

vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/core-site.xml
Add the following properties inside the <configuration> element. (The mapreduce.* and yarn.* properties below conventionally belong in mapred-site.xml and yarn-site.xml, but core-site.xml is loaded by every Hadoop client and daemon, so they take effect here as well.)
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://192.168.107.216:8020</value>
</property>
<property>
   <name>mapreduce.reduce.input.buffer.percent</name>
   <value>0.10</value>
</property>
<property>
   <name>mapreduce.reduce.shuffle.memory.limit.percent</name>
   <value>0.10</value>
</property>
<property>
   <name>mapreduce.reduce.shuffle.input.buffer.percent</name>
   <value>0.10</value>
</property>
<property>
   <name>mapred.child.java.opts</name>
   <value>-Xmx2024m</value>
</property>
<property>
    <name>hadoop.tmp.dir</name>
    <value>/usr/local/hadoop2.6/data</value>
</property>
<property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>${hadoop.tmp.dir}/nm-local-dir</value>
</property>
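To confirm that core-site.xml is being read, the values can be queried back through the standard hdfs getconf tool (no daemons need to be running for this):
hdfs getconf -confKey fs.defaultFS      # should print hdfs://192.168.107.216:8020
hdfs getconf -confKey hadoop.tmp.dir    # should print /usr/local/hadoop2.6/data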


vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/hdfs-site.xml
<property>
    <name>dfs.replication</name>
    <value>1</value>   <!-- single-node setup, so only one replica -->
</property>
<property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>${hadoop.tmp.dir}/nm-local-dir</value>
</property>

vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/mapred-site.xml
(If etc/hadoop only contains mapred-site.xml.template, copy it to mapred-site.xml first.)
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
  <name>mapreduce.cluster.local.dir</name>
  <value>${hadoop.tmp.dir}/mapred/local</value>
</property>

vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/yarn-site.xml
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
  <name>yarn.nodemanager.local-dirs</name>
  <value>${hadoop.tmp.dir}/nm-local-dir</value>
</property>


vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/slaves
The slaves file defaults to localhost; you can leave it unchanged or replace it with the local IP, for example as shown below.
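For example, to set the slaves file to the same address used in fs.defaultFS above (assuming 192.168.107.216 is this machine's IP):
echo 192.168.107.216 > /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/slaves
cat /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/slaves   # verify the contents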

8. Start HDFS

Format the filesystem before the first start only; do not run the format command again later, because it wipes the existing HDFS metadata.
hdfs namenode -format   # format the filesystem (hadoop namenode -format is the older, deprecated form of the same command)
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-dfs.sh

Recommended way to start, one daemon at a time:
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/hadoop-daemon.sh start namenode   # start the NameNode first
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/hadoop-daemon.sh start datanode   # then start the DataNode
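Since yarn-site.xml and mapred-site.xml are configured above, the YARN daemons can be started the same per-daemon way when you want to run MapReduce jobs (yarn-daemon.sh sits in the same sbin directory):
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/yarn-daemon.sh start resourcemanager
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/yarn-daemon.sh start nodemanager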

9. Verify

jps  # list the Hadoop JVM processes; if the following three appear, HDFS started successfully
5191 SecondaryNameNode
3003 NameNode
3278 DataNode

systemctl stop firewalld        # disable the firewall so the web UI port is reachable
http://192.168.107.216:50070/   # NameNode web UI; if the page loads, HDFS is up
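A simple HDFS smoke test once the daemons are up (the file and directory names here are arbitrary examples):
hdfs dfs -mkdir -p /tmp/smoke-test
echo "hello hdfs" > /tmp/hello.txt
hdfs dfs -put /tmp/hello.txt /tmp/smoke-test/
hdfs dfs -ls /tmp/smoke-test
hdfs dfs -cat /tmp/smoke-test/hello.txt   # should print: hello hdfs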

10. Check the Startup Logs

cd /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs
# log file names include the user and hostname (here: root and localhost.localdomain), so they will differ on other machines
tail -n 100 yarn-root-nodemanager-localhost.localdomain.log
tail -n 100 hadoop-root-datanode-localhost.localdomain.log
tail -n 100 hadoop-root-namenode-localhost.localdomain.log
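When a daemon fails to start, grepping the logs for errors is usually quicker than reading them end to end (a rough sketch; tune the pattern as needed):
cd /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs
grep -iE 'error|exception|fatal' *.log | tail -n 50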

11. Stop HDFS

bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-dfs.sh

Recommended way to stop, one daemon at a time:
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/hadoop-daemon.sh stop datanode   # stop the DataNode first
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/hadoop-daemon.sh stop namenode   # then stop the NameNode
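If the YARN daemons were started as well, they can be stopped in the same per-daemon fashion:
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/yarn-daemon.sh stop nodemanager
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/yarn-daemon.sh stop resourcemanager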

12. Common Hadoop Commands

bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-dfs.sh   # start HDFS (NameNode, DataNode, SecondaryNameNode)
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-dfs.sh    # stop HDFS
jps                                                                  # list the running Hadoop JVM processes
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-yarn.sh  # start YARN (ResourceManager, NodeManager)
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-yarn.sh   # stop YARN

# full restart: stop YARN and HDFS, start them again, check the processes, then run the local pv.sh script
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-yarn.sh && \
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-dfs.sh && \
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-dfs.sh && \
bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/start-yarn.sh && \
jps && bash /home/hadoop/lib/pv.sh


bash /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/sbin/stop-yarn.sh && jps   # stop YARN and check which daemons remain

hdfs namenode -format   # format the filesystem (first start only; hadoop namenode -format is the deprecated equivalent)

bash /home/hadoop/lib/pv.sh
cd /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/yarn-root-nodemanager-localhost.localdomain.log
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/hadoop-root-datanode-localhost.localdomain.log
tail -n 100 /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/logs/hadoop-root-namenode-localhost.localdomain.log
cd /usr/local/hadoop2.6/data
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/core-site.xml
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/yarn-site.xml
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/mapred-site.xml
vim /usr/local/hadoop2.6/hadoop-2.6.0-cdh5.16.2/etc/hadoop/hdfs-site.xml

The mapreduce.reduce.shuffle.memory.limit.percent value can also be overridden per job from Java code, or per session in a SQL-on-Hadoop shell such as Hive:
job.getConfiguration().setStrings("mapreduce.reduce.shuffle.memory.limit.percent", "0.1");
set mapreduce.reduce.shuffle.memory.limit.percent=0.1;
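As another sketch, the same property can be passed on the command line when submitting a job, assuming the driver class goes through ToolRunner/GenericOptionsParser; the jar, class, and path names below are placeholders:
hadoop jar my-app.jar com.example.MyJob \
    -D mapreduce.reduce.shuffle.memory.limit.percent=0.1 \
    /input/path /output/path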