搭建Hadoop伪分布式环境
安装ssh client
yum install openssh-clients openssh-server
ssh localhost //测试是否可以正常登陆
安装java环境
先安装jdk
yum install java-1.8.0-openjdk java-1.8.0-openjdk-devel
输入java和javac //如果输出对应命令帮助,则表明jdk正确安装
配置java环境
vim ~/.bashrc //添加 export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk
source ~/.bashrc //生效环境变量
java -version //检测java环境是否生效
$JAVA_HOME/bin/java -version //若两条命令输出的结果一致,且都为我们前面安装的 openjdk-1.8.0 的版本,则表明 JDK 环境已经正确安装并配置
安装Hadoop
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-2.7.6/hadoop-2.7.6.tar.gz
tar -zxvf hadoop-2.7.6.tar.gz -C /usr/local //解压到 /usr/local,得到 /usr/local/hadoop-2.7.6
mv /usr/local/hadoop-2.7.6 /usr/local/hadoop //重命名,与后续配置和命令中使用的路径 /usr/local/hadoop 保持一致
/usr/local/hadoop/bin/hadoop version //检查hadoop是否安装成功
Hadoop伪分布式环境配置
设置Hadoop的环境变量
vim ~/.bashrc //添加 export HADOOP_HOME=/usr/local/hadoop 以及 export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source ~/.bashrc //生效环境变量
修改Hadoop的配置文件
vim /usr/local/hadoop/etc/hadoop/core-site.xml //修改 <configuration> <property> <name>hadoop.tmp.dir</name> <value>file:/usr/local/hadoop/tmp</value> <description>location to store temporary files</description> </property> <property> <name>fs.defaultFS</name> <value>hdfs://localhost:9000</value> </property> </configuration>
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml //修改 <configuration> <property> <name>dfs.replication</name> <value>1</value> </property> <property> <name>dfs.namenode.name.dir</name> <value>file:/usr/local/hadoop/tmp/dfs/name</value> </property> <property> <name>dfs.datanode.data.dir</name> <value>file:/usr/local/hadoop/tmp/dfs/data</value> </property> </configuration>
格式化namenode
/usr/local/hadoop/bin/hdfs namenode -format
启动namenode和datanode守护进程
/usr/local/hadoop/sbin/start-dfs.sh
jps //检查namenode和datanode进程是否启动成功
运行Hadoop伪分布式实例
cd /usr/local/hadoop/
./bin/hdfs dfs -mkdir -p /user/hadoop/input
./bin/hdfs dfs -put ./etc/hadoop/*.xml /user/hadoop/input
/usr/local/hadoop/bin/hdfs dfs -ls /user/hadoop/input //查看上传的HDFS文件