useradd -m hadoop -s /bin/bash
passwd hadoop
Grant the hadoop user sudo privileges:
chmod u+w /etc/sudoers
vi /etc/sudoers
root ALL=(ALL) ALL
hadoop ALL=(ALL) ALL
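After saving, restore sudoers to read-only; on most systems sudo refuses to run if the file stays writable:
chmod u-w /etc/sudoers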
su - hadoop    # a login shell already starts in /home/hadoop
wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.7/hadoop-2.7.7.tar.gz
tar -zxf hadoop-2.7.7.tar.gz
sudo mkdir /usr/local/hadoop
sudo mv hadoop-2.7.7 /usr/local/hadoop
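Because the move was done with sudo, /usr/local/hadoop ends up owned by root; handing it to the hadoop user lets the daemons write their logs and pid files there:
sudo chown -R hadoop:hadoop /usr/local/hadoop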
mkdir -p ~/dfs/name ~/dfs/data ~/tmp
cd /usr/local/hadoop/hadoop-2.7.7/etc/hadoop
Seven configuration files are involved:
<install path>/hadoop-2.7.7/etc/hadoop/hadoop-env.sh
<install path>/hadoop-2.7.7/etc/hadoop/yarn-env.sh
<install path>/hadoop-2.7.7/etc/hadoop/slaves
<install path>/hadoop-2.7.7/etc/hadoop/core-site.xml
<install path>/hadoop-2.7.7/etc/hadoop/hdfs-site.xml
<install path>/hadoop-2.7.7/etc/hadoop/mapred-site.xml
<install path>/hadoop-2.7.7/etc/hadoop/yarn-site.xml
In hadoop-env.sh and yarn-env.sh, set JAVA_HOME to your JDK installation path.
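For example, near the top of both files (the JDK path below is an assumption; substitute your own installation's path):
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64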
===
cat core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:8020</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/home/hadoop/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>
    <property>
        <name>hadoop.proxyuser.hadoop.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hadoop.groups</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
        <description>Allow the superuser root to impersonate members of any group</description>
    </property>
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
        <description>Allow the superuser root to connect from any host to impersonate a user</description>
    </property>
</configuration>
===
cat hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>localhost:9001</value>
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/home/hadoop/dfs/name</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/home/hadoop/dfs/data</value>
    </property>
    <property>
        <!-- With a single datanode only one replica can actually be placed;
             a value of 3 is intended for multi-node clusters. -->
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
</configuration>
===
cp mapred-site.xml.template mapred-site.xml
cat mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>localhost:10020</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>localhost:19888</value>
    </property>
</configuration>
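Note that the job history server configured above is not launched by start-dfs.sh or start-yarn.sh; once the cluster is up, start it separately with the script shipped in Hadoop 2.x's sbin:
mr-jobhistory-daemon.sh start historyserver
Its web UI then listens on http://localhost:19888.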
===
cat yarn-site.xml
<?xml version="1.0"?>
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>localhost:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>localhost:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>localhost:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>localhost:8033</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>localhost:8088</value>
    </property>
</configuration>
===
Set the environment variables: point HADOOP_HOME at /usr/local/hadoop/hadoop-2.7.7 and add its bin and sbin directories to PATH.
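For example, append the following to ~/.bashrc and re-source it:
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.7.7
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
source ~/.bashrc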
Format the namenode: hdfs namenode -format (the older hadoop namenode -format still works but is deprecated).
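start-dfs.sh launches each daemon over ssh, even in this single-node setup, so passwordless ssh to localhost must work first. A typical setup, assuming the default key locations:
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
ssh localhost true    # should succeed without a password prompt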
$ start-dfs.sh
Starting namenodes on [localhost]
localhost: starting namenode, logging to /usr/local/hadoop/hadoop-2.7.7/logs/hadoop-hadoop-namenode-mhc-linux.out
localhost: starting datanode, logging to /usr/local/hadoop/hadoop-2.7.7/logs/hadoop-hadoop-datanode-mhc-linux.out
Starting secondary namenodes [localhost]
localhost: starting secondarynamenode, logging to /usr/local/hadoop/hadoop-2.7.7/logs/hadoop-hadoop-secondarynamenode-mhc-linux.out
The namenode may fail to start here, with its .out log file showing nothing useful beyond the ulimit report ("ulimit -a for user root"). Stop HDFS first:
stop-dfs.sh
Here we simply reformat the namenode and then start the daemons again.
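If the namenode had been formatted before, clearing the old storage and temp directories first avoids a datanode/namenode clusterID mismatch after the reformat (warning: this erases all HDFS data):
rm -rf ~/dfs/name/* ~/dfs/data/* ~/tmp/*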
hadoop namenode -format
$ start-dfs.sh
ps -ef|grep node
Three HDFS daemon processes should be running: NameNode, DataNode, and SecondaryNameNode.
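Alternatively, jps from the JDK lists the running Java daemons directly; it should show NameNode, DataNode, and SecondaryNameNode (plus Jps itself):
jps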
start-yarn.sh
Open http://localhost:8088 to reach the YARN ResourceManager web UI.
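To confirm that YARN can actually run jobs, submit the bundled example jar (path per the 2.7.7 tarball layout):
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar pi 2 10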