hadoop安装
一、解压
tar -zxvf hadoop-xxxx.tar.gz
二、配置文件
1.core-site.xml
<configuration> <!-- Hadoop临时目录的基础路径,其他许多目录配置默认以它为基准 --> <property> <name>hadoop.tmp.dir</name> <value>/home/hadoopadmin/hadoop/data</value> </property> <!-- NameNode节点的URI(包括协议、主机名称、端口号) --> <property> <name>fs.defaultFS</name> <value>hdfs://etc01:8020</value> </property> <!-- 开启回收站机制,设置文件在回收站中保留多久后被彻底删除,单位为分钟,默认为0(即不启用回收站) --> <property> <name>fs.trash.interval</name> <value>60</value> </property> </configuration>
2.hdfs-site.xml
<configuration> <!-- secondarynamenode的http服务器地址和端口 --> <property> <name>dfs.namenode.secondary.http-address</name> <value>etc01:50090</value> </property> <!-- 默认的块副本数 --> <property> <name>dfs.replication</name> <value>3</value> </property> <!-- 关闭权限校验 --> <property> <name>dfs.permissions</name> <value>false</value> </property> <!-- namenode的http服务器地址和端口 --> <property> <name>dfs.namenode.http-address</name> <value>etc01:50070</value> </property> <!-- datanode节点被指定要存储数据的本地文件系统路径 --> <property> <name>dfs.datanode.data.dir</name> <value>file:///home/hadoopadmin/hadoop/data/dfs/dn</value> </property> </configuration>
3.mapred-site.xml
<configuration> <!-- MapReduce JobHistory进程通信主机、端口 --> <property> <name>mapreduce.jobhistory.address</name> <value>etc01:10020</value> </property> <!-- MapReduce JobHistory的web界面主机、端口 --> <property> <name>mapreduce.jobhistory.webapp.address</name> <value>etc01:19888</value> </property> <!-- 以yarn方式运行MapReduce --> <property> <name>mapreduce.framework.name</name> <value>yarn</value> </property> </configuration>
4.yarn-site.xml
<configuration> <!-- resourcemanager的主机名 --> <property> <name>yarn.resourcemanager.hostname</name> <value>etc01</value> </property> <!-- resourcemanager提供给nodemanager的地址 --> <property> <name>yarn.resourcemanager.resource-tracker.address</name> <value>etc01:8031</value> </property> <!-- resourcemanager中应用程序管理器接口的地址(客户端通过它提交作业) --> <property> <name>yarn.resourcemanager.address</name> <value>etc01:8032</value> </property> <!-- 调度器接口的地址 --> <property> <name>yarn.resourcemanager.scheduler.address</name> <value>etc01:8030</value> </property> <!-- 该节点上可分配给容器的物理内存量,单位为MB(此处取节点物理内存的约75%) --> <property> <name>yarn.nodemanager.resource.memory-mb</name> <value>1536</value> </property> <!-- NodeManager上运行的附属服务,配置成mapreduce_shuffle才可运行MR --> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> </configuration>
5.其他需要配置的文件:hadoop-env.sh(设置JAVA_HOME)、slaves(每行填写一个从节点的主机名)、.bash_profile(配置JAVA_HOME、HADOOP_HOME等环境变量)
三、配置ssh免密登录
1.生成密钥 ssh-keygen -t rsa
2.发送本地公钥 ssh-copy-id username@hostname
3.测试登录 ssh username@hostname
四、配置从节点
scp -r /home/hadoopadmin/jdk1.7.0_67 hadoopadmin@etc02:/home/hadoopadmin
scp -r /home/hadoopadmin/jdk1.7.0_67 hadoopadmin@etc03:/home/hadoopadmin
scp -r /home/hadoopadmin/hadoop-2.7.1 hadoopadmin@etc02:/home/hadoopadmin
scp -r /home/hadoopadmin/hadoop-2.7.1 hadoopadmin@etc03:/home/hadoopadmin
scp /home/hadoopadmin/.bash_profile hadoopadmin@etc02:/home/hadoopadmin
scp /home/hadoopadmin/.bash_profile hadoopadmin@etc03:/home/hadoopadmin
说明:-r 代表递归发送整个目录下的所有文件;冒号(:)后面是远程主机上的目标路径,若不写则默认为远程用户的家目录,上述命令则显式指定了从根目录开始的绝对路径。