Hadoop Deployment and Configuration
Upload and extract the Hadoop tarball
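A minimal sketch of this step, assuming the tarball is named hadoop-2.7.5.tar.gz and is uploaded to a hypothetical /export/softwares directory (the install root /export/servers is taken from the paths used later in this guide):
# Upload from the local machine to the server
scp hadoop-2.7.5.tar.gz root@192.168.2.135:/export/softwares/
# Extract into the install directory (on the server)
mkdir -p /export/servers
tar -zxvf /export/softwares/hadoop-2.7.5.tar.gz -C /export/servers/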
Hadoop Configuration
The configuration files live in the etc/hadoop directory under the Hadoop installation directory.
core-site.xml
<configuration>
<!-- Temporary file storage directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>specify your own path</value>
</property>
<!-- Default file system: the NameNode address of the distributed file system (fs.defaultFS replaces the deprecated fs.default.name) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.2.135:9000</value>
</property>
<!-- I/O buffer size, in bytes -->
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
<!-- Enable the HDFS trash mechanism; interval in minutes (10080 = 7 days) -->
<property>
<name>fs.trash.interval</name>
<value>10080</value>
</property>
</configuration>
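After the environment variables at the end of this guide are configured, a quick sanity check (a sketch) that these values are picked up:
hdfs getconf -confKey fs.defaultFS        # expect hdfs://192.168.2.135:9000
hdfs getconf -confKey fs.trash.interval   # expect 10080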
hdfs-site.xml
<configuration>
<!-- Secondary NameNode host and HTTP port -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node01:50090</value>
</property>
<!-- NameNode HTTP (web UI) address -->
<property>
<name>dfs.namenode.http-address</name>
<value>node01:50070</value>
</property>
<!-- Number of HDFS block replicas; the default is 3 -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- DataNode data storage locations -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas,file:///export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas2</value>
</property>
<!-- NameNode metadata (fsimage) storage locations; the metadata records the directory tree of the data -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas,file:///export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas2</value>
</property>
<!-- NameNode edit log storage location -->
<property>
<name>dfs.namenode.edits.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/nn/edits</value>
</property>
<!-- Secondary NameNode checkpoint (fsimage) storage location -->
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/snn/name</value>
</property>
<!-- Secondary NameNode checkpoint edit log storage location -->
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>file:///export/servers/hadoop-2.7.5/hadoopDatas/dfs/snn/edits</value>
</property>
<!-- Disable HDFS permission checking (dfs.permissions.enabled replaces the deprecated dfs.permissions) -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- HDFS block size, in bytes (134217728 = 128 MB) -->
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
</configuration>
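Hadoop does not reliably create these storage directories on its own, so it is common to pre-create them on every node before the first start; a sketch matching the paths configured above:
mkdir -p /export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas \
         /export/servers/hadoop-2.7.5/hadoopDatas/datanodeDatas2 \
         /export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas \
         /export/servers/hadoop-2.7.5/hadoopDatas/namenodeDatas2 \
         /export/servers/hadoop-2.7.5/hadoopDatas/nn/edits \
         /export/servers/hadoop-2.7.5/hadoopDatas/snn/name \
         /export/servers/hadoop-2.7.5/hadoopDatas/dfs/snn/edits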
hadoop-env.sh (JDK configuration)
export JAVA_HOME=/export/servers/jdk1.8.0_271/
mapred-site.xml
<configuration>
<!-- Enable MapReduce uber mode (run small jobs in a single JVM) -->
<property>
<name>mapreduce.job.ubertask.enable</name>
<value>true</value>
</property>
<!-- JobHistory server host and port -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>node01:10020</value>
</property>
<!-- JobHistory web UI host and port -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>node01:19888</value>
</property>
</configuration>
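Note that start-dfs.sh and start-yarn.sh do not launch the JobHistory server configured above; a sketch of starting it manually on node01:
mr-jobhistory-daemon.sh start historyserver
# Web UI afterwards: http://node01:19888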
yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Host running the YARN ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>node01</value>
</property>
<!-- Auxiliary service NodeManagers run for the MapReduce shuffle -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Aggregated log retention time, in seconds (604800 = 7 days) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- YARN memory allocation: physical memory, in MB, each NodeManager offers to containers -->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>20480</value>
</property>
<!-- Minimum container memory allocation, in MB -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>2048</value>
</property>
<!-- Maximum ratio of virtual to physical memory for containers -->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
</property>
</configuration>
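Under this scheme each NodeManager offers 20480 MB to containers and the scheduler allocates in units of at least 2048 MB, so one node can host at most 20480 / 2048 = 10 minimum-size containers, each allowed up to 2.1x its physical allocation in virtual memory. Once YARN is up, the registered nodes and their resources can be checked with (a sketch):
yarn node -list
# ResourceManager web UI: http://node01:8088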
mapred-env.sh
export JAVA_HOME=/export/servers/jdk1.8.0_271/
slaves
# Worker (slave) node list, one host per line
node01
node02
node03
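Because node02 and node03 are listed as workers, the same Hadoop installation (including all of the configuration above) must exist on every node. A common sketch, assuming passwordless SSH between the nodes is already set up:
scp -r /export/servers/hadoop-2.7.5 node02:/export/servers/
scp -r /export/servers/hadoop-2.7.5 node03:/export/servers/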
Configure the Hadoop environment variables
# Hadoop environment variables
export HADOOP_HOME=/export/servers/hadoop-2.7.5
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
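Assuming these lines were appended to /etc/profile, a sketch of applying them and performing the first start on node01:
source /etc/profile
hadoop version            # verify the environment variables are picked up
hdfs namenode -format     # first deployment only; this wipes existing metadata
start-dfs.sh
start-yarn.sh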