Hadoop (2.x) Pseudo-Distributed Setup

System environment

/*
OS:       CentOS 7
Hostname: centos02
IP:       192.168.122.1
Java: 1.8    Hadoop: 2.8.5
*/
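The configuration below refers to the machine by hostname (centos02), so that name must resolve to the IP shown above. If it does not already, a single /etc/hosts entry is enough — a minimal sketch, assuming the IP listed in the environment block:

# Only needed if "ping centos02" does not already resolve
echo "192.168.122.1  centos02" >> /etc/hosts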

 

Create the working directories

[root@centos02 opt]# pwd
/opt
[root@centos02 opt]#
[root@centos02 opt]# mkdir -m 777 bigdata
[root@centos02 opt]# mkdir -m 777 bigdata/hadoop
[root@centos02 opt]#

 

Extract Hadoop into the target directory

[root@centos02 opt]# tar xzf hadoop-2.8.5.tar.gz -C /opt/bigdata/hadoop

 

Change the owner of the extracted Hadoop directory

[root@centos02 opt]# chown -R 777 /opt/bigdata/hadoop/hadoop-2.8.5
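Note that chown -R 777 sets the owning user to the numeric UID 777, not the file mode — which is why the ls -l output later in this post shows 777 as the owner. If the intent is wide-open permissions instead, chmod is the command — a sketch under that assumption:

# Alternative: open up permissions rather than changing the owner
chmod -R 777 /opt/bigdata/hadoop/hadoop-2.8.5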

 

Create the Hadoop temporary and data directories

[root@centos02 opt]# cd /opt/bigdata/hadoop/hadoop-2.8.5
[root@centos02 hadoop-2.8.5]# pwd
/opt/bigdata/hadoop/hadoop-2.8.5
[root@centos02 hadoop-2.8.5]#
[root@centos02 hadoop-2.8.5]# mkdir -m 777 hadoop-temp
[root@centos02 hadoop-2.8.5]# mkdir -m 777 hadoop-temp/tmp
[root@centos02 hadoop-2.8.5]# mkdir -m 777 hadoop-temp/name
[root@centos02 hadoop-2.8.5]# mkdir -m 777 hadoop-temp/data
[root@centos02 hadoop-2.8.5]# 
[root@centos02 hadoop-2.8.5]# mkdir -m 777 hadoop-temp/yarn
[root@centos02 hadoop-2.8.5]# mkdir -m 777 hadoop-temp/yarn/logs
[root@centos02 hadoop-2.8.5]#
[root@centos02 hadoop-2.8.5]# cd ./hadoop-temp
[root@centos02 hadoop-temp]# pwd
/opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp
[root@centos02 hadoop-temp]#
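The same tree can also be created in one command with mkdir -p and brace expansion — a sketch; note that -m only applies to the leaf directories, so hadoop-temp and hadoop-temp/yarn themselves get the default mode:

mkdir -pm 777 hadoop-temp/{tmp,name,data,yarn/logs}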

 

Add environment variables

[root@centos02 hadoop-2.8.5]# vim /etc/profile
# Java 
export JAVA_HOME=/usr/local/java/jdk1.8
export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH

# Hadoop
export HADOOP_HOME=/opt/bigdata/hadoop/hadoop-2.8.5
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop

export HADOOP_COMMON_HOME=$HADOOP_HOME 
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_INSTALL=$HADOOP_HOME

export YARN_HOME=$HADOOP_HOME
export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop

export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
[root@centos02 hadoop-2.8.5]# source /etc/profile
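A quick sanity check that the new variables took effect in the current shell:

echo $JAVA_HOME
echo $HADOOP_HOME
which hadoop     # should print /opt/bigdata/hadoop/hadoop-2.8.5/bin/hadoop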

 

Set JAVA_HOME in the Hadoop environment scripts

hadoop-env.sh

[root@centos02 hadoop-2.8.5]# cd ./etc/hadoop
[root@centos02 hadoop]# 
[root@centos02 hadoop]# vim hadoop-env.sh
# export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/usr/local/java/jdk1.8

 

yarn-env.sh

[root@centos02 hadoop]# vim yarn-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8

 

mapred-env.sh

[root@centos02 hadoop]# vim mapred-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8

 

Edit the Hadoop configuration files

core-site.xml

[root@centos02 hadoop]# vim core-site.xml
<configuration>
  
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://centos02:9000</value>
  </property>
  
  <property>
    <name>hadoop.home</name>
    <value>/opt/bigdata/hadoop/hadoop-2.8.5</value>
  </property>

  <property>
    <name>hadoop.tmp.dir</name>
    <value>${hadoop.home}/hadoop-temp/tmp</value>
  </property>

  
  <property>
    <name>hadoop.security.authorization</name>
    <value>false</value>
    <description>Whether to enable service-level authorization</description>
  </property>
  
  <property>
    <name>hadoop.proxyuser.root.hosts</name>
    <value>*</value>
  </property>  
  <property>
    <name>hadoop.proxyuser.root.groups</name>
    <value>*</value>
  </property>
  
</configuration>
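Once the environment variables above are loaded, the effective value can be confirmed with hdfs getconf — a quick check that should echo the value configured here:

hdfs getconf -confKey fs.defaultFS     # expected: hdfs://centos02:9000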

 

hdfs-site.xml

[root@centos02 hadoop]# vim hdfs-site.xml
<configuration>

  <property>
    <name>dfs.replication</name>
    <value>1</value>
    <description>Replication factor</description>
  </property>
  
  <property>
    <name>hadoop.home</name>
    <value>/opt/bigdata/hadoop/hadoop-2.8.5</value>
  </property>  

  <property>
    <name>dfs.name.dir</name>
    <value>${hadoop.home}/hadoop-temp/name</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>${hadoop.home}/hadoop-temp/data</value>
  </property>

  <property>
    <name>dfs.blocksize</name>
    <value>128m</value>
  </property>


  <property>
    <name>dfs.webhdfs.enabled</name>
    <value>true</value>
    <description>Whether HDFS can be managed through the WebHDFS REST interface</description>
  </property>
  
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
    <description>Whether to enable permission checking in HDFS; false disables the checks</description>
  </property>

</configuration>
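dfs.name.dir and dfs.data.dir are the pre-2.x property names; Hadoop 2.x still honors them (they map to dfs.namenode.name.dir and dfs.datanode.data.dir), but the NameNode will warn that the paths should be specified as URIs, as the format log further down shows. A sketch of the current-style properties, should you prefer to silence those warnings:

  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/data</value>
  </property>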

 

mapred-site.xml

[root@centos02 hadoop]# mv mapred-site.xml.template mapred-site.xml
[root@centos02 hadoop]# vim mapred-site.xml
<configuration>
  
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
    <description>Run MapReduce jobs on YARN</description>
  </property>

  
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>centos02:10020</value>
    <description>JobHistory server address</description>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>centos02:19888</value>
    <description>JobHistory web UI address and port</description>
  </property>
  
</configuration>
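start-all.sh starts HDFS and YARN only; the JobHistory server configured above has to be started separately once the cluster is up — a sketch:

mr-jobhistory-daemon.sh start historyserver
jps     # should now also list JobHistoryServer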

 

yarn-site.xml

[root@centos02 hadoop]# vim yarn-site.xml
<configuration>

  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>centos02</value>
    <description>ResourceManager hostname</description>
  </property>

  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
    <description>
      Auxiliary service run on the NodeManager.
      Must be set to mapreduce_shuffle so that MapReduce jobs can run</description>
  </property>
  
  <property>
    <name>hadoop.home</name>
    <value>/opt/bigdata/hadoop/hadoop-2.8.5</value>
  </property>  


  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
    <description>Enable log aggregation</description>
  </property>
  <property>
    <name>yarn.log-aggregation-retain-seconds</name>
    <value>604800</value>
    <description>Retention time for aggregated logs; 604800 = 7*24*3600 seconds</description>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>604800</value>
    <description>Retention time for NodeManager logs; 604800 = 7*24*3600 seconds</description>
  </property>

  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>${hadoop.home}/hadoop-temp/yarn/logs</value>
  </property>


  <property>
    <name>yarn.resourcemanager.address</name>
    <value>${yarn.resourcemanager.hostname}:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>${yarn.resourcemanager.hostname}:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>${yarn.resourcemanager.hostname}:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>${yarn.resourcemanager.hostname}:8030</value>
  </property>

  
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
    <description>Disable the virtual-memory check (optional)</description>
  </property>
  
</configuration>
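One thing worth noting about yarn.nodemanager.remote-app-log-dir: it is resolved against the default filesystem (HDFS here), not the local disk, so aggregated logs end up under that path in HDFS. A way to check after a job has run — a sketch, assuming the path configured above:

hdfs dfs -ls /opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/yarn/logs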

 

Check the Hadoop version

[root@centos02 bin]# hadoop version
Hadoop 2.8.5
Subversion https://git-wip-us.apache.org/repos/asf/hadoop.git -r 0b8464d75227fcee2c6e7f2410377b3d53d3d5f8
Compiled by jdu on 2018-09-10T03:32Z
Compiled with protoc 2.5.0
From source with checksum 9942ca5c745417c14e318835f420733
This command was run using /opt/bigdata/hadoop/hadoop-2.8.5/share/hadoop/common/hadoop-common-2.8.5.jar
[root@centos02 bin]# 

 

Format the HDFS NameNode

[root@centos02 hadoop]# cd $HADOOP_HOME
[root@centos02 hadoop-2.8.5]# pwd
/opt/bigdata/hadoop/hadoop-2.8.5
[root@centos02 hadoop-2.8.5]# 
[root@centos02 hadoop-2.8.5]# cd ./bin
[root@centos02 bin]# pwd
/opt/bigdata/hadoop/hadoop-2.8.5/bin
[root@centos02 bin]#
[root@centos02 bin]# hdfs namenode -format 
19/08/28 06:45:02 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   user = root
STARTUP_MSG:   host = centos02/192.168.122.1
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 2.8.5
STARTUP_MSG:   classpath = /opt/bigdata/hadoop/hadoop-2.8.5/etc/hadoop:/opt/bigdata/hadoop/hadoop-2.8.5/share/hadoop/common/lib/jersey-server-1.9.jar:...
............
............
............
STARTUP_MSG:   build = https://git-wip-us.apache.org/repos/asf/hadoop.git -r 0b8464d75227fcee2c6e7f2410377b3d53d3d5f8; compiled by 'jdu' on 2018-09-10T03:32Z
STARTUP_MSG:   java = 1.8.0_181
************************************************************/
19/08/28 06:45:02 INFO namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]
19/08/28 06:45:02 INFO namenode.NameNode: createNameNode [-format]
19/08/28 06:45:02 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
19/08/28 06:45:02 WARN common.Util: Path /opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/name should be specified as a URI in configuration files. Please update hdfs configuration.
19/08/28 06:45:02 WARN common.Util: Path /opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/name should be specified as a URI in configuration files. Please update hdfs configuration.
Formatting using clusterid: CID-9360eaf7-7648-456a-ab49-b56d0232ce68
19/08/28 06:45:02 INFO namenode.FSEditLog: Edit logging is async:true
............
19/08/28 06:45:02 INFO blockmanagement.BlockManager: defaultReplication         = 1
............
19/08/28 06:45:02 INFO namenode.FSNamesystem: fsOwner             = root (auth:SIMPLE)
19/08/28 06:45:02 INFO namenode.FSNamesystem: supergroup          = supergroup
19/08/28 06:45:02 INFO namenode.FSNamesystem: isPermissionEnabled = false
19/08/28 06:45:02 INFO namenode.FSNamesystem: HA Enabled: false
............
19/08/28 06:45:03 INFO namenode.FSImage: Allocated new BlockPoolId: BP-1572421829-192.168.122.1-1566945903160
19/08/28 06:45:03 INFO common.Storage: Storage directory /opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/name has been successfully formatted.
19/08/28 06:45:03 INFO namenode.FSImageFormatProtobuf: Saving image file /opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/name/current/fsimage.ckpt_0000000000000000000 using no compression
19/08/28 06:45:03 INFO namenode.FSImageFormatProtobuf: Image file /opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/name/current/fsimage.ckpt_0000000000000000000 of size 321 bytes saved in 0 seconds.
19/08/28 06:45:03 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
19/08/28 06:45:03 INFO util.ExitUtil: Exiting with status 0
19/08/28 06:45:03 INFO namenode.NameNode: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at centos02/192.168.122.1
************************************************************/
[root@centos02 bin]#
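The format step records the new cluster ID under the name directory; re-running hdfs namenode -format later generates a fresh ID and makes any existing DataNode data incompatible. A quick way to inspect what was written — a sketch:

cat /opt/bigdata/hadoop/hadoop-2.8.5/hadoop-temp/name/current/VERSION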

 

Start Hadoop

[root@centos02 bin]# cd ../
[root@centos02 hadoop-2.8.5]#
[root@centos02 hadoop-2.8.5]# cd ./sbin
[root@centos02 sbin]# pwd
/opt/bigdata/hadoop/hadoop-2.8.5/sbin
[root@centos02 sbin]# ll
total 124
-rwxr-xr-x 1 777 hadoop 2752 Sep 10  2018 distribute-exclude.sh
-rwxr-xr-x 1 777 hadoop 6467 Sep 10  2018 hadoop-daemon.sh
-rwxr-xr-x 1 777 hadoop 1360 Sep 10  2018 hadoop-daemons.sh
-rwxr-xr-x 1 777 hadoop 1640 Sep 10  2018 hdfs-config.cmd
-rwxr-xr-x 1 777 hadoop 1427 Sep 10  2018 hdfs-config.sh
-rwxr-xr-x 1 777 hadoop 2339 Sep 10  2018 httpfs.sh
-rwxr-xr-x 1 777 hadoop 3763 Sep 10  2018 kms.sh
-rwxr-xr-x 1 777 hadoop 4134 Sep 10  2018 mr-jobhistory-daemon.sh
-rwxr-xr-x 1 777 hadoop 1648 Sep 10  2018 refresh-namenodes.sh
-rwxr-xr-x 1 777 hadoop 2145 Sep 10  2018 slaves.sh
-rwxr-xr-x 1 777 hadoop 1779 Sep 10  2018 start-all.cmd
-rwxr-xr-x 1 777 hadoop 1471 Sep 10  2018 start-all.sh
-rwxr-xr-x 1 777 hadoop 1128 Sep 10  2018 start-balancer.sh
-rwxr-xr-x 1 777 hadoop 1401 Sep 10  2018 start-dfs.cmd
-rwxr-xr-x 1 777 hadoop 3734 Sep 10  2018 start-dfs.sh
-rwxr-xr-x 1 777 hadoop 1357 Sep 10  2018 start-secure-dns.sh
-rwxr-xr-x 1 777 hadoop 1571 Sep 10  2018 start-yarn.cmd
-rwxr-xr-x 1 777 hadoop 1347 Sep 10  2018 start-yarn.sh
-rwxr-xr-x 1 777 hadoop 1770 Sep 10  2018 stop-all.cmd
-rwxr-xr-x 1 777 hadoop 1462 Sep 10  2018 stop-all.sh
-rwxr-xr-x 1 777 hadoop 1179 Sep 10  2018 stop-balancer.sh
-rwxr-xr-x 1 777 hadoop 1455 Sep 10  2018 stop-dfs.cmd
-rwxr-xr-x 1 777 hadoop 3206 Sep 10  2018 stop-dfs.sh
-rwxr-xr-x 1 777 hadoop 1340 Sep 10  2018 stop-secure-dns.sh
-rwxr-xr-x 1 777 hadoop 1642 Sep 10  2018 stop-yarn.cmd
-rwxr-xr-x 1 777 hadoop 1340 Sep 10  2018 stop-yarn.sh
-rwxr-xr-x 1 777 hadoop 4295 Sep 10  2018 yarn-daemon.sh
-rwxr-xr-x 1 777 hadoop 1353 Sep 10  2018 yarn-daemons.sh
[root@centos02 sbin]# 
[root@centos02 sbin]# jps
4741 Jps
[root@centos02 sbin]#  
[root@centos02 sbin]# start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
19/08/28 06:48:47 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting namenodes on [centos02]
The authenticity of host 'centos02 (192.168.122.1)' can't be established.
ECDSA key fingerprint is SHA256:P1L8h3CQufhYRfgBafwGjB18GugzL+XCi62o2x8cM0I.
ECDSA key fingerprint is MD5:c8:13:aa:73:1e:8b:1b:8e:e2:89:88:1e:b9:e7:7d:83.
Are you sure you want to continue connecting (yes/no)? yes
centos02: Warning: Permanently added 'centos02,192.168.122.1' (ECDSA) to the list of known hosts.
centos02: starting namenode, logging to /opt/bigdata/hadoop/hadoop-2.8.5/logs/hadoop-root-namenode-centos02.out
localhost: starting datanode, logging to /opt/bigdata/hadoop/hadoop-2.8.5/logs/hadoop-root-datanode-centos02.out
Starting secondary namenodes [0.0.0.0]
The authenticity of host '0.0.0.0 (0.0.0.0)' can't be established.
ECDSA key fingerprint is SHA256:P1L8h3CQufhYRfgBafwGjB18GugzL+XCi62o2x8cM0I.
ECDSA key fingerprint is MD5:c8:13:aa:73:1e:8b:1b:8e:e2:89:88:1e:b9:e7:7d:83.
Are you sure you want to continue connecting (yes/no)? yes
0.0.0.0: Warning: Permanently added '0.0.0.0' (ECDSA) to the list of known hosts.
0.0.0.0: starting secondarynamenode, logging to /opt/bigdata/hadoop/hadoop-2.8.5/logs/hadoop-root-secondarynamenode-centos02.out
19/08/28 06:48:47 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
starting yarn daemons
starting resourcemanager, logging to /opt/bigdata/hadoop/hadoop-2.8.5/logs/yarn-root-resourcemanager-centos02.out
localhost: starting nodemanager, logging to /opt/bigdata/hadoop/hadoop-2.8.5/logs/yarn-root-nodemanager-centos02.out
[root@centos02 sbin]# 
[root@centos02 sbin]# jps
5443 ResourceManager
5269 SecondaryNameNode
5062 DataNode
5752 NodeManager
6202 Jps
4907 NameNode
[root@centos02 sbin]# 
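With all five daemons up, the web UIs and a small smoke test confirm the cluster is usable — the ports come from the 2.x defaults and the yarn-site.xml above; the example jar path matches the 2.8.5 layout:

# Web UIs: NameNode at http://centos02:50070, YARN ResourceManager at http://centos02:8088
hdfs dfsadmin -report     # should report one live DataNode
yarn node -list           # should list one NodeManager
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.5.jar pi 2 10

To avoid the interactive host-key prompts seen above on later restarts, passwordless SSH from root to centos02/localhost (ssh-keygen plus ssh-copy-id) can be set up beforehand.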

 

 
