Hadoop from Scratch: A Practice Diary (Changing the Hadoop Data Storage Location)

I'm on an Aliyun host. The system disk turned out to be only 20 GB, but a 130 GB data disk came with it (I'd rather have had a single 150 GB system disk; Aliyun's position is that keeping system and data separate stops them interfering with each other). I originally planned to upgrade the disk, but instead I brought the 130 GB data disk online and mounted it at a directory (/ad). This requires changing the Hadoop configuration; the Hive configuration does not need to change. Below are the defaults CDH4 set up for us.
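For anyone repeating that step, a minimal sketch of formatting and mounting the data disk follows. The device name /dev/xvdb is an assumption (check `fdisk -l` on your own instance), as is the choice of ext4:

 # ASSUMPTION: the 130G data disk shows up as /dev/xvdb; verify with `sudo fdisk -l`.
 sudo mkfs.ext4 /dev/xvdb        # put a filesystem directly on the disk
 sudo mkdir -p /ad               # the mount point the configs below refer to
 sudo mount /dev/xvdb /ad
 # Persist the mount across reboots.
 echo '/dev/xvdb  /ad  ext4  defaults  0  0' | sudo tee -a /etc/fstab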

(1) /etc/hadoop/conf/hdfs-site.xml 

 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <configuration>
   <property>
     <name>dfs.replication</name>
     <value>1</value>
   </property>
   <property>
     <name>dfs.safemode.extension</name>
     <value>0</value>
   </property>
   <property>
      <name>dfs.safemode.min.datanodes</name>
      <value>1</value>
   </property>
   <property>
      <name>hadoop.tmp.dir</name>
      <value>/var/lib/hadoop-hdfs/cache/${user.name}</value>
   </property>
   <property>
      <name>dfs.namenode.name.dir</name>
      <value>file:///var/lib/hadoop-hdfs/cache/${user.name}/dfs/name</value>
   </property>
   <property>
      <name>dfs.namenode.checkpoint.dir</name>
      <value>file:///var/lib/hadoop-hdfs/cache/${user.name}/dfs/namesecondary</value>
   </property>
   <property>
      <name>dfs.datanode.data.dir</name>
      <value>file:///var/lib/hadoop-hdfs/cache/${user.name}/dfs/data</value>
   </property>
 </configuration>

I changed the four storage paths (hadoop.tmp.dir, dfs.namenode.name.dir, dfs.namenode.checkpoint.dir, and dfs.datanode.data.dir), moving them from /var/lib to /ad. After the change:

 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <configuration>
   <property>
     <name>dfs.replication</name>
     <value>1</value>
   </property>
   <property>
     <name>dfs.safemode.extension</name>
     <value>0</value>
   </property>
   <property>
      <name>dfs.safemode.min.datanodes</name>
      <value>1</value>
   </property>
   <property>
      <name>hadoop.tmp.dir</name>
      <value>/ad/hadoop-hdfs/cache/${user.name}</value>
   </property>
   <property>
      <name>dfs.namenode.name.dir</name>
      <value>file:///ad/hadoop-hdfs/cache/${user.name}/dfs/name</value>
   </property>
   <property>
      <name>dfs.namenode.checkpoint.dir</name>
      <value>file:///ad/hadoop-hdfs/cache/${user.name}/dfs/namesecondary</value>
   </property>
   <property>
      <name>dfs.datanode.data.dir</name>
      <value>file:///ad/hadoop-hdfs/cache/${user.name}/dfs/data</value>
   </property>
 </configuration>
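Note that editing the XML alone is not enough: the new directories have to exist on the data disk and be owned by the hdfs user before HDFS is restarted, otherwise the NameNode and DataNode will fail to come up. A minimal sketch, assuming CDH4's packaged hdfs:hdfs service user:

 # Create the new HDFS storage root on the data disk.
 sudo mkdir -p /ad/hadoop-hdfs/cache
 # CDH4's HDFS daemons run as the hdfs user, so it must own the tree.
 sudo chown -R hdfs:hdfs /ad/hadoop-hdfs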


(2) /etc/hadoop/conf/mapred-site.xml

 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <configuration>
   <property>
     <name>mapred.job.tracker</name>
     <value>localhost:8021</value>
   </property>
 
   <property>
     <name>mapreduce.framework.name</name>
     <value>yarn</value>
   </property>
 
   <property>
     <name>mapreduce.jobhistory.address</name>
     <value>localhost:10020</value>
   </property>
   <property>
     <name>mapreduce.jobhistory.webapp.address</name>
     <value>localhost:19888</value>
   </property>
 
   <property>
     <description>To set the value of tmp directory for map and reduce tasks.</description>
     <name>mapreduce.task.tmp.dir</name>
     <value>/var/lib/hadoop-mapreduce/cache/${user.name}/tasks</value>
   </property>
 
 </configuration>

Here I changed mapreduce.task.tmp.dir, again moving it from /var/lib to /ad. After the change:

 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <configuration>
   <property>
     <name>mapred.job.tracker</name>
     <value>localhost:8021</value>
   </property>
 
   <property>
     <name>mapreduce.framework.name</name>
     <value>yarn</value>
   </property>
 
   <property>
     <name>mapreduce.jobhistory.address</name>
     <value>localhost:10020</value>
   </property>
   <property>
     <name>mapreduce.jobhistory.webapp.address</name>
     <value>localhost:19888</value>
   </property>
 
   <property>
     <description>To set the value of tmp directory for map and reduce tasks.</description>
     <name>mapreduce.task.tmp.dir</name>
     <value>/ad/hadoop-mapreduce/cache/${user.name}/tasks</value>
   </property>
 
 </configuration>
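Same caveat as with HDFS: the new temp directory should exist and be writable before tasks run. A sketch, assuming CDH4's default mapred user:

 # Create the new MapReduce cache root on the data disk.
 sudo mkdir -p /ad/hadoop-mapreduce/cache
 sudo chown -R mapred:mapred /ad/hadoop-mapreduce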


(3) /etc/hadoop/conf/yarn-site.xml

 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <configuration>
 <property>
 <name>yarn.resourcemanager.resource-tracker.address</name>
 <value>127.0.0.1:8031</value>
 <description>
 host is the hostname of the resource manager and port is the port on which the NodeManagers contact the  Resource Manager.
 </description>
 </property>
 
   <property>
     <name>yarn.nodemanager.aux-services</name>
     <value>mapreduce.shuffle</value>
   </property>
 
   <property>
     <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
     <value>org.apache.hadoop.mapred.ShuffleHandler</value>
   </property>
 
   <property>
     <name>yarn.log-aggregation-enable</name>
     <value>true</value>
   </property>
 
   <property>
     <name>yarn.dispatcher.exit-on-error</name>
     <value>true</value>
   </property>
 
   <property>
     <description>List of directories to store localized files in.</description>
     <name>yarn.nodemanager.local-dirs</name>
     <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
   </property>
 
   <property>
     <description>Where to store container logs.</description>
     <name>yarn.nodemanager.log-dirs</name>
     <value>/var/log/hadoop-yarn/containers</value>
   </property>
 
   <property>
     <description>Where to aggregate logs to.</description>
     <name>yarn.nodemanager.remote-app-log-dir</name>
     <value>/var/log/hadoop-yarn/apps</value>
   </property>
 
   <property>
     <description>Classpath for typical applications.</description>
      <name>yarn.application.classpath</name>
      <value>
         $HADOOP_CONF_DIR,
         $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
         $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
         $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
         $YARN_HOME/*,$YARN_HOME/lib/*
      </value>
   </property>
 </configuration>

Here I changed yarn.nodemanager.local-dirs, moving it to /ad; the log directories stayed under /var/log. After the change:

 <?xml version="1.0"?>
 <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
 
 <configuration>
 <property>
 <name>yarn.resourcemanager.resource-tracker.address</name>
 <value>127.0.0.1:8031</value>
 <description>
 host is the hostname of the resource manager and port is the port on which the NodeManagers contact the  Resource Manager.
 </description>
 </property>
 
   <property>
     <name>yarn.nodemanager.aux-services</name>
     <value>mapreduce.shuffle</value>
   </property>
 
   <property>
     <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
     <value>org.apache.hadoop.mapred.ShuffleHandler</value>
   </property>
 
   <property>
     <name>yarn.log-aggregation-enable</name>
     <value>true</value>
   </property>
 
   <property>
     <name>yarn.dispatcher.exit-on-error</name>
     <value>true</value>
   </property>
 
   <property>
     <description>List of directories to store localized files in.</description>
     <name>yarn.nodemanager.local-dirs</name>
     <value>/ad/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
   </property>
 
   <property>
     <description>Where to store container logs.</description>
     <name>yarn.nodemanager.log-dirs</name>
     <value>/var/log/hadoop-yarn/containers</value>
   </property>
 
   <property>
     <description>Where to aggregate logs to.</description>
     <name>yarn.nodemanager.remote-app-log-dir</name>
     <value>/var/log/hadoop-yarn/apps</value>
   </property>
 
   <property>
     <description>Classpath for typical applications.</description>
      <name>yarn.application.classpath</name>
      <value>
         $HADOOP_CONF_DIR,
         $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
         $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
         $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
         $YARN_HOME/*,$YARN_HOME/lib/*
      </value>
   </property>
 </configuration>
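And once more for YARN: the NodeManager refuses to start if its local-dirs are missing or not writable. A sketch under the same assumption about CDH4's default yarn user:

 # Create the NodeManager local-dir root on the data disk.
 sudo mkdir -p /ad/hadoop-yarn/cache
 sudo chown -R yarn:yarn /ad/hadoop-yarn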


PS: here is the script I use to restart Hadoop and Hive.

 # stop hive, yarn and hdfs first
 echo "@@@ stop yarn and hdfs first"
 sudo service hive-metastore stop
 sudo service hive-server stop
 sudo service hadoop-yarn-resourcemanager stop
 sudo service hadoop-yarn-nodemanager stop
 sudo service hadoop-mapreduce-historyserver stop
 for x in `cd /etc/init.d ; ls hadoop-hdfs-*`
 do 
     sudo service $x stop
 done
 
 # clear and format
 echo "@@@ clear and format"
 sudo rm -rf /tmp/*
 sudo rm -rf /ad/hadoop-hdfs/cache/*
 sudo rm -rf /ad/hadoop-yarn/cache/*
 sudo rm -rf /ad/hadoop-mapreduce/cache/*
 sudo -u hdfs hdfs namenode -format
 
 # start hdfs
 echo "@@@ start hdfs"
 for x in `cd /etc/init.d ; ls hadoop-hdfs-*`
 do 
     sudo service $x start
 done
 
 # mkdir
 echo  "@@@ mkdir"
 sudo -u hdfs hadoop fs -rm -r /tmp
 sudo -u hdfs hadoop fs -mkdir /tmp
 sudo -u hdfs hadoop fs -chmod -R 1777 /tmp 
 sudo -u hdfs hadoop fs -mkdir -p /tmp/hadoop-yarn/staging
 sudo -u hdfs hadoop fs -chmod -R 1777 /tmp/hadoop-yarn/staging
 sudo -u hdfs hadoop fs -mkdir -p /tmp/hadoop-yarn/staging/history/done_intermediate
 sudo -u hdfs hadoop fs -chmod -R 1777 /tmp/hadoop-yarn/staging/history/done_intermediate
 sudo -u hdfs hadoop fs -chown -R mapred:mapred /tmp/hadoop-yarn/staging
 sudo -u hdfs hadoop fs -mkdir -p /var/log/hadoop-yarn
 sudo -u hdfs hadoop fs -chown yarn:mapred /var/log/hadoop-yarn
 
 sudo -u hdfs hadoop fs -ls -R /
 
 # start yarn
 echo "@@@ start yarn"
 sudo service hadoop-yarn-resourcemanager start 
 sudo service hadoop-yarn-nodemanager start 
 sudo service hadoop-mapreduce-historyserver start
 
 sudo -u hdfs hadoop fs -mkdir -p /user/maminghan
 sudo -u hdfs hadoop fs -chown maminghan /user/maminghan
 
 # start hive
 sudo service hive-metastore start
 sudo service hive-server start
 sudo -u hdfs hadoop fs -mkdir /user/hive
 sudo -u hdfs hadoop fs -chown hive /user/hive
 sudo -u hdfs hadoop fs -mkdir /tmp
 sudo -u hdfs hadoop fs -chmod 777 /tmp      # already exists
 sudo -u hdfs hadoop fs -chmod o+t /tmp
 sudo -u hdfs hadoop fs -mkdir /data
 sudo -u hdfs hadoop fs -chown hdfs /data
 sudo -u hdfs hadoop fs -chmod 777 /data
 sudo -u hdfs hadoop fs -chmod o+t /data
 sudo chown -R hive:hive /ad/hive
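After a full restart I like to confirm that HDFS really is writing to the data disk. A quick check (standard commands, nothing specific to this setup):

 sudo jps                                   # are all the daemons up?
 sudo -u hdfs hdfs dfsadmin -report | head  # capacity should now reflect the 130G disk
 df -h /ad                                  # data disk usage
 du -sh /ad/hadoop-hdfs /ad/hadoop-yarn /ad/hadoop-mapreduce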


posted @ 2014-03-18 14:12  aquastar