[Script] A script for setting up a fully distributed Hadoop environment with Docker

  I had previously been debugging programs on my local machine in standalone mode, but the computer architecture instructor requires the lab to run in a fully distributed environment.  

  I didn't want to use a virtual machine, purely because I'm not used to its interface, so I used Docker instead.

  This script overlaps somewhat with my earlier Hadoop environment setup notes.

  It is based on the blog post http://tashan10.com/yong-dockerda-jian-hadoopwei-fen-bu-shi-ji-qun/, but is not identical to it, because I wanted the Java and Hadoop versions to match the ones on my local machine.

# Reference http://tashan10.com/yong-dockerda-jian-hadoopwei-fen-bu-shi-ji-qun/

# Ubuntu 14.04 LTS Hadoop 2.7 Fully Distributed with docker.

# Install docker.
sudo apt-get install apt-transport-https
sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 36A1D7869245C8950F966E92D8576A8BA88D21E9
sudo bash -c "echo deb https://get.docker.io/ubuntu docker main > /etc/apt/sources.list.d/docker.list"
sudo apt-get update
sudo apt-get install lxc-docker

# Run docker without "sudo": add the user to the docker group. (kirai is my ubuntu username.)
sudo groupadd docker
sudo gpasswd -a kirai docker
sudo reboot
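
# (Not in the original) After the reboot, confirm docker really runs without sudo:
docker version    # should print client and server versions without a permission error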

# Pull the Ubuntu image.
docker pull ubuntu:14.04

# Start a container from the Ubuntu image.
docker run -ti ubuntu:14.04

# Change the package mirror (optional). Note the codename for Ubuntu 14.04 is trusty, not xenial.
# mv /etc/apt/sources.list /etc/apt/sources.list.bk
# echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty main restricted universe multiverse" > /etc/apt/sources.list
# echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-updates main restricted universe multiverse" >> /etc/apt/sources.list
# echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-backports main restricted universe multiverse" >> /etc/apt/sources.list
# echo "deb https://mirrors.tuna.tsinghua.edu.cn/ubuntu/ trusty-security main restricted universe multiverse" >> /etc/apt/sources.list


# Install Java inside the container.

apt-get install software-properties-common python-software-properties
add-apt-repository ppa:webupd8team/java
apt-get update
apt-get install oracle-java7-installer
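
# (Not in the original) Verify the JDK before exiting and committing the container:
java -version    # should report java version "1.7.0_xx"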
exit

# Persistence (4631f498dec7 is the container's id with java)
docker commit -m "java installed" 4631f498dec7 ubuntu:java
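
# (Not in the original) Confirm the new image exists:
docker images    # should list ubuntu with the tag "java"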

# Restart a container from the java image (docker run -ti ubuntu:java).
# Reference: my ubuntu 14.04 LTS hadoop installation tutorial.
## get & install hadoop 2.7.1 (as user hadoop)
cd ~
mkdir hadoop
sudo wget http://mirror.bit.edu.cn/apache/hadoop/common/hadoop-2.7.1/hadoop-2.7.1.tar.gz    # get hadoop 2.7.1
sudo tar xzf hadoop-2.7.1.tar.gz    # extract hadoop-*.*.*.tar.gz
sudo rm hadoop-2.7.1.tar.gz    # remove the tarball
sudo mv hadoop-2.7.1 /usr/local/    # install hadoop under /usr/local
sudo chmod -R 774 /usr/local/hadoop-2.7.1    # grant read & write permissions to users

## configure ~/.bashrc
update-alternatives --config java    # get java's path (the starred entry, e.g. '/usr/lib/jvm/java-7-oracle/jre/bin/java'; only the shorter prefix '/usr/lib/jvm/java-7-oracle' is needed for the JAVA_HOME environment variable)
sudo vi ~/.bashrc    # edit the bashrc file

## add the content below to the end of bashrc
#--------------------------------------------------------------#
#HADOOP VARIABLES START
export JAVA_HOME=/usr/lib/jvm/java-7-oracle
export HADOOP_INSTALL=/usr/local/hadoop-2.7.1
export PATH=$PATH:$HADOOP_INSTALL/bin
export PATH=$PATH:$HADOOP_INSTALL/sbin
export HADOOP_MAPRED_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_HOME=$HADOOP_INSTALL
export HADOOP_HDFS_HOME=$HADOOP_INSTALL
export YARN_HOME=$HADOOP_INSTALL
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_INSTALL/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_INSTALL/lib"
#HADOOP VARIABLES END

#--------------------------------------------------------------#
source ~/.bashrc    # make the environment variables take effect
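
# (Not in the original) Quick sanity check that the variables took effect:
hadoop version     # should print "Hadoop 2.7.1"
echo $JAVA_HOME    # should print /usr/lib/jvm/java-7-oracle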

## configure hadoop
sudo vi /usr/local/hadoop-2.7.1/etc/hadoop/hadoop-env.sh    # edit hadoop-env.sh
?JAVA_HOME    # (in vim) search backwards to locate JAVA_HOME
# change 'export JAVA_HOME=${JAVA_HOME}' into 'export JAVA_HOME=/usr/lib/jvm/java-7-oracle'
source /usr/local/hadoop-2.7.1/etc/hadoop/hadoop-env.sh    # reload

## test
cd /usr/local/hadoop-2.7.1/    # go there
sudo mkdir input
sudo cp README.txt input
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount input output    # use the examples jar; the -sources jar contains only source files
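
# (Not in the original) With no cluster configured yet the job runs in local mode, so the
# counts land in a plain local directory; a quick check:
cat output/part-r-00000    # word counts from README.txt
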
#----------------- FINISHED INSTALLATION ---------------------------#
# Persistence (8d9a50a0ee10 is the container's id with hadoop)
docker commit -m "hadoop installed" 8d9a50a0ee10 ubuntu:hadoop

# Configure hadoop.
# mainly core-site.xml, hdfs-site.xml and mapred-site.xml
cd /usr/local/hadoop-2.7.1
mkdir tmp
mkdir namenode
mkdir datanode

cd etc/hadoop
cp mapred-site.xml.template mapred-site.xml

vi core-site.xml
#--add contents between <configuration></configuration>--#
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop-2.7.1/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>

    <property>
        <name>fs.default.name</name>
        <value>hdfs://master:9000</value>
        <final>true</final>
        <description>The name of the default file system.  A URI whose
        scheme and authority determine the FileSystem implementation.  The
        uri's scheme determines the config property (fs.SCHEME.impl) naming
        the FileSystem implementation class.  The uri's authority is used to
        determine the host, port, etc. for a filesystem.</description>
    </property>
#----#
# Note: fs.default.name is deprecated in Hadoop 2.x in favor of fs.defaultFS, but still works.

vi hdfs-site.xml
#--add contents between <configuration></configuration>; one master, two slaves--#
    <property>
        <name>dfs.replication</name>
        <value>2</value>
        <final>true</final>
        <description>Default block replication.
        The actual number of replications can be specified when the file is created.
        The default is used if replication is not specified in create time.
        </description>
    </property>

    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/usr/local/hadoop-2.7.1/namenode</value>
        <final>true</final>
    </property>

    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/usr/local/hadoop-2.7.1/datanode</value>
        <final>true</final>
    </property>
#----#

vi mapred-site.xml
#--add contents between <configuration></configuration>--#
    <property>
        <name>mapred.job.tracker</name>
        <value>master:9001</value>
        <description>
        The host and port that the MapReduce job tracker runs
        at.  If "local", then jobs are run in-process as a single map
        and reduce task.
        </description>
    </property>
#----#
# Note: mapred.job.tracker is an MRv1 (JobTracker) setting, kept here from the referenced
# tutorial; on Hadoop 2.x MapReduce normally runs on YARN (mapreduce.framework.name=yarn).

# Format the namenode.
hadoop namenode -format

# Install ssh.
apt-get install ssh
cd ~
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa    # passwordless RSA key
cd .ssh
cat id_rsa.pub >> authorized_keys

vi ~/.bashrc
#--append--#
#autorun
/usr/sbin/sshd
#----#
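
# (Not in the original) Start sshd now and check that passwordless login works:
mkdir -p /var/run/sshd    # sshd needs this directory; it can be missing in a fresh container
/usr/sbin/sshd
ssh -o StrictHostKeyChecking=no localhost echo ok    # should print "ok" with no password prompt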

# Install net-tools so that ifconfig is available.
apt-get install net-tools
exit

# Persistence (342b9f9e1893 is the container's id with configured-hadoop)
docker commit -m "configured hadoop" 342b9f9e1893 ubuntu:chadoop


#-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-#
# Build the distributed environment.

# Open three terminals and run:
docker run -ti -h slave1 ubuntu:chadoop
docker run -ti -h slave2 ubuntu:chadoop
docker run -ti -h master ubuntu:chadoop

# Write down the master's and slaves' IPs (e.g. from ifconfig inside each container):
# slave1: 172.17.0.2
# slave2: 172.17.0.3
# master: 172.17.0.4
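
# (Not in the original) The IPs can also be read from the host instead of running ifconfig
# in each container; <container_id> below is a placeholder for each id shown by docker ps:
docker ps    # find the three container ids
docker inspect -f '{{ .NetworkSettings.IPAddress }}' <container_id>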

# In each container:
vi /etc/hosts

#--write down--#
172.17.0.2    slave1
172.17.0.3    slave2
172.17.0.4    master
#----#
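
# (Not in the original) The same entries can be appended non-interactively, assuming the
# IPs above; docker assigns them in launch order, so they may differ after a restart:
cat >> /etc/hosts <<EOF
172.17.0.2    slave1
172.17.0.3    slave2
172.17.0.4    master
EOF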

# On master, list the slaves:
vi /usr/local/hadoop-2.7.1/etc/hadoop/slaves

#--write down--#
slave1
slave2
#----#

#-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-#
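
# (Not in the original script) A typical next step on master is to start HDFS and check
# that both slaves registered; this assumes passwordless ssh to slave1/slave2 works:
start-dfs.sh                 # starts the namenode here and datanodes on the slaves
jps                          # should list NameNode and SecondaryNameNode on master
hdfs dfsadmin -report        # should report two live datanodes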

 
