hadoop一键安装伪分布式
hadoop伪分布式和hive在openSUSE中的安装
在git上的路径为:https://github.com/huabingood/hadoop--------/tree/master
各个文件夹内容
一.lib文件夹,主要存放使用到的jar包
1. dom4j.jar 主要是用来解析xml文档的jar包
2. JavaStudy.jar 具体执行解析xml文档的jar包
3. mysql-connector-java-5.1.42-bin.jar hive中需要的Java连接MySQL数据库的驱动jar包
二.software文件夹,主要存放要安装的hadoop生态安装包
1. hadoop-2.6.0-cdh5.10.0.tar.gz CDH的hadoop安装包
2. hive-1.1.0-cdh5.10.0.tar.gz CDH的hive安装包
三.script文件夹,主要是一键安装的脚本
1. main.sh 主调函数,顺序执行安装步骤
2. install-env.sh 主要是安装需要用到的各种参数,比如安装目录,MySQL连接信息,xml配置信息等。有部分配置需要根据不同的机器进行修改。
3. functions.sh 安装步骤,主要是供main.sh调用的函数。不需要修改
如何执行
一.完成Linux环境的基本配置
1. 安装好Java
2. 安装好mysql
3. 停用防火墙
4. 配置网络,并能ping通外网
5. 设置好主机名
二.创建文件夹用于存放安装文件夹
1. 在root用户下修改/opt目录的所有者(使普通用户可以在其中创建文件夹)
chown 用户名 /opt
2. 以普通用户身份创建用于存放安装脚本的文件夹
mkdir /opt/install
3. 将安装文件夹放到刚才创建的文件夹中
4. 赋予脚本的执行权限
chmod 777 functions.sh install-env.sh main.sh
修改install-env.sh配置自己的安装环境
- 添加脚本的安装路径
- 添加MySQL的连接参数
- 给出要运行的Java的安装路径
main.sh代码
#!/bin/bash
#
# Entry point of the one-click installer: runs every installation step for a
# pseudo-distributed Hadoop plus Hive, in order.
#
# Configuration comes from install-env.sh; the step functions come from
# functions.sh. Most steps report through isSuccess, which aborts the whole
# run when a step fails.

# The helper scripts, ../software and ../lib are all addressed relative to
# this script, so switch to its directory no matter where it was started from.
cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1

source ./install-env.sh
source ./functions.sh

# Create (or, after confirmation, empty) the installation directory.
creatFile "${INSTALL_PATH}"
# Create the directory that holds Hadoop's temporary files.
mkdir -p "${HADOOP_TMP_DIR}" || exit 1

# Unpack Hadoop and Hive into the installation directory.
myTar hadoop "${INSTALL_PATH}"
myTar hive "${INSTALL_PATH}"

# Patch Hadoop's non-XML configuration files.
notXMLHadoop "${INSTALL_PATH}" "${HADOOP_PROFILEPATH}"
isSuccess $? "非XML修改成功" "非XML修改失败,请检查原因!!!"

# Patch Hadoop's XML configuration files.
alterHadoopXml "${coreSite[@]}"
alterHadoopXml "${hdfsSite[@]}"

# Register the PID directory in the current user's .bashrc.
alterBashrc
isSuccess $? "bashrc修改成功" "bashrc修改失败,请检查原因!!!"

# Format the NameNode.
formatNameNode

# Patch Hive's non-XML configuration files.
setHiveNotXML
isSuccess $? "非XML修改成功" "非XML修改失败,请检查原因!!!"

# Fill in Hive's hive-site.xml.
alterHiveXml "${hiveSite[@]}"

# Copy the MySQL JDBC driver jar into Hive's lib directory.
hiveOver
install-env.sh
代码
#!/bin/bash
#
# Installation parameters for the one-click Hadoop + Hive installer.
# Sourced by main.sh and functions.sh. Only the first section is meant to be
# adapted to the target machine.

# ---------------------------------------------------------------------------
# Machine-specific settings: edit these.
# ---------------------------------------------------------------------------

# Absolute path Hadoop is installed into. This directory should be empty:
# the installer offers to clear it if it already exists.
INSTALL_PATH="/opt/hadoop"

# MySQL connection settings (used to build the hive-site.xml JDBC properties).
mysqlIP="192.168.59.100"
mysqlPort="3306"
mysqlDatabase="hive_test"
mysqlUser="root"
mysqlPwd="123456"
# File name of the MySQL JDBC driver jar shipped in ../lib.
mysqlConnetor="mysql-connector-java-5.1.42-bin.jar"

# Your own JAVA_HOME.
MY_JAVA_HOME="/opt/softWare/jdk1.8.0_131"

# ---------------------------------------------------------------------------
# Do not modify anything below this line.
# ---------------------------------------------------------------------------

# Configuration directory, relative to the unpacked Hadoop directory.
HADOOP_PROFILEPATH="/etc/hadoop"

# Where Hadoop and Hive keep their temporary files.
HADOOP_TMP_DIR="${INSTALL_PATH}/tmp/hadoop"
HIVE_TMP_DIR="${INSTALL_PATH}/tmp/hive"

# Hadoop env files whose JAVA_HOME has to be set.
myArray=("hadoop-env.sh" "mapred-env.sh" "yarn-env.sh")

# Hadoop XML files to modify. Layout of each array:
#   file name, then alternating property name / property value.
coreSite=(
    "core-site.xml"
    "fs.defaultFS" "hdfs://$(hostname):9000"
    "hadoop.tmp.dir" "${HADOOP_TMP_DIR}"
)
hdfsSite=(
    "hdfs-site.xml"
    "dfs.replication" "1"
)

# Where Hive writes its log files.
hiveLogs="${INSTALL_PATH}/tmp/hive_logs"

# Contents of hive-site.xml, same layout as the arrays above.
hiveSite=(
    "hive-site.xml"
    "javax.jdo.option.ConnectionURL" "jdbc:mysql://${mysqlIP}:${mysqlPort}/${mysqlDatabase}?createDatabaseIfNotExist=true&useSSL=false&characterEncoding=utf8"
    "javax.jdo.option.ConnectionDriverName" "com.mysql.jdbc.Driver"
    "javax.jdo.option.ConnectionUserName" "${mysqlUser}"
    "javax.jdo.option.ConnectionPassword" "${mysqlPwd}"
    "hive.exec.scratchdir" "${HIVE_TMP_DIR}"
    "system:java.io.tmpdir" "${HIVE_TMP_DIR}"
    "hive.cli.print.header" "true"
    "hive.cli.print.current.db" "true"
    "hive.metastore.warehouse.dir" "/opt/hadoop/warehouse"
)
functions.sh
代码
#!/bin/bash
#
# Step functions of the one-click Hadoop + Hive installer, called by main.sh.
# Archives and jars are addressed relative to the script directory
# (../software, ../lib), so these functions must run with that directory as
# the current working directory.

source ./install-env.sh

# Print the name of the first entry directly under a directory whose name
# contains the given text.
#   $1 - text the entry name must contain (e.g. "hadoop", "hive")
#   $2 - directory to look in (defaults to INSTALL_PATH)
# Returns 1 and prints nothing if no entry matches.
function _installedDirName(){
    local baseDir="${2:-${INSTALL_PATH}}"
    local entry
    for entry in "${baseDir}"/*"${1}"*
    do
        # An unmatched glob stays literal, so make sure the entry exists.
        if [ -e "${entry}" ]
        then
            printf '%s\n' "${entry##*/}"
            return 0
        fi
    done
    return 1
}

# Add name/value properties to an XML configuration file via JavaStudy.jar.
#   $1  - path of the XML file
#   $2… - alternating property names and values
# Each property is reported through isSuccess, so the first failure aborts.
function _addXmlProperties(){
    local xmlFile="${1}"
    local xmlName="${xmlFile##*/}"
    shift
    local -a pairs=("$@")
    local idx
    for ((idx=0; idx<${#pairs[@]}; idx+=2))
    do
        # Quoted so values such as the JDBC URL ('?', '&') reach Java intact.
        java -jar ../lib/JavaStudy.jar "${xmlFile}" add "${pairs[idx]}" "${pairs[idx+1]}"
        isSuccess $? "${xmlName}中的${pairs[idx]}修改成功" "${xmlName}中的${pairs[idx]}修改失败,请检查原因!!!"
    done
}

# Create the directory $1, or empty it (after confirmation) if it exists.
#   $1 - directory path
function creatFile(){
    if [ -e "${1}" ]
    then
        printf '%s\n\n' "文件夹已经存在!将清空${1}!!!"
        isContinue "清空${1}" "将要清空${1}"
        # ${1:?} aborts instead of expanding to "/*" if the path were empty.
        rm -rf -- "${1:?}"/*
        isSuccess $? "${1}清空成功,准备解压文件。" "${1}清空失败,请检查原因!!!"
    else
        mkdir -p -- "${1}"
        isSuccess $? "${1}创建成功,准备解压文件。" "${1}创建失败,请检查原因!!!"
    fi
}

# Ask the user whether to continue; exit the script on anything but yes.
#   $1 - action named in the prompt
#   $2 - message printed once the user agrees (defaults to $1)
function isContinue(){
    local choose
    echo -e "\e[1;31m是否${1}【y/n】\e[0m"
    read -r choose
    case "${choose}" in
        y|yes|Y|YES)
            echo -e "\e[1;31m${2:-${1}}\e[0m"
            ;;
        *)
            exit 1
            ;;
    esac
}

# Report the outcome of the previous step. On failure the whole installation
# is rolled back (INSTALL_PATH is removed) and the script exits non-zero.
#   $1 - exit status of the step
#   $2 - message printed on success
#   $3 - message printed on failure
function isSuccess(){
    if [ "${1}" -eq 0 ]
    then
        echo "${2}"
    else
        echo -e "\e[1;31m${3}\e[0m"
        # Never run rm -rf on an empty path.
        if [ -n "${INSTALL_PATH}" ]
        then
            rm -rf -- "${INSTALL_PATH}"
        fi
        # Explicit status: a bare exit would report rm's success (0).
        exit 1
    fi
}

# Unpack the archive in ../software whose file name starts with $1.
#   $1 - package name prefix (e.g. "hadoop", "hive")
#   $2 - directory to extract into
function myTar(){
    local archive=""
    local candidate
    for candidate in ../software/"${1}"*
    do
        if [ -f "${candidate}" ]
        then
            archive="${candidate}"
            break
        fi
    done

    if [ -z "${archive}" ]
    then
        # No archive found: report it as a failed extraction (this exits).
        isSuccess 1 "${1}解压成功。" "${1}解压失败,请查找原因!!!"
    fi

    tar -zxf "${archive}" -C "${2}" 1>>/dev/null 2>>/dev/null
    isSuccess $? "${1}解压成功。" "${1}解压失败,请查找原因!!!"
}

# Patch Hadoop's non-XML configuration files: JAVA_HOME and the PID
# directory in the *-env.sh files, and the host name in the slaves file.
#   $1 - installation directory
#   $2 - configuration directory relative to the unpacked Hadoop directory
function notXMLHadoop(){
    local hadoopDir confDir envFile
    hadoopDir=$(_installedDirName hadoop "${1}")
    confDir="${1}/${hadoopDir}${2}"
    mkdir -p "${HADOOP_TMP_DIR}/myPID"

    for envFile in "${myArray[@]}"
    do
        # Drop an empty "export JAVA_HOME=" line, then insert ours after line 3.
        sed -i '/export JAVA_HOME=$/d' "${confDir}/${envFile}"
        sed -i "3a export JAVA_HOME=${MY_JAVA_HOME}" "${confDir}/${envFile}"
        isSuccess $? "${envFile}修改成功" "${envFile}修改失败"
    done

    # Point every daemon's PID files at our own directory. "export" is kept
    # in the replacement so the line still matches if this step is repeated.
    sed -i "s@export HADOOP_PID_DIR=.*@export HADOOP_PID_DIR=${HADOOP_TMP_DIR}/myPID@g" "${confDir}/hadoop-env.sh"
    sed -i "4a export HADOOP_MAPRED_PID_DIR=${HADOOP_TMP_DIR}/myPID" "${confDir}/mapred-env.sh"
    sed -i "4a export YARN_PID_DIR=${HADOOP_TMP_DIR}/myPID" "${confDir}/yarn-env.sh"

    # Replace the first line of slaves with this host's name:
    # append the host name after line 1, then delete line 1.
    sed -i "1a $(hostname)" "${confDir}/slaves"
    isSuccess $? "slaves修改成功" "slaves修改失败"
    sed -i '1d' "${confDir}/slaves"
}

# Add properties to one of Hadoop's XML configuration files.
#   $1  - XML file name inside Hadoop's configuration directory
#   $2… - alternating property names and values
function alterHadoopXml(){
    local hadoopDir
    hadoopDir=$(_installedDirName hadoop)
    _addXmlProperties "${INSTALL_PATH}/${hadoopDir}${HADOOP_PROFILEPATH}/${1}" "${@:2}"
}

# Register the PID directory in the current user's ~/.bashrc. Lines written
# by an earlier run are removed first so they are not duplicated.
function alterBashrc(){
    local bashrcPath="${HOME}/.bashrc"
    local pidVar
    mkdir -p "${HADOOP_TMP_DIR}/myPID"

    sed -i "/myPIDDir=/d" "${bashrcPath}"
    sed -i "\$a myPIDDir=${HADOOP_TMP_DIR}/myPID" "${bashrcPath}"

    for pidVar in HADOOP_PID_DIR YARN_PID_DIR HADOOP_MAPRED_PID_DIR HBASE_PID_DIR
    do
        # \${myPIDDir} is escaped on purpose: the literal text "${myPIDDir}"
        # must end up in .bashrc, not its current value.
        sed -i "/export ${pidVar}=\${myPIDDir}/d" "${bashrcPath}"
        sed -i "\$a export ${pidVar}=\${myPIDDir}" "${bashrcPath}"
    done

    source ~/.bashrc
}

# Format the NameNode; a failure is reported through isSuccess.
function formatNameNode(){
    local hadoopDir status
    hadoopDir=$(_installedDirName hadoop)
    "${INSTALL_PATH}/${hadoopDir}/bin/hdfs" namenode -format 1>>/dev/null 2>>/dev/null
    # Capture the status right away; any later command would overwrite $?.
    status=$?
    isSuccess "${status}" "NameNode格式化成功!" "NameNode格式化失败,请查找原因!!!"
}

# Prepare Hive's non-XML configuration files and an empty hive-site.xml.
# Returns non-zero as soon as one step fails, so the caller can check $?.
function setHiveNotXML(){
    echo "开始修改hive的非xml配置文件"
    local hiveDir hadoopDir confDir template
    hiveDir=$(_installedDirName hive)
    hadoopDir=$(_installedDirName hadoop)
    confDir="${INSTALL_PATH}/${hiveDir}/conf"

    # Turn the shipped templates into the real configuration files.
    for template in hive-env.sh hive-exec-log4j.properties hive-log4j.properties
    do
        cp "${confDir}/${template}.template" "${confDir}/${template}" || return 1
    done

    # Skeleton hive-site.xml; the properties are added later by alterHiveXml.
    printf '%s\n' \
        '<?xml version="1.0" encoding="UTF-8" standalone="no"?>' \
        '<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>' \
        '<configuration>' \
        '</configuration>' > "${confDir}/hive-site.xml" || return 1

    # Append our settings to hive-env.sh.
    sed -i "\$a HADOOP_HOME=${INSTALL_PATH}/${hadoopDir}" "${confDir}/hive-env.sh" || return 1
    sed -i "\$a HIVE_CONF_DIR=${confDir}" "${confDir}/hive-env.sh" || return 1
    sed -i "\$a export JAVA_HOME=${MY_JAVA_HOME}" "${confDir}/hive-env.sh" || return 1

    # Send Hive's logs to our own directory.
    mkdir -p "${hiveLogs}" || return 1
    sed -i "s@hive.log.dir=.*@hive.log.dir=${hiveLogs}@g" "${confDir}/hive-log4j.properties"
}

# Add properties to one of Hive's XML configuration files.
#   $1  - XML file name inside Hive's conf directory
#   $2… - alternating property names and values
function alterHiveXml(){
    local hiveDir
    hiveDir=$(_installedDirName hive)
    _addXmlProperties "${INSTALL_PATH}/${hiveDir}/conf/${1}" "${@:2}"
}

# Create Hive's temporary directory and copy the MySQL JDBC driver jar into
# Hive's lib directory. This is the last installation step.
function hiveOver(){
    local hiveDir
    mkdir -p "${HIVE_TMP_DIR}"
    hiveDir=$(_installedDirName hive)
    cp "../lib/${mysqlConnetor}" "${INSTALL_PATH}/${hiveDir}/lib"
    isSuccess $? "hadoop与hive的伪分布式安装成功,just have fun!" "复制mysql的jar包失败,请检查原因!!!"
}
JavaStudy.jar代码
package com.huabingood; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.Element; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; import java.io.*; /** * Created by 杨华彬 on 2017/5/4. */ public class TestXML { public static void main(String[] args) { /*String[] args = new String[4]; args[0] = "D:\\core-site.xml"; args[1] = "add"; args[2] = "name"; args[3] = "1234567";*/ // 创建解析器,准备解析指定路径的xml文档 SAXReader saxReader = new SAXReader(); Document doc = null; try { doc = saxReader.read(new File(args[0])); } catch (DocumentException e) { e.printStackTrace(); } addNode(args, doc); write2XML(args[0], doc); } /** * 获取参数后,自动创建name和value节点,将根据传入的值设置name和value的值 * * @param args 传递的是创建一个节点的参数。参数1表示xml文档的路径, * 参数2表示是创建节点还是修改节点,参数3表示name节点的值,参数4表示 * value节点的值。 */ public static void addNode(String[] args, Document doc) { // 获取根节点 Element root = doc.getRootElement(); // 创建根节点下的子节点 Element parentNode = root.addElement("property"); // 创建name和value节点 Element nameChild = parentNode.addElement("name"); nameChild.setText(args[2]); Element valueChild = parentNode.addElement("value"); valueChild.setText(args[3]); } /** * 将修改内容写到文档中去 * * @param file 修改的文件路径 * @param doc 前面传进来的xml文档对象模型。 */ public static void write2XML(String file, Document doc) { // 设置排版格式为缩进格式,字符为utf-8 OutputFormat of = OutputFormat.createPrettyPrint(); of.setEncoding("utf-8"); // 创建写入流,准备写入对象 File myFile = new File(file); OutputStreamWriter osw = null; FileOutputStream fos = null; try { fos = new FileOutputStream(myFile); osw = new OutputStreamWriter(fos, "utf-8"); } catch (FileNotFoundException e) { e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } XMLWriter xmlWriter = new XMLWriter(osw, of); // 开始写入内容 try { xmlWriter.write(doc); xmlWriter.flush(); } catch (IOException e) { e.printStackTrace(); } finally { try { xmlWriter.close(); } catch (IOException e) { e.printStackTrace(); } } } }