Hive 搭建
系统环境
操作系统: CentOS 7 主机名: centos02 IP: 192.168.122.1 Java: 1.8 Hadoop: 2.8.5
Hive: 2.3.4
MySQL: 8.0.12
新建 Hive目录
[root@centos02 opt]# cd ./bigdata [root@centos02 bigdata]# mkdir -m 777 hive [root@centos02 bigdata]# [root@centos02 bigdata]# ll 总用量 0 drwxrwxrwx 3 root root 53 8月 29 02:24 hadoop drw-rw---- 3 root root 49 8月 29 02:16 hive [root@centos02 bigdata]#
解压Hive到指定目录,并重命名目录名称
[root@centos02 opt]# tar -xvzf apache-hive-2.3.4-bin.tar.gz -C /opt/bigdata/hive [root@centos02 opt]# mv apache-hive-2.3.4-bin hive-2.3.4 [root@centos02 opt]# [root@centos02 hive]# cd /opt/bigdata/hive [root@centos02 hive]# ll 总用量 226528 drwxr-xr-x 14 root root 238 8月 29 02:49 hive-2.3.4 [root@centos02 hive]# pwd /opt/bigdata/hive [root@centos02 hive]#
新建Hive临时文件目录
[root@centos02 hive]# cd ./hive-2.3.4/ [root@centos02 hive-2.3.4]# [root@centos02 hive-2.3.4]# mkdir -m 777 warehouse [root@centos02 hive-2.3.4]# mkdir -m 777 iotemp [root@centos02 hive-2.3.4]# mkdir -m 777 tmp [root@centos02 hive-2.3.4]# mkdir -m 777 logs [root@centos02 hive-2.3.4]# ll 总用量 56 drwxr-xr-x 3 root root 133 6月 11 16:52 bin drwxr-xr-x 2 root root 4096 6月 11 16:52 binary-package-licenses drwxr-xr-x 3 root root 4096 6月 11 17:10 conf drwxr-xr-x 4 root root 34 6月 11 16:52 examples drwxr-xr-x 7 root root 68 6月 11 16:52 hcatalog drwxrwxrwx 2 root root 6 8月 29 02:49 iotemp drwxr-xr-x 2 root root 44 6月 11 16:52 jdbc drwxr-xr-x 4 root root 12288 8月 29 02:47 lib -rw-r--r-- 1 root root 20798 11月 1 2018 LICENSE drwxrwxrwx 2 root root 6 8月 29 02:49 logs -rw-r--r-- 1 root root 230 11月 1 2018 NOTICE -rw-r--r-- 1 root root 313 11月 1 2018 RELEASE_NOTES.txt drwxr-xr-x 4 root root 35 6月 11 16:52 scripts drwxrwxrwx 2 root root 6 8月 29 02:49 tmp drwxrwxrwx 2 root root 6 8月 29 02:49 warehouse [root@centos02 hive-2.3.4]# pwd /opt/bigdata/hive/hive-2.3.4 [root@centos02 hive-2.3.4]#
添加环境变量
[root@centos02 hive-2.3.4]# vim /etc/profile
# Hive export HIVE_HOME=/opt/bigdata/hive/hive-2.3.4 export CLASSPATH=$CLASSPATH:$HIVE_HOME/lib export PATH=$PATH:$HIVE_HOME/bin
[root@centos02 hive-2.3.4]# source /etc/profile
修改 Hive 配置文件
[root@centos02 hive-2.3.4]# cd $HIVE_HOME/conf [root@centos02 conf]# pwd /opt/bigdata/hive/hive-2.3.4/conf [root@centos02 conf]#
[root@centos02 conf]# mv hive-env.sh.template hive-env.sh [root@centos02 conf]# mv hive-default.xml.template hive-default.xml [root@centos02 conf]# [root@centos02 conf]# mv beeline-log4j2.properties.template beeline-log4j2.properties [root@centos02 conf]# mv hive-exec-log4j2.properties.template hive-exec-log4j2.properties [root@centos02 conf]# mv hive-log4j2.properties.template hive-log4j2.properties [root@centos02 conf]# mv llap-cli-log4j2.properties.template llap-cli-log4j2.properties [root@centos02 conf]# mv llap-daemon-log4j2.properties.template llap-daemon-log4j2.properties
[root@centos02 conf]# ll 总用量 288 -rw-r--r-- 1 root root 1596 11月 1 2018 beeline-log4j2.properties -rw-r--r-- 1 root root 257573 8月 29 03:05 hive-default.xml -rw-r--r-- 1 root root 2628 8月 29 02:57 hive-env.sh -rw-r--r-- 1 root root 2274 11月 1 2018 hive-exec-log4j2.properties -rw-r--r-- 1 root root 2958 8月 29 03:02 hive-log4j2.properties -rw-r--r-- 1 root root 2060 11月 1 2018 ivysettings.xml -rw-r--r-- 1 root root 2719 11月 1 2018 llap-cli-log4j2.properties -rw-r--r-- 1 root root 7041 11月 1 2018 llap-daemon-log4j2.properties -rw-r--r-- 1 root root 2662 11月 1 2018 parquet-logging.properties drwxr-xr-x 3 root root 42 6月 11 17:08 ${system:java.io.tmpdir} [root@centos02 conf]#
修改 Hive运行环境变量
hive-env.sh
[root@centos02 conf]# vim hive-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8 export HADOOP_HOME=/opt/bigdata/hadoop/hadoop-2.8.5 export HIVE_HOME=/opt/bigdata/hive/hive-2.3.4 export HIVE_CONF_DIR=/opt/bigdata/hive/hive-2.3.4/conf export HIVE_AUX_JARS_PATH=/opt/bigdata/hive/hive-2.3.4/lib
hive-log4j2.properties
[root@centos02 conf]# vim hive-log4j2.properties
hive.log.dir=$HIVE_HOME/logs
修改 Hive配置文件
hive-site.xml
[root@centos02 conf]# touch hive-site.xml [root@centos02 conf]# vim hive-site.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?> <?xml-stylesheet type="text/xsl" href="configuration.xsl"?> <configuration> <property> <name>javax.jdo.option.ConnectionURL</name> <value>jdbc:mysql://centos02:3306/hiveDB?useSSL=false</value> </property> <property> <name>javax.jdo.option.ConnectionDriverName</name> <value>com.mysql.cj.jdbc.Driver</value> <description> 默认: com.mysql.jdbc.Driver 新版: com.mysql.cj.jdbc.Driver </description> </property> <property> <name>javax.jdo.option.ConnectionUserName</name> <value>root</value> </property> <property> <name>javax.jdo.option.ConnectionPassword</name> <value>sa123_*****.</value> </property> <property> <name>hive.metastore.uris</name> <value>thrift://centos02:9083</value> <description>默认端口9083</description> </property> <property> <name>hive.home</name> <value>/opt/bigdata/hive/hive-2.3.4</value> </property> <property> <name>hive.metastore.warehouse.dir</name> <value>${hive.home}/warehouse</value> <description>指定Hive的数据存储在HDFS上的目录位置</description> </property> <property> <name>hive.metastore.schema.verification</name> <value>false</value> </property> <property> <name>hive.querylog.location</name> <value>${hive.home}/iotmp</value> </property> <property> <name>hive.exec.local.scratchdir</name> <value>${hive.home}/iotmp</value> </property> <property> <name>hive.downloaded.resources.dir</name> <value>${hive.home}/iotmp</value> </property> </configuration>
为了使Hive能够连接上MySQL,需要将JDBC驱动放置在类的目录下
将mysql驱动包放置在hive的根路径下的lib目录
[root@centos02 conf]# cp /opt/mysql-connector-java-8.0.13.jar $HIVE_HOME/lib
将 Hive目录里的jline 替换 Hadoop目录里的 jline
Hadoop2.7版本后$HADOOP_HOME/share/hadoop/yarn/lib/目录下就没有jline-0.9.94.jar了
[root@centos02 conf]# rm -f $HADOOP_HOME/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jline-0.9.94.jar [root@centos02 conf]# cp $HIVE_HOME/lib/jline-2.12.jar $HADOOP_HOME/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/ [root@centos02 conf]# rm -f $HADOOP_HOME/share/hadoop/kms/tomcat/webapps/kms/WEB-INF/lib/jline-0.9.94.jar [root@centos02 conf]# cp $HIVE_HOME/lib/jline-2.12.jar $HADOOP_HOME/share/hadoop/kms/tomcat/webapps/kms/WEB-INF/lib/
初始化元数据
[root@centos02 conf]# cd $HIVE_HOME/bin [root@centos02 bin]# schematool -dbType mysql -initSchema SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/opt/bigdata/hive/hive-2.3.4/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/opt/bigdata/hadoop/hadoop-2.8.5/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Metastore connection URL: jdbc:mysql://centos02:3306/hiveDB?useSSL=false Metastore Connection Driver : com.mysql.jdbc.Driver Metastore connection User: root Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'. The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary. Starting metastore schema initialization to 2.3.0 Initialization script hive-schema-2.3.0.mysql.sql Initialization script completed schemaTool completed [root@centos02 bin]#
查看数据库是否自动生成元数据的存储表
mysql> show tables; +---------------------------+ | Tables_in_hiveDB | +---------------------------+ | AUX_TABLE | | BUCKETING_COLS | | CDS | | COLUMNS_V2 | | COMPACTION_QUEUE | | COMPLETED_COMPACTIONS | | COMPLETED_TXN_COMPONENTS | | DATABASE_PARAMS | | DBS | | DB_PRIVS | | DELEGATION_TOKENS | | FUNCS | | FUNC_RU | | GLOBAL_PRIVS | | HIVE_LOCKS | | IDXS | | INDEX_PARAMS | | KEY_CONSTRAINTS | | MASTER_KEYS | | NEXT_COMPACTION_QUEUE_ID | | NEXT_LOCK_ID | | NEXT_TXN_ID | | NOTIFICATION_LOG | | NOTIFICATION_SEQUENCE | | NUCLEUS_TABLES | | PARTITIONS | | PARTITION_EVENTS | | PARTITION_KEYS | | PARTITION_KEY_VALS | | PARTITION_PARAMS | | PART_COL_PRIVS | | PART_COL_STATS | | PART_PRIVS | | ROLES | | ROLE_MAP | | SDS | | SD_PARAMS | | SEQUENCE_TABLE | | SERDES | | SERDE_PARAMS | | SKEWED_COL_NAMES | | SKEWED_COL_VALUE_LOC_MAP | | SKEWED_STRING_LIST | | SKEWED_STRING_LIST_VALUES | | SKEWED_VALUES | | SORT_COLS | | TABLE_PARAMS | | TAB_COL_STATS | | TBLS | | TBL_COL_PRIVS | | TBL_PRIVS | | TXNS | | TXN_COMPONENTS | | TYPES | | TYPE_FIELDS | | VERSION | | WRITE_SET | +---------------------------+ 57 rows in set (0.02 sec) mysql>
提示错误
Loading class `com.mysql.jdbc.Driver'. This is deprecated. The new driver class is `com.mysql.cj.jdbc.Driver'.
The driver is automatically registered via the SPI and manual loading of the driver class is generally unnecessary. 正在加载类“com.mysql.jdbc.driver”。这已被弃用。新的驱动程序类是'com.mysql.cj.jdbc.driver'。
驱动程序通过SPI自动注册,通常不需要手动加载驱动程序类。
修改配置文件(hive-site.xml)的驱动名称
[root@centos02 conf]# vim hive-site.xml
<property> <name>javax.jdo.option.ConnectionDriverName</name> <value>com.mysql.jdbc.Driver</value> </property>
<property> <name>javax.jdo.option.ConnectionDriverName</name> <value>com.mysql.cj.jdbc.Driver</value> </property>
先启动Hadoop
[root@centos02 conf]# cd $HADOOP_HOME/sbin [root@centos02 sbin]# start-all.sh [root@centos02 sbin]# [root@centos02 sbin]# jps 11876 DataNode 12074 SecondaryNameNode 12250 ResourceManager 12554 NodeManager 12730 Jps 11708 NameNode [root@centos02 sbin]#
启动元数据服务(另起一个窗口)
[root@centos02 centos02]# cd $HIVE_HOME/bin [root@centos02 bin]#
[root@centos02 bin]# hive --service metastore &
启动 Hive
[root@centos02 conf]# cd $HIVE_HOME/bin [root@centos02 bin]# [root@centos02 bin]# hive which: no hbase in (/usr/local/java/jdk1.8/bin:/usr/lib64/qt-3.3/bin:/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin:/usr/local/scala/scala-2.12/bin:/opt/bigdata/hadoop/hadoop-2.8.5/sbin:/opt/bigdata/hadoop/hadoop-2.8.5/bin:/opt/bigdata/zookeeper/zookeeper-3.4.12/bin:/opt/bigdata/hive/hive-2.3.4/bin:/home/centos02/.local/bin:/home/centos02/bin) SLF4J: Class path contains multiple SLF4J bindings. SLF4J: Found binding in [jar:file:/opt/bigdata/hive/hive-2.3.4/lib/log4j-slf4j-impl-2.6.2.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: Found binding in [jar:file:/opt/bigdata/hadoop/hadoop-2.8.5/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class] SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation. SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory] Logging initialized using configuration in file:/opt/bigdata/hive/hive-2.3.4/conf/hive-log4j2.properties Async: true Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases. hive>
hive> show databases; OK default Time taken: 3.941 seconds, Fetched: 1 row(s) hive>