Hive on Spark集成HBase

一、版本介绍

hive      3.1.2
spark    2.3.4
hbase   2.2.4

Phoenix     5.0.0

二、集群规划

基于上一节集群规划的基础,在master-103节点上部署hbase

三、安装配置hbase

1、解压

$ tar -zxvf hbase-2.2.4-bin.tar.gz -C /home/hadoop/opt

2、添加环境变量

$ vi /etc/profile

export HBASE_HOME=/home/hadoop/opt/hbase-2.2.4
export PATH=$HBASE_HOME/bin:$PATH

3、配置hbase

$ cd /home/hadoop/opt/hbase-2.2.4/conf
$ vi hbase-env.sh

export JAVA_HOME=/home/hadoop/opt/jdk1.8.0_181
export HADOOP_HOME=/home/hadoop/opt/hadoop-2.7.6
export HBASE_HOME=/home/hadoop/opt/hbase-2.2.4
export HBASE_CLASSPATH=/home/hadoop/opt/hadoop-2.7.6/etc/hadoop
export HBASE_PID_DIR=/home/hadoop/opt/hbase-2.2.4/pids
export HBASE_MANAGES_ZK=false

$ vi hbase-site.xml

<!-- 存储目录 -->
<property>  
    <name>hbase.rootdir</name>  
    <value>hdfs://master-100:9000/hbase</value>
    <description>The directory shared by region servers.</description>  
</property>
<!-- hbase的端口 -->
<property>  
    <name>hbase.zookeeper.property.clientPort</name>  
    <value>2181</value>  
    <description>Property from ZooKeeper's config zoo.cfg. The port at which the clients will connect.</description>  
</property>  
<!--  超时时间 -->
<property>  
    <name>zookeeper.session.timeout</name>  
    <value>120000</value>  
</property>  
<!--  zookeeper 集群配置-->
<property>
    <name>hbase.zookeeper.quorum</name>
    <value>master-104:2181,master-105:2181,master-106:2181</value>
</property> 
<property>  
    <name>hbase.tmp.dir</name>
    <value>/home/hadoop/opt/hbase-2.2.4/tmp</value>  
</property>
<property>
    <name>dfs.replication</name> 
    <value>1</value>
</property>
<!-- false是单机模式,true是分布式模式  -->
<property>  
    <name>hbase.cluster.distributed</name>  
    <value>true</value>  
</property>

4、配置hbase子节点
$ vi regionservers

master-103

5、复制hive的lib目录下的hive-hbase-handler-*.jar 到hbase的lib目录下(master-102节点)

$ cd /home/hadoop/opt/apache-hive-3.1.2-bin/lib
$ cp hive-hbase-handler-*.jar /home/hadoop/opt/hbase-2.2.4/lib/

6、启动hbase
$ cd /home/hadoop/opt/hbase-2.2.4
$ bin/start-hbase.sh

四、hive映射hbase

1、在hive的hive-site.xml添加如下配置
$ vi hive-site.xml

    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>master-104,master-105,master-106</value>
    </property>
    <property>  
            <name>hbase.zookeeper.property.clientPort</name>  
            <value>2181</value>  
            <description>
            Property from ZooKeeper's config zoo.cfg.  
                The port at which the clients will connect.  
            </description>  
      </property> 
    <property>  
            <name>hbase.master</name>  
            <value>master-103:60000</value>  
    </property>

2、复制hbase的lib下的jar包到spark的jars目录中

$ cd /home/hadoop/opt/hbase-2.2.4/lib/
$ scp hbase-protocol-*.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp hbase-common-*.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp hbase-client-*.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp hbase-server-*.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp metrics-core*.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp client-facing-thirdparty/htrace-core4-4.2.0-incubating.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp hbase-hadoop2-compat-2.2.4.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp ./shaded-clients/hbase-shaded-client-byo-hadoop-2.2.4.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp hbase-zookeeper-2.2.4.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp hive-hbase-handler-3.1.2.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars
$ scp ./shaded-clients/hbase-shaded-mapreduce-2.2.4.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars

3、复制hive的lib下的hive-exec包到spark的jars目录下

$ cd /home/hadoop/opt/apache-hive-3.1.2-bin/lib
$ scp hive-exec-3.1.2.jar master-104:/home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/jars

  ---注:因之前部署spark的时候是将jars下的jar包上传至hdfs中的,上述增加的jar包别忘记同步到hdfs

4、启动spark(master-104机器)
$ cd /home/hadoop/opt/spark-2.3.4-bin-hadoop27-without-hive/sbin
$ ./start-all.sh

5、启动hive(master-102机器)
$ nohup hive --service metastore > metastore.log 2>&1 &
$ nohup hive --service hiveserver2 > hiveserver2.log 2>&1 &

五、验证

1、启动hive交互窗口
$ hive

--在hive中创建hbase的映射表
create table t_student(id int,name string) stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' with serdeproperties("hbase.columns.mapping"=":key,st1:name") tblproperties("hbase.table.name"="t_student","hbase.mapred.output.outputtable" = "t_student");
--查看表
show tables;
--查看创建的表结构
describe t_student;
--插入数据
insert into t_student values('2001','zhangsan2'),('2002','lisi');

2、启动hbase的shell,查看在hive中建立的hbase集成表及表数据
$ hbase shell

 --在hbase中查看建好的表
 list
 --查看建表的结构
 describe 't_student'
 --插入数据
 put 't_student','1001','st1:name','zhangsan'
 put 't_student','1002','st1:name','lisi'
 --查询数据
 scan 't_student'

  注:在集成hbase的过程中缺好多包,使用下面的命令查找缺少指定类的jar包名
  find ./ -name "*.jar" | xargs grep -Hsli org.apache.hadoop.hive.ql.plan.ExprNodeDesc

 六、Phoenix集成

1、解压
$ tar -zxvf apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz

2、修改文件名称
$ mv apache-phoenix-5.0.0-HBase-2.0-bin/ phoenix-5.0.0-HBase2

3、复制hbase的conf下的hbase-site.xml到Phoenix的bin下
$ cd /home/hadoop/opt/hbase-2.2.4
$ cp hbase-site.xml /home/hadoop/opt/phoenix-5.0.0-HBase2/bin/

4、复制Phoenix下的jar包到hbase的lib下
$ cd /home/hadoop/opt/phoenix-5.0.0-HBase2
$ cp phoenix-core-5.0.0-HBase-2.0.jar /home/hadoop/opt/hbase-2.2.4/lib/
$ cp phoenix-5.0.0-HBase-2.0-server.jar /home/hadoop/opt/hbase-2.2.4/lib/

5、启动Phoenix的shell
$ cd /home/hadoop/opt/phoenix-5.0.0-HBase2/bin
####命令后面跟的节点是zookeeper的节点
$ sqlline.py master-104,master-105,master-106:2181

6、Phoenix shell下常用的命令操作

#展示表
!tables
#创建表,直接通过Phoenix创建的hbase表在hbase中不易读
create table test(id integer not null primary key,name varchar);
#插入数据
upsert into test values(1,'Andy');
upsert into test values(2,'Andy');
upsert into test values(3,'Andy');
upsert into test values(4,'Andy');
#查询数据
select * from test;
#删除数据
delete from test where id=4;
#增加一列
alter table test add address varchar;
#删除一列
alter table test drop column address;
#删除表
drop table test;

7、与hbase已有表映射

#在Phoenix下创建之前在hbase中建立的t_student表的映射表
create view "t_student"(
"ROW" varchar primary key,
"st1"."name" varchar
);
# 查询数据
select * from "t_student";
posted @ 2022-02-16 15:42  卩s丶Eric  阅读(83)  评论(0编辑  收藏  举报