Deploying HBase 2.1.2 on CentOS 7
https://blog.csdn.net/langkye/article/details/97240948
https://www.cnblogs.com/swordfall/p/8819668.html
https://www.cnblogs.com/zhengna/p/9358578.html
Environment
System / Software | Version |
Centos | 7.6.1810 |
Hadoop | 2.7.6 |
Hbase | 2.1.2 |
Zookeeper | 3.4.9 |
JDK | 1.8 |
SSH | 8.1p1 |
1. Install Hadoop
# Download the package
cd ~ && wget http://archive.apache.org/dist/hadoop/core/hadoop-2.7.6/hadoop-2.7.6.tar.gz
# Extract
mkdir /opt/app && tar -zxf hadoop-2.7.6.tar.gz -C /opt/app
# Configure environment variables
cat >>/etc/profile<<EOF
export HADOOP_HOME=/opt/app/hadoop-2.7.6
export PATH=\$PATH:\$HADOOP_HOME/bin
EOF
source /etc/profile
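# Optional quick check that the variables took effect (assumes the JDK is already installed):
hadoop version # should print Hadoop 2.7.6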
# Edit the Hadoop configuration files
# vim /opt/app/hadoop-2.7.6/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8.0_11
# vim /opt/app/hadoop-2.7.6/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>file:///opt/app/hadoop-2.7.6</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://HOST_IP:9000</value><!-- replace HOST_IP with this machine's IP or hostname -->
</property>
</configuration>
# vim /opt/app/hadoop-2.7.6/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///opt/app/hadoop-2.7.6/tmp/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///opt/app/hadoop-2.7.6/tmp/dfs/data</value>
</property>
<property>
<name>dfs.namenode.rpc-bind-host</name>
<value>0.0.0.0</value>
</property>
</configuration>
# Passwordless SSH login (set it up yourself; a minimal sketch follows)
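# Single-node sketch (assumes root and the default key paths; adjust as needed):
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost # should log in without prompting for a password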
# Format HDFS (answer yes if prompted)
cd /opt/app/hadoop-2.7.6 && ./bin/hdfs namenode -format
# Start HDFS (stop with ./sbin/stop-dfs.sh)
./sbin/start-dfs.sh
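# Verify the daemons are up; on a single node, jps should list NameNode, DataNode and SecondaryNameNode
jps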
2. Install ZooKeeper
# Download the package
cd /opt/app && wget http://archive.apache.org/dist/zookeeper/zookeeper-3.4.9/zookeeper-3.4.9.tar.gz
# Extract
tar -zxvf zookeeper-3.4.9.tar.gz
# Edit zoo.cfg
cd zookeeper-3.4.9/conf && cp zoo_sample.cfg zoo.cfg
mkdir ../data
# Add the following settings
vi zoo.cfg
dataDir=/opt/app/zookeeper-3.4.9/data
dataLogDir=/opt/app/zookeeper-3.4.9/data/log
server.1=hadoop252:2888:3888
# Create the myid file
echo 1 > /opt/app/zookeeper-3.4.9/data/myid
# Start ZooKeeper
cd /opt/app/zookeeper-3.4.9/bin/ && ./zkServer.sh start
# Check the status
jps # QuorumPeerMain in the output means ZooKeeper is running
zkServer.sh status # in single-node mode the only role is standalone
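# Optional extra check (assumes the default client port 2181): connect with the bundled CLI
./zkCli.sh -server 127.0.0.1:2181 # inside the CLI, "ls /" should at least show the zookeeper znode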
3. Install HBase
# Download the package
wget http://archive.apache.org/dist/hbase/2.1.2/hbase-2.1.2-bin.tar.gz
# Set the hostname (any name will do)
hostname hadoop252
echo "hadoop252" > /etc/hostname
# Sync the system time
ntpdate ntp1.aliyun.com
# Extract
tar -xzf hbase-2.1.2-bin.tar.gz -C /opt/app/
# Edit hbase-env.sh
# vim /opt/app/hbase-2.1.2/conf/hbase-env.sh
export JAVA_HOME=/usr/local/java/jdk1.8.0_11
export HBASE_HOME=/opt/app/hbase-2.1.2
export HBASE_CLASSPATH=/opt/app/hadoop-2.7.6/etc/hadoop
export HBASE_PID_DIR=/opt/app/hbase-2.1.2/pids
export HBASE_MANAGES_ZK=false
# Edit hbase-site.xml
# vim /opt/app/hbase-2.1.2/conf/hbase-site.xml
<configuration>
<!-- Storage directory; the HDFS here can be a single-node deployment -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://hadoop252:9000/hbase</value>
<description>The directory shared by region servers.</description>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/opt/app/hbase-2.1.2/tmp</value>
</property>
<!-- false = standalone mode, true = distributed mode -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>hadoop252:2181</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/opt/app/zookeeper-3.4.9</value>
</property>
</configuration>
# Visit IP:50070 for the Hadoop web UI
4. Start HBase
cd /opt/app/hbase-2.1.2/bin && ./start-hbase.sh
# After startup, visit IP:16010 for the HBase web UI
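# Optional sanity check: jps should now also show HMaster and HRegionServer,
# and "status" inside the HBase shell should report the running servers
./hbase shell # then type: status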
5. Data Migration
5.1 Exporting table data
# 1. Export directly to the server's local filesystem
# Export the data_his table to /opt/hbase-data/data_his/ on the server
hbase org.apache.hadoop.hbase.mapreduce.Export data_his file:///opt/hbase-data/data_his/
# 2. Export to HDFS first, then pull to the local filesystem
# Export the data_his table to the HDFS directory /backup/data_his
hbase org.apache.hadoop.hbase.mapreduce.Export 'data_his' /backup/data_his
# Pull the data under /backup/data_his down to the local filesystem
hdfs dfs -get /backup/data_his ./
# Remove the /backup/data_his directory from HDFS
hdfs dfs -rm -r /backup/data_his
5.2 Importing table data
# Upload the data_his export to the HDFS directory /backup/ (-f overwrites)
hdfs dfs -put -f /opt/data/data_his /backup/
# List the data under /backup on HDFS
hdfs dfs -ls -h /backup
# Import the data_his table data from HDFS (the target table must already exist; see the note below)
hbase org.apache.hadoop.hbase.mapreduce.Import data_his /backup/data_his
# Import into a table under a specific namespace
hbase org.apache.hadoop.hbase.mapreduce.Import template:data_his /backup/data_his
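Note: Import writes into an existing table. If data_his has not been created on the target cluster yet, create the namespace and table first in the HBase shell; the column family name 'cf' below is only a placeholder and must match the families of the exported table:
create_namespace 'template'
create 'template:data_his', {NAME => 'cf'}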
5.3 Exporting part of a table's data
echo "scan 'gp_energy_data', {TIMERANGE => [1640966400000, 1647401603000], LIMIT=>20000000000}"| hbase shell > dataxcd
6. Common operations
6.1 hbase shell
# Show cluster status
status
# List all tables
list
# List namespaces
list_namespace
# Create a namespace
create_namespace 'template'
# Create a table under the template namespace (column family defined in the NAME dictionary)
create 'template:table_name', {NAME => 'cf'}
# Drop a table (it must be disabled first)
disable 'table'
drop 'table'
# Truncate a table
truncate 'table'
# Scan the rows of a table
scan 'table'
# Count the rows in a table
count 'table'
# Check whether a table exists
exists 'table'
6.2 hdfs dfs
# Copy a directory
hdfs dfs -cp /hbase/data/default /backup/
# Create directories (recursively)
hdfs dfs -mkdir -p /hbase/data/abc/efg
# Delete a file
hdfs dfs -rm /hbase/abc
# Delete recursively
hdfs dfs -rm -r /hbase/data/default/abc
# List the files under the root directory
hdfs dfs -ls /
# Show the sizes of files under a directory
hdfs dfs -du -h /hbase
7. Deploy Phoenix
7.1 HBase and Phoenix version compatibility
Check the Phoenix download page: http://phoenix.apache.org/download.html
Download Phoenix
wget https://mirrors.bfsu.edu.cn/apache/phoenix/apache-phoenix-5.0.0-HBase-2.0/bin/apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
7.2 Installation
# Extract
tar -zxvf apache-phoenix-5.0.0-HBase-2.0-bin.tar.gz
# Copy phoenix-5.0.0-HBase-2.0-server.jar to the lib directory on every HBase node
cp phoenix-core-5.0.0-HBase-2.0.jar phoenix-5.0.0-HBase-2.0-server.jar /home/app/hbase-2.1.2/lib/
Edit the hbase-site.xml configuration file
<!-- Settings for building HBase secondary indexes with Phoenix -->
<property>
<name>hbase.regionserver.wal.codec</name>
<value>org.apache.hadoop.hbase.regionserver.wal.IndexedWALEditCodec</value>
</property>
<property>
<name>hbase.region.server.rpc.scheduler.factory.class</name>
<value>org.apache.hadoop.hbase.ipc.PhoenixRpcSchedulerFactory</value>
<description>Factory to create the Phoenix RPC Scheduler that uses separate queues for index and metadata updates</description>
</property>
<property>
<name>hbase.rpc.controllerfactory.class</name>
<value>org.apache.hadoop.hbase.ipc.controller.ServerRpcControllerFactory</value>
<description>Factory to create the Phoenix RPC Scheduler that uses separate queues for index and metadata updates</description>
</property>
<property>
<name>hbase.master.loadbalancer.class</name>
<value>org.apache.phoenix.hbase.index.balancer.IndexLoadBalancer</value>
</property>
<property>
<name>hbase.coprocessor.master.classes</name>
<value>org.apache.phoenix.hbase.index.master.IndexMasterObserver</value>
</property>
<!-- Settings for mapping Phoenix schemas to HBase namespaces -->
<property>
<name>phoenix.schema.isNamespaceMappingEnabled</name>
<value>true</value>
</property>
<property>
<name>phoenix.schema.mapSystemTablesToNamespace</name>
<value>true</value>
</property>
Copy hbase/conf/hbase-site.xml into Phoenix's bin/ directory
cp hbase-site.xml /home/app/apache-phoenix-5.0.0-HBase-2.0-bin/bin/
Restart HBase
7.3 Verification
[root@localhost conf]# cd /home/app/apache-phoenix-5.0.0-HBase-2.0-bin/bin/
[root@localhost bin]# ./sqlline.py hadoop3:2181
Setting property: [incremental, false]
Setting property: [isolation, TRANSACTION_READ_COMMITTED]
issuing: !connect jdbc:phoenix:hadoop3:2181 none none org.apache.phoenix.jdbc.PhoenixDriver
Connecting to jdbc:phoenix:hadoop3:2181
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/app/apache-phoenix-5.0.0-HBase-2.0-bin/phoenix-5.0.0-HBase-2.0-client.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/app/hadoop-2.7.6/share/hadoop/common/lib/slf4j-log4j12-1.7.10.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
21/12/06 11:40:16 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Connected to: Phoenix (version 5.0)
Driver: PhoenixEmbeddedDriver (version 5.0)
Autocommit status: true
Transaction isolation: TRANSACTION_READ_COMMITTED
Building list of tables and columns for tab-completion (set fastconnect to true to skip)...
133/133 (100%) Done
Done
sqlline version 1.2.0
0: jdbc:phoenix:hadoop3:2181>
8. Scripts
8.1 Delete a namespace and all the tables under it
#!/bin/bash
# List all table names under the given namespace
# $1: namespace name  $2: table name
list_tables(){
/home/hbase/hbase-1.0.1.1/bin/hbase shell << EOF
list_namespace_tables "$1"
EOF
}
# Drop a table under the namespace
drop_tables(){
/home/hbase/hbase-1.0.1.1/bin/hbase shell << EOF
disable "$1:$2"
drop "$1:$2"
EOF
}
# Drop the namespace
drop_namespace(){
/home/hbase/hbase-1.0.1.1/bin/hbase shell << EOF
drop_namespace "$1"
EOF
}
# Get all table names
table_info=$(list_tables $1)
# Filter the output so that only the table names remain
tables_name=$(echo $table_info | awk -F " TABLE " '{print $2}' | awk -F " row" '{print $1}' | awk -F " " 'OFS=" "{$NF="";print}')
# Stop if the namespace does not exist
first=$(echo $tables_name | awk -F " " '{print $1}')
if [ "$first" == "ERROR:" ];then
exit 0
fi
# Loop over the tables and drop each one
for s in ${tables_name[@]}
do
echo "传入参数:$1"
echo "表名:$s"
drop_tables $1 $s
done
# Drop the namespace
drop_namespace $1
Script adapted from: https://blog.csdn.net/ck978105293/article/details/109047528
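A hypothetical invocation, assuming the script above is saved as drop_namespace.sh and the namespace to delete is passed as the only argument:
sh drop_namespace.sh template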
9. Problems encountered
9.1 Waiting for dfs to exit safe mode...
# The Hadoop log keeps showing "Waiting for dfs to exit safe mode..."
# This happens because HDFS is in safe mode
# Leave safe mode
[root@10.4.7.200 hadoop]# hadoop dfsadmin -safemode leave
DEPRECATED: Use of this script to execute hdfs command is deprecated.
Instead use the hdfs command for it.
Safe mode is OFF
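# The DEPRECATED notice suggests the newer hdfs form; checking the state first is often useful:
hdfs dfsadmin -safemode get # shows whether safe mode is ON or OFF
hdfs dfsadmin -safemode leave # equivalent modern form of the command above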
9.2 ERROR: For input string: "FOREVER"
# Creating a table with TTL set to FOREVER fails (HBase version 1.0.1.1)
hbase(main):011:0> create 'eco', {NAME => 'column', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => 'FOREVER', KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}
ERROR: For input string: "FOREVER"
Here is some help for this command:
Creates a table. Pass a table name, and a set of column family
specifications (at least one), and, optionally, table configuration.
Column specification can be a simple string (name), or a dictionary
(dictionaries are described below in main help output), necessarily
including NAME attribute.
Examples:
Create a table with namespace=ns1 and table qualifier=t1
hbase> create 'ns1:t1', {NAME => 'f1', VERSIONS => 5}
Create a table with namespace=default and table qualifier=t1
hbase> create 't1', {NAME => 'f1'}, {NAME => 'f2'}, {NAME => 'f3'}
hbase> # The above in shorthand would be the following:
hbase> create 't1', 'f1', 'f2', 'f3'
hbase> create 't1', {NAME => 'f1', VERSIONS => 1, TTL => 2592000, BLOCKCACHE => true}
hbase> create 't1', {NAME => 'f1', CONFIGURATION => {'hbase.hstore.blockingStoreFiles' => '10'}}
Table configuration options can be put at the end.
Examples:
hbase> create 'ns1:t1', 'f1', SPLITS => ['10', '20', '30', '40']
hbase> create 't1', 'f1', SPLITS => ['10', '20', '30', '40']
hbase> create 't1', 'f1', SPLITS_FILE => 'splits.txt', OWNER => 'johndoe'
hbase> create 't1', {NAME => 'f1', VERSIONS => 5}, METADATA => { 'mykey' => 'myvalue' }
hbase> # Optionally pre-split the table into NUMREGIONS, using
hbase> # SPLITALGO ("HexStringSplit", "UniformSplit" or classname)
hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit'}
hbase> create 't1', 'f1', {NUMREGIONS => 15, SPLITALGO => 'HexStringSplit', REGION_REPLICATION => 2, CONFIGURATION => {'hbase.hregion.scan.loadColumnFamiliesOnDemand' => 'true'}}
You can also keep around a reference to the created table:
hbase> t1 = create 't1', 'f1'
Which gives you a reference to the table named 't1', on which you can then
call methods.
# Fix: set the TTL to 2147483647 (Integer.MAX_VALUE, i.e. effectively forever)
hbase(main):014:0> create 'eco', {NAME => 'column', DATA_BLOCK_ENCODING => 'NONE', BLOOMFILTER => 'ROW', REPLICATION_SCOPE => '0', VERSIONS => '1', COMPRESSION => 'NONE', MIN_VERSIONS => '0', TTL => 2147483647, KEEP_DELETED_CELLS => 'false', BLOCKSIZE => '65536', IN_MEMORY => 'false', BLOCKCACHE => 'true'}
0 row(s) in 0.7010 seconds
=> Hbase::Table - eco
9.3 java.io.FileNotFoundException: File does not exist hdfs://*.jar
# After a round of searching, this post pointed the way: https://stackoverflow.com/questions/28213244/hadoop-accessing-3rd-party-libraries-from-local-file-system-of-a-hadoop-node
# The relevant jar files need to be put onto HDFS
hadoop fs -mkdir -p hdfsPath
hadoop fs -put localfile/*.jar hdfsPath
This article is from cnblogs, author: MegaloBox. When reposting, please credit the original link: https://www.cnblogs.com/cpw6/p/13368839.html