HBase的Shell命令和JavaAPI
HBase的shell操作和JavaAPI的使用:
Shell
表操作
创建表
create 'student','info' #表名 列族
插入表
put 'student','1001','info:sex','male' put 'student','1001','info:age','18' put 'student','1002','info:name','Janna' put 'student','1002','info:sex','female' put 'student','1002','info:age','20'
查看表数据
scan 'student' scan 'student',{STARTROW => '1001', STOPROW => '1002'} #左闭右开 scan 'student',{STARTROW => '1001'}
查看表结构
describe 'student' desc 'student' #效果相同
更新指定字段
put 'student','1001','info:name','Nick' put 'student','1001','info:age','100' # 表明 rowkey 列族:列 值
查看指定行数据
get 'student','1001'
统计表行数
count 'student'
删除数据
deleteall 'student','1001'
删除rowkey的某一列
delete 'student','1002','info:sex'
清空数据
truncate 'student' #表的操作顺序为先disable,然后再truncate。
删除表
disable 'student'
表更表信息
alter 'student',{NAME=>'info',VERSIONS=>3} ##将info列族中的数据存放3个版本:
Java API
环境准备
<dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-server</artifactId> <version>1.3.1</version> </dependency> <dependency> <groupId>org.apache.hbase</groupId> <artifactId>hbase-client</artifactId> <version>1.3.1</version> </dependency>
HBaseAPI
获取Configuration对象
public static Configuration conf; static{ //使用HBaseConfiguration的单例方法实例化 conf = HBaseConfiguration.create(); conf.set("hbase.zookeeper.quorum", "192.168.1.101"); ## 换成自己的ZK节点IP conf.set("hbase.zookeeper.property.clientPort", "2181"); }
判断表是否存在
public static boolean isTableExist(String tableName) throws MasterNotRunningException, ZooKeeperConnectionException, IOException{ //在HBase中管理、访问表需要先创建HBaseAdmin对象 HBaseAdmin admin = new HBaseAdmin(conf); return admin.tableExists(tableName); }
创建表
public static void createTable(String tableName, String... columnFamily) throws MasterNotRunningException, ZooKeeperConnectionException, IOException{ HBaseAdmin admin = new HBaseAdmin(conf); //判断表是否存在 if(isTableExist(tableName)){ System.out.println("表" + tableName + "已存在"); //System.exit(0); }else{ //创建表属性对象,表名需要转字节 HTableDescriptor descriptor = new HTableDescriptor(TableName.valueOf(tableName)); //创建多个列族 for(String cf : columnFamily){ descriptor.addFamily(new HColumnDescriptor(cf)); } //根据对表的配置,创建表 admin.createTable(descriptor); System.out.println("表" + tableName + "创建成功!"); } }
删除表
public static void dropTable(String tableName) throws MasterNotRunningException, ZooKeeperConnectionException, IOException{ HBaseAdmin admin = new HBaseAdmin(conf); if(isTableExist(tableName)){ admin.disableTable(tableName); admin.deleteTable(tableName); System.out.println("表" + tableName + "删除成功!"); }else{ System.out.println("表" + tableName + "不存在!"); } }
插入数据
public static void addRowData(String tableName, String rowKey, String columnFamily, String column, String value) throws IOException{ //创建HTable对象 HTable hTable = new HTable(conf, tableName); //向表中插入数据 Put put = new Put(Bytes.toBytes(rowKey)); //向Put对象中组装数据 put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value)); hTable.put(put); hTable.close(); System.out.println("插入数据成功"); }
删除多行数据
public static void deleteMultiRow(String tableName, String... rows) throws IOException{ HTable hTable = new HTable(conf, tableName); List<Delete> deleteList = new ArrayList<Delete>(); for(String row : rows){ Delete delete = new Delete(Bytes.toBytes(row)); deleteList.add(delete); } hTable.delete(deleteList); hTable.close(); }
获取所有数据
public static void getAllRows(String tableName) throws IOException{ HTable hTable = new HTable(conf, tableName); //得到用于扫描region的对象 Scan scan = new Scan(); //使用HTable得到resultcanner实现类的对象 ResultScanner resultScanner = hTable.getScanner(scan); for(Result result : resultScanner){ Cell[] cells = result.rawCells(); for(Cell cell : cells){ //得到rowkey System.out.println("行键:" + Bytes.toString(CellUtil.cloneRow(cell))); //得到列族 System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell))); System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell))); System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell))); } } }
获取某一行数据
public static void getRow(String tableName, String rowKey) throws IOException{ HTable table = new HTable(conf, tableName); Get get = new Get(Bytes.toBytes(rowKey)); //get.setMaxVersions();显示所有版本 //get.setTimeStamp();显示指定时间戳的版本 Result result = table.get(get); for(Cell cell : result.rawCells()){ System.out.println("行键:" + Bytes.toString(result.getRow())); System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell))); System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell))); System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell))); System.out.println("时间戳:" + cell.getTimestamp()); } }
获取某一行指定“列族:列”的数据
public static void getRowQualifier(String tableName, String rowKey, String family, String qualifier) throws IOException{ HTable table = new HTable(conf, tableName); Get get = new Get(Bytes.toBytes(rowKey)); get.addColumn(Bytes.toBytes(family), Bytes.toBytes(qualifier)); Result result = table.get(get); for(Cell cell : result.rawCells()){ System.out.println("行键:" + Bytes.toString(result.getRow())); System.out.println("列族" + Bytes.toString(CellUtil.cloneFamily(cell))); System.out.println("列:" + Bytes.toString(CellUtil.cloneQualifier(cell))); System.out.println("值:" + Bytes.toString(CellUtil.cloneValue(cell))); } }
MapReduce
HBase的相关JavaAPI,可以实现伴随HBase操作的MapReduce过程,使用MapReduce将数据从本地文件系统导入到HBase的表中或者从HBase中读取一些原始数据后使用MapReduce做数据分析。
Hive集成Hbase
编译:hive-hbase-handler-1.2.2.jar
ln -s $HBASE_HOME/lib/hbase-common-1.3.1.jar $HIVE_HOME/lib/hbase-common-1.3.1.jar ln -s $HBASE_HOME/lib/hbase-server-1.3.1.jar $HIVE_HOME/lib/hbase-server-1.3.1.jar ln -s $HBASE_HOME/lib/hbase-client-1.3.1.jar $HIVE_HOME/lib/hbase-client-1.3.1.jar ln -s $HBASE_HOME/lib/hbase-protocol-1.3.1.jar $HIVE_HOME/lib/hbase-protocol-1.3.1.jar ln -s $HBASE_HOME/lib/hbase-it-1.3.1.jar $HIVE_HOME/lib/hbase-it-1.3.1.jar ln -s $HBASE_HOME/lib/htrace-core-3.1.0-incubating.jar $HIVE_HOME/lib/htrace-core-3.1.0-incubating.jar ln -s $HBASE_HOME/lib/hbase-hadoop2-compat-1.3.1.jar $HIVE_HOME/lib/hbase-hadoop2-compat-1.3.1.jar ln -s $HBASE_HOME/lib/hbase-hadoop-compat-1.3.1.jar $HIVE_HOME/lib/hbase-hadoop-compat-1.3.1.jar
hive-site.xml中修改zookeeper的属性,如下:
<property> <name>hive.zookeeper.quorum</name> <value>datanode1,datanode2,datanode3</value> <description>The list of ZooKeeper servers to talk to. This is only needed for read/write locks.</description> </property> <property> <name>hive.zookeeper.client.port</name> <value>2181</value> <description>The port of ZooKeeper servers to talk to. This is only needed for read/write locks.</description> </property>
案例
创建hive关联表
CREATE TABLE hive_hbase_emp_table( empno int, ename string, job string, mgr int, hiredate string, sal double, comm double, deptno int) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:ename,info:job,info:mgr,info:hiredate,info:sal,info:comm,info:deptno") TBLPROPERTIES ("hbase.table.name" = "hbase_hive_emp_table_");
在Hive中创建临时中间表,用于load文件中的数据
CREATE TABLE emp( empno int, ename string, job string, mgr int, hiredate string, sal double, comm double, deptno int) row format delimited fields terminated by ',';
向Hive中间表中load数据
load data local inpath '/home/hadoop/emp.csv' into table emp;
通过insert命令将中间表中的数据导入到Hive关联HBase的那张表中
insert into table hive_hbase_emp_table select * from emp;
查看HIVE表
select * from hive_hbase_emp_table;
查看HBase表
scan ‘hbase_emp_table’
在HBase中已经存储了某一张表hbase_emp_table,然后在Hive中创建一个外部表来关联HBase中的hbase_emp_table这张表,使之可以借助Hive来分析HBase这张表中的数据。
Hive创建表
CREATE EXTERNAL TABLE relevance_hbase_emp( empno int, ename string, job string, mgr int, hiredate string, sal double, comm double, deptno int) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:ename,info:job,info:mgr,info:hiredate,info:sal,info:comm,info:deptno") TBLPROPERTIES ("hbase.table.name" = "hbase_emp_table");
关联后就可以使用Hive函数进行一些分析操作了
select * from relevance_hbase_emp;