大数据HBase JavaAPI操作
目录
1 环境准备
新建项目后在 pom.xml 中添加依赖:
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.3.1</version>
</dependency>
2 HBaseAPI
2.1 获取 Configuration 对象
public static Configuration conf;
static{
//使用 HBaseConfiguration 的单例方法实例化
conf = HBaseConfiguration.create();
conf.set("hbase.zookeeper.quorum", "192.166.9.102");
conf.set("hbase.zookeeper.property.clientPort", "2181");
}
//或者注入到spring
@Configuration
@ConfigurationProperties(prefix = "hbase")
public class HbaseConfiguration {
private Map<String, String> config = new HashMap<>();
public Map<String, String> getConfig() {
return config;
}
public void setConfig(Map<String, String> config) {
this.config = config;
}
public org.apache.hadoop.conf.Configuration configuration() {
org.apache.hadoop.conf.Configuration configuration = HBaseConfiguration.create();
for(Map.Entry<String, String> map : config.entrySet()){
configuration.set(map.getKey(), map.getValue());
}
return configuration;
}
@Bean
public Admin admin() {
Admin admin = null;
try {
Connection connection = ConnectionFactory.createConnection(configuration());
admin = connection.getAdmin();
} catch (IOException e) {
e.printStackTrace();
}
return admin;
}
}
2.2 判断表是否存在
/**
* 判断表是否存在 新版本
* @param tableName 表名
* @return true/false
*/
public boolean isExists(String tableName) {
boolean tableExists = false;
try {
TableName table = TableName.valueOf(tableName);
tableExists = hbaseAdmin.tableExists(table);
} catch (IOException e) {
e.printStackTrace();
}
return tableExists;
}
HBaseAdmin对象都是DDL操作表,
2.3 创建表
public static void createTable(String tableName, String... columnFamily) throws MasterNotRunningException, ZooKeeperConnectionException,IOException{
HBaseAdmin admin = new HBaseAdmin(conf);
//判断表是否存在
if(isTableExist(tableName)){
System.out.println("表" + tableName + "已存在");
//System.exit(0);
}else{
//创建表属性对象,表名需要转字节
HTableDescriptor descriptor = new
HTableDescriptor(TableName.valueOf(tableName));
//创建多个列族
for(String cf : columnFamily){
//添加列族信息
descriptor.addFamily(new HColumnDescriptor(cf));
}
//根据对表的配置,创建表
admin.createTable(descriptor);
System.out.println("表" + tableName + "创建成功!");
}
}
//封装好的方法,应该在创建的时候先创建最终的方法然后再逐步完善参数,但是熟悉的话可以正着写不过去了公司这都是封装好的知道有这个东西就好
/*应该先做一个columnFamily的非空判断*/
public boolean createTable(String tableName, List<String> columnFamily, List<String> keys) {
if (!isExists(tableName)) {
try {
TableName table = TableName.valueOf(tableName);
HTableDescriptor desc = new HTableDescriptor(table);
for (String cf : columnFamily) {
//创建列族描述器
desc.addFamily(new HColumnDescriptor(cf));
}
if (keys == null) {
hbaseAdmin.createTable(desc);
} else {
byte[][] splitKeys = getSplitKeys(keys);
hbaseAdmin.createTable(desc, splitKeys);
}
return true;
} catch (IOException e) {
e.printStackTrace();
}
} else {
System.out.println(tableName + "is exists!!!");
return false;
}
return false;
}
2.4 删除表
public static void dropTable(String tableName) throws
MasterNotRunningException,
ZooKeeperConnectionException, IOException{
HBaseAdmin admin = new HBaseAdmin(conf);
if(isTableExist(tableName)){
admin.disableTable(tableName);
admin.deleteTable(tableName);
System.out.println("表" + tableName + "删除成功!");
}else{
System.out.println("表" + tableName + "不存在!");
}
}
命名空间管理(一般不指定命名空间是再default) 命名空间可以被创建、移除、修改。
表和命名空间的隶属关系在在创建表时决定,通过以下格式指定: 当为一张表指定命名空间之后,对表的操作都要加命名空间,否则会找不到表。 相关shell操作如下所示: 创建一个命名空间 根据命名空间创建表 此时命名空间namespace应该存在,否则报错。 删除命名空间
在删除一个命名空间时,该命名空间不能包含任何的表,否则会报错。
修改命名空间 METHOD => ‘set’ ‘PROPERTY_NAME’ => ‘PROPERTY_VALUE’ 显示所有命名空间
2.5 向表中插入数据
public static void addRowData(String tableName, String rowKey,
String columnFamily, String
column, String value) throws IOException{
//创建 HTable 对象
HTable hTable = new HTable(conf, tableName);
//向表中插入数据
Put put = new Put(Bytes.toBytes(rowKey));
//向 Put 对象中组装数据
put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column),
Bytes.toBytes(value));
hTable.put(put);
hTable.close();
System.out.println("插入数据成功");
}
单条和封装键值对查询数据
/**
* 插入数据(单条)
*
* @param tableName 表名
* @param rowKey rowKey
* @param columnFamily 列族
* @param column 列
* @param value 值
* @return true/false
*/
public boolean putData(String tableName, String rowKey, String columnFamily, String column,
String value) {
//键值对
return putData(tableName, rowKey, columnFamily, Collections.singletonMap(column, value));
}
/**
* 插入数据(批量需要遍历插入数据)
*
* @param tableName 表名
* @param rowKey rowKey
* @param columnFamily 列族
* @param columns 列值
* @return true/false
*/
public boolean putData(String tableName, String rowKey, String columnFamily,
Map<String, String> columns) {
try {
Table table = hbaseAdmin.getConnection().getTable(TableName.valueOf(tableName));
Put put = new Put(Bytes.toBytes(rowKey));
for (Map.Entry<String, String> entry : columns.entrySet()) {
put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(entry.getKey()),
Bytes.toBytes(entry.getValue()));
}
table.put(put);
table.close();
return true;
} catch (IOException e) {
e.printStackTrace();
return false;
}
}
2.6 删除多行数据
public static void deleteMultiRow(String tableName, String... rows)
throws IOException{
HTable hTable = new HTable(conf, tableName);
List<Delete> deleteList = new ArrayList<Delete>();
for(String row : rows){
Delete delete = new Delete(Bytes.toBytes(row));
deleteList.add(delete);
}
hTable.delete(deleteList);
hTable.close();
}
2.7 获取所有数据
public static void getAllRows(String tableName) throws IOException{
HTable hTable = new HTable(conf, tableName);
//得到用于扫描 region 的对象
Scan scan = new Scan();
//使用 HTable 得到 resultcanner 实现类的对象
ResultScanner resultScanner = hTable.getScanner(scan);
for(Result result : resultScanner){
Cell[] cells = result.rawCells();
for(Cell cell : cells){
//得到 rowkey
System.out.println(" 行 键 :" +
Bytes.toString(CellUtil.cloneRow(cell)));
//得到列族
System.out.println(" 列 族 " +
Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(" 列 :" +
Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(" 值 :" +
Bytes.toString(CellUtil.cloneValue(cell)));
}
}
}
2.8 获取某一行数据
public static void getRow(String tableName, String rowKey) throws
IOException{
HTable table = new HTable(conf, tableName);
Get get = new Get(Bytes.toBytes(rowKey));
//get.setMaxVersions();显示所有版本
//get.setTimeStamp();显示指定时间戳的版本
Result result = table.get(get);
for(Cell cell : result.rawCells()){
System.out.println(" 行 键 :" +Bytes.toString(result.getRow()));
System.out.println(" 列 族 " +
Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(" 列 :" +
Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(" 值 :" +
Bytes.toString(CellUtil.cloneValue(cell)));
System.out.println("时间戳:" + cell.getTimestamp());
}
}
2.9 获取某一行指定“列族:列”的数据
public static void getRowQualifier(String tableName, String rowKey,
String family, String
qualifier) throws IOException{
HTable table = new HTable(conf, tableName);
Get get = new Get(Bytes.toBytes(rowKey));
get.addColumn(Bytes.toBytes(family),
Bytes.toBytes(qualifier));
Result result = table.get(get);
for(Cell cell : result.rawCells()){
System.out.println(" 行 键 :" +
Bytes.toString(result.getRow()));
System.out.println(" 列 族 " +
Bytes.toString(CellUtil.cloneFamily(cell)));
System.out.println(" 列 :" +
Bytes.toString(CellUtil.cloneQualifier(cell)));
System.out.println(" 值 :" +
Bytes.toString(CellUtil.cloneValue(cell)));
}
}
3 HBase过滤器过滤数据
测试数据生成:
package demo.filter;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
public class DataInit {
@Test
public void testCreateTable() throws Exception{
//指定的配置信息: ZooKeeper
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum", "192.168.157.111");
//创建一个HBase客户端: HBaseAdmin
HBaseAdmin admin = new HBaseAdmin(conf);
//创建一个表的描述符: 表名
HTableDescriptor hd = new HTableDescriptor(TableName.valueOf("emp"));
//创建列族描述符
HColumnDescriptor hcd1 = new HColumnDescriptor("empinfo");
//加入列族
hd.addFamily(hcd1);
//创建表
admin.createTable(hd);
//关闭客户端
admin.close();
}
@Test
public void testPutData() throws Exception{
//指定的配置信息: ZooKeeper
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum", "192.168.157.111");
//客户端
HTable table = new HTable(conf, "emp");
//第一条数据
Put put1 = new Put(Bytes.toBytes("7369"));
put1.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("SMITH"));
Put put2 = new Put(Bytes.toBytes("7369"));
put2.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("800"));
//第二条数据
Put put3 = new Put(Bytes.toBytes("7499"));
put3.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("ALLEN"));
Put put4 = new Put(Bytes.toBytes("7499"));
put4.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("1600"));
//第三条数据
Put put5 = new Put(Bytes.toBytes("7521"));
put5.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("WARD"));
Put put6 = new Put(Bytes.toBytes("7521"));
put6.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("1250"));
//第四条数据
Put put7 = new Put(Bytes.toBytes("7566"));
put7.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("JONES"));
Put put8 = new Put(Bytes.toBytes("7566"));
put8.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("2975"));
//第五条数据
Put put9 = new Put(Bytes.toBytes("7654"));
put9.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("MARTIN"));
Put put10 = new Put(Bytes.toBytes("7654"));
put10.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("1250"));
//第六条数据
Put put11 = new Put(Bytes.toBytes("7698"));
put11.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("BLAKE"));
Put put12 = new Put(Bytes.toBytes("7698"));
put12.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("2850"));
//第七条数据
Put put13 = new Put(Bytes.toBytes("7782"));
put13.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("CLARK"));
Put put14 = new Put(Bytes.toBytes("7782"));
put14.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("2450"));
//第八条数据
Put put15 = new Put(Bytes.toBytes("7788"));
put15.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("SCOTT"));
Put put16 = new Put(Bytes.toBytes("7788"));
put16.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("3000"));
//第九条数据
Put put17 = new Put(Bytes.toBytes("7839"));
put17.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("KING"));
Put put18 = new Put(Bytes.toBytes("7839"));
put18.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("5000"));
//第十条数据
Put put19 = new Put(Bytes.toBytes("7844"));
put19.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("TURNER"));
Put put20 = new Put(Bytes.toBytes("7844"));
put20.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("1500"));
//第十一条数据
Put put21 = new Put(Bytes.toBytes("7876"));
put21.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("ADAMS"));
Put put22 = new Put(Bytes.toBytes("7876"));
put22.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("1100"));
//第十二条数据
Put put23 = new Put(Bytes.toBytes("7900"));
put23.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("JAMES"));
Put put24 = new Put(Bytes.toBytes("7900"));
put24.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("950"));
//第十三条数据
Put put25 = new Put(Bytes.toBytes("7902"));
put25.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("FORD"));
Put put26 = new Put(Bytes.toBytes("7902"));
put26.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("3000"));
//第十四条数据
Put put27 = new Put(Bytes.toBytes("7934"));
put27.add(Bytes.toBytes("empinfo"), Bytes.toBytes("ename"), Bytes.toBytes("MILLER"));
Put put28 = new Put(Bytes.toBytes("7934"));
put28.add(Bytes.toBytes("empinfo"), Bytes.toBytes("sal"), Bytes.toBytes("1300"));
//构造List
List<Put> list = new ArrayList<Put>();
list.add(put1);
list.add(put2);
list.add(put3);
list.add(put4);
list.add(put5);
list.add(put6);
list.add(put7);
list.add(put8);
list.add(put9);
list.add(put10);
list.add(put11);
list.add(put12);
list.add(put13);
list.add(put14);
list.add(put15);
list.add(put16);
list.add(put17);
list.add(put18);
list.add(put19);
list.add(put20);
list.add(put21);
list.add(put22);
list.add(put23);
list.add(put24);
list.add(put25);
list.add(put26);
list.add(put27);
list.add(put28);
//插入数据
table.put(list);
table.close();
}
}
过滤的demo:
package demo.filter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.ColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.MultipleColumnPrefixFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.junit.Test;
public class FilterDemo {
//1、列值过滤器:根据列的值进行查询 where sal=3000
@Test
public void testSingleColumnValueFilter() throws Exception{
//指定的配置信息: ZooKeeper
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum", "192.168.157.111");
//客户端
HTable table = new HTable(conf, "emp");
//创建一个扫描器和列值过滤器
Scan scan = new Scan();
SingleColumnValueFilter filter = new SingleColumnValueFilter(Bytes.toBytes("empinfo"), //列族
Bytes.toBytes("sal"), //列
CompareOp.EQUAL, //比较运算符
Bytes.toBytes("3000"));
scan.setFilter(filter);
//查询数据
ResultScanner rs = table .getScanner(scan);
for(Result r:rs) {
//取出数据
String name = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
System.out.println(name);
}
table.close();
}
//2、列名前缀过滤器:查询指定的列,查询员工的姓名 select ename from emp;
@Test
public void testColumnPrefixFilter() throws Exception{
//指定的配置信息: ZooKeeper
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum", "192.168.157.111");
//客户端
HTable table = new HTable(conf, "emp");
//创建一个扫描器和列名前缀过滤器
Scan scan = new Scan();
ColumnPrefixFilter filter = new ColumnPrefixFilter(Bytes.toBytes("ename"));
scan.setFilter(filter);
//查询数据
ResultScanner rs = table .getScanner(scan);
for(Result r:rs) {
//取出数据
String name = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
System.out.println(name);
}
table.close();
}
//3、多个列名前缀过滤器:查询指定多个列,查询员工的姓名和薪水:select ename,sal from emp;
@Test
public void testMultipleColumnPrefixFilter() throws Exception{
//指定的配置信息: ZooKeeper
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum", "192.168.157.111");
//客户端
HTable table = new HTable(conf, "emp");
//创建一个扫描器和多个列名前缀过滤器
Scan scan = new Scan();
byte[][] prefix = {Bytes.toBytes("ename"),Bytes.toBytes("sal")};
MultipleColumnPrefixFilter filter = new MultipleColumnPrefixFilter(prefix);
scan.setFilter(filter);
//查询数据
ResultScanner rs = table .getScanner(scan);
for(Result r:rs) {
//取出数据
String name = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
String sal = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
System.out.println(name+"\t"+sal);
}
table.close();
}
//4、Rowkey过滤器:通过行键进行查询
@Test
public void testRowFilter() throws Exception{
//指定的配置信息: ZooKeeper
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum", "192.168.157.111");
//客户端
HTable table = new HTable(conf, "emp");
//创建一个扫描器
Scan scan = new Scan();
//定义一个行键过滤器
//查询行键是7839的员工信息
RowFilter filter = new RowFilter(CompareOp.EQUAL, //比较运算符
new RegexStringComparator("7839")); //使用正则表达式表示值
scan.setFilter(filter);
//查询数据
ResultScanner rs = table .getScanner(scan);
for(Result r:rs) {
//取出数据
String name = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
String sal = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
System.out.println(name+"\t"+sal);
}
table.close();
}
//5、查询中,组合多个过滤器
@Test
public void testFilters() throws Exception{
/*
* 查询工资等于3000的员工姓名
* 1、列值过滤器,得到工资等于3000
* 2、列名前缀的过滤器,得到姓名
*/
//指定的配置信息: ZooKeeper
Configuration conf = new Configuration();
conf.set("hbase.zookeeper.quorum", "192.168.157.111");
//客户端
HTable table = new HTable(conf, "emp");
//创建一个扫描器
Scan scan = new Scan();
//第一个过滤器 列值过滤器,得到工资等于3000
SingleColumnValueFilter filter1 = new SingleColumnValueFilter(Bytes.toBytes("empinfo"), //列族
Bytes.toBytes("sal"), //列
CompareOp.EQUAL, //比较运算符
Bytes.toBytes("3000"));
//第二个过滤器:列名前缀过滤器,得到员工的姓名
ColumnPrefixFilter filter2 = new ColumnPrefixFilter(Bytes.toBytes("ename"));
//Operator.MUST_PASS_ALL:相当于是and
//Operator.MUST_PASS_ONE:相当于是or
FilterList list = new FilterList(Operator.MUST_PASS_ALL);
list.addFilter(filter1);
list.addFilter(filter2);
scan.setFilter(list);
//查询数据
ResultScanner rs = table .getScanner(scan);
for(Result r:rs) {
//取出数据
String name = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("ename")));
String sal = Bytes.toString(r.getValue(Bytes.toBytes("empinfo"), Bytes.toBytes("sal")));
System.out.println(name+"\t"+sal);
}
table.close();
}
}