Hbase工具类-API操作
1. HbaseUtil工具类
HBase 官方虽然提供了客户端 API,但直接使用时比较繁琐(需要反复获取 Table、构造 Put/Get/Delete 对象并手动关闭资源),因此这里基于官方 API 封装了一个工具类,以简化日常开发。
package com.king
import java.math.BigDecimal
import java.util
import com.alibaba.fastjson.JSONObject
import com.king.constant.EnvConstant
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HConstants, TableName}
import org.apache.hadoop.hbase.client.{BufferedMutatorParams, ConnectionFactory, Delete, Get, Put, Scan, Table}
import org.apache.hadoop.hbase.util.Bytes
import scala.collection.mutable.ListBuffer
/**
* @Author: KingWang
* @Date: 2022/10/12
* @Desc:
**/
class HbaseUtil {
// Shared HBase connection, created eagerly while the enclosing instance is constructed.
// The ZooKeeper quorum is taken from EnvConstant.HBASE_ZOOKEEPER_QUORUM.
val connection = {
  println("初始化connection")
  val hbaseConf = HBaseConfiguration.create()
  hbaseConf.set(HConstants.ZOOKEEPER_QUORUM, EnvConstant.HBASE_ZOOKEEPER_QUORUM)
  val opened = ConnectionFactory.createConnection(hbaseConf)
  opened
}
/**
 * Looks up a table handle on the shared connection.
 *
 * @param tableName table name in `namespace:table` form, for example `ods:user`
 * @return the [[Table]] handle; the methods in this class close it after use,
 *         and external callers should do the same
 */
def getTable(tableName:String):Table = {
  val qualifiedName = TableName.valueOf(tableName)
  connection.getTable(qualifiedName)
}
/**
 * Writes one cell: a single column of a single row.
 *
 * @param tableName    table name in `namespace:table` form
 * @param rowKey       row key of the target row
 * @param columnFamily column family that holds the column
 * @param columnName   column qualifier
 * @param value        value to store, encoded with `Bytes.toBytes`
 */
def putCell(tableName:String, rowKey:String, columnFamily:String, columnName:String,value:String): Unit = {
  val table = getTable(tableName)
  try {
    val put = new Put(Bytes.toBytes(rowKey))
    put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName), Bytes.toBytes(value))
    table.put(put)
  } finally {
    // Release the table handle even when building or sending the Put fails.
    table.close()
  }
}
/**
 * Renders a value as the text that gets stored in a cell.
 *
 * `java.math.BigDecimal` values are rendered with `toPlainString`; every other
 * value, including `null`, goes through `String.valueOf`.
 *
 * @param value value to render
 * @return the textual form of `value`
 */
def toPlainString(value: Any): String = value match {
  case number: BigDecimal => number.toPlainString
  case other => String.valueOf(other)
}
/**
 * Writes every non-empty field of `obj` into one row, all under a single column family.
 * Delegates to `putRowByRemoveEmpty` with empty-value filtering switched on.
 *
 * @param tableName    table name in `namespace:table` form
 * @param rowKey       row key of the target row
 * @param columnFamily column family that receives all fields
 * @param obj          field name -> value pairs to store
 */
def putRow(tableName:String, rowKey:String,columnFamily:String,obj:JSONObject):Unit =
  putRowByRemoveEmpty(tableName, rowKey, columnFamily, obj, isRemoveEmpty = true)
/**
 * Inserts or updates one row from the fields of `obj`.
 *
 * Each entry of `obj` becomes one column (qualifier = key, value rendered with
 * `toPlainString`). When nothing is left to write (empty `obj`, or every value
 * filtered out) the call is a no-op instead of submitting an empty `Put`.
 *
 * @param tableName     table name in `namespace:table` form
 * @param rowkey        row key of the target row
 * @param colFamily     column family that receives all fields
 * @param obj           field name -> value pairs to store
 * @param isRemoveEmpty when true (default), fields rejected by `valueNotEmpty`
 *                      are skipped so existing cell values are left untouched
 */
def putRowByRemoveEmpty(tableName:String, rowkey: String, colFamily: String, obj: JSONObject, isRemoveEmpty: Boolean = true): Unit = {
  val family = Bytes.toBytes(colFamily)
  // Build the Put before opening the table so a bad row key cannot leak a handle.
  val put = new Put(Bytes.toBytes(rowkey))
  obj.forEach((k, v) => {
    if (!isRemoveEmpty || valueNotEmpty(v)) {
      put.addColumn(family, Bytes.toBytes(k), Bytes.toBytes(toPlainString(v)))
    }
  })
  // A Put without any column is rejected by the HBase client, so skip the write.
  if (!put.isEmpty) {
    val table = getTable(tableName)
    try {
      table.put(put)
    } finally {
      table.close()
    }
  }
}
/**
 * Bulk-writes a list of objects, one row per object, with empty-value filtering
 * switched on. Delegates to `putListByRemoveEmpty` using its default batch and
 * buffer sizes.
 *
 * @param tableName    table name in `namespace:table` form
 * @param rowKeyField  name of the field in each object that holds the row key
 * @param columnFamily the single column family that receives all fields
 * @param list         objects to write
 */
def putList(tableName:String, rowKeyField:String,columnFamily:String,list:ListBuffer[JSONObject]):Unit =
  putListByRemoveEmpty(tableName, rowKeyField, columnFamily, list, isRemoveEmpty = true)
/**
 * Bulk-writes a list of objects through a `BufferedMutator`, one row per object.
 *
 * Every entry of an object becomes one column (qualifier = key, value rendered
 * with `toPlainString`); this includes the row-key field itself. Objects that
 * end up with no column to write are skipped.
 *
 * @param tableName     table name in `namespace:table` form
 * @param rowKeyField   name of the field in each object that holds the row key
 * @param columnFamily  the single column family that receives all fields
 * @param list          objects to write
 * @param isRemoveEmpty when true (default), fields rejected by `valueNotEmpty`
 *                      are not written, so existing cell values are left untouched
 * @param batchSize     number of rows submitted and flushed per batch (default 1000);
 *                      must be positive
 * @param bufferSize    value passed to `BufferedMutatorParams.writeBufferSize`
 *                      (default 1024*1024*24; presumably bytes — confirm against the HBase docs)
 * @throws IllegalArgumentException if `batchSize` is not positive or an object
 *                                  has no (or an empty) row key
 */
def putListByRemoveEmpty(tableName:String, rowKeyField:String,columnFamily:String,list:ListBuffer[JSONObject],isRemoveEmpty: Boolean = true, batchSize:Int = 1000, bufferSize:Int = 1024*1024*24):Unit = {
  require(batchSize > 0, s"batchSize must be positive, got $batchSize")
  val params = new BufferedMutatorParams(TableName.valueOf(tableName))
  params.writeBufferSize(bufferSize)
  val bufferMutator = connection.getBufferedMutator(params)
  try {
    val family = Bytes.toBytes(columnFamily)
    val pending: util.List[Put] = new util.ArrayList[Put]()

    // Sends the collected Puts and empties the local batch.
    def submitPending(): Unit = {
      if (!pending.isEmpty) {
        bufferMutator.mutate(pending)
        bufferMutator.flush()
        pending.clear()
      }
    }

    list.foreach(obj => {
      val rowKey = obj.getString(rowKeyField)
      if (rowKey == null || rowKey.isEmpty) {
        throw new IllegalArgumentException(s"Missing row key field '$rowKeyField' in object: $obj")
      }
      val put = new Put(Bytes.toBytes(rowKey))
      obj.forEach((k, v) => {
        if (!isRemoveEmpty || valueNotEmpty(v)) {
          put.addColumn(family, Bytes.toBytes(k), Bytes.toBytes(toPlainString(v)))
        }
      })
      // A Put without any column is rejected by the HBase client, so leave it out.
      if (!put.isEmpty) {
        pending.add(put)
        if (pending.size() >= batchSize) {
          submitPending()
        }
      }
    })
    // Submit the final, partially filled batch.
    submitPending()
  } finally {
    // Always release the mutator, including when a batch fails midway.
    bufferMutator.close()
  }
}
/**
 * Decides whether a value counts as "present" for the write helpers.
 *
 * @param value value to inspect
 * @return false for `null`, for text equal to "null" (any letter case) and for
 *         blank text as judged by `StringUtils.isNotBlank`; true otherwise
 */
def valueNotEmpty(value: Any): Boolean = {
  if (value == null) {
    false
  } else {
    val text = value.toString
    val isNullLiteral = "null".equalsIgnoreCase(text)
    !isNullLiteral && StringUtils.isNotBlank(text)
  }
}
/**
 * Writes all fields of `obj` into one row whose key is read from `obj` itself.
 *
 * Every entry is written without empty-value filtering (a `null` value is
 * rendered by `toPlainString`), and the row-key field is written as a column too.
 *
 * @param tableName    table name in `namespace:table` form
 * @param rowKeyField  name of the field in `obj` that holds the row key
 * @param columnFamily column family that receives all fields
 * @param obj          field name -> value pairs to store
 * @throws Exception if `obj` has no value, or an empty value, for `rowKeyField`
 */
def putRowByRowKeyField(tableName:String, rowKeyField:String,columnFamily:String,obj:JSONObject):Unit = {
  // Validate before opening the table so a rejected call cannot leak a handle.
  val rowKey = obj.getString(rowKeyField)
  if (null == rowKey || rowKey.isEmpty) {
    throw new Exception("请指定obj中rowKey的字段名")
  }
  val family = Bytes.toBytes(columnFamily)
  val put = new Put(Bytes.toBytes(rowKey))
  obj.forEach((k, v) => {
    put.addColumn(family, Bytes.toBytes(k), Bytes.toBytes(toPlainString(v)))
  })
  val table = getTable(tableName)
  try {
    table.put(put)
  } finally {
    table.close()
  }
}
/**
 * Reads the value of one column of one row.
 *
 * Only the newest version is requested: the value is read with
 * `Result.getValue`, so fetching every version would be wasted work.
 * (`Get.readAllVersions()` or `Get.readVersions(n)` can be used when older
 * versions are actually needed.)
 *
 * @param tableName    table name in `namespace:table` form
 * @param rowKey       row key to read
 * @param columnFamily column family that holds the column
 * @param columnName   column qualifier
 * @return the cell value decoded with `Bytes.toString`; NOTE(review): expected
 *         to be `null` when the cell does not exist — confirm against the HBase docs
 */
def getCells(tableName:String, rowKey:String, columnFamily:String,columnName:String):String = {
  val family = Bytes.toBytes(columnFamily)
  val qualifier = Bytes.toBytes(columnName)
  val table = getTable(tableName)
  try {
    val get = new Get(Bytes.toBytes(rowKey))
    // Without addColumn the Get would fetch the whole row.
    get.addColumn(family, qualifier)
    val result = table.get(get)
    Bytes.toString(result.getValue(family, qualifier))
  } finally {
    // Release the table handle even when the read fails.
    table.close()
  }
}
/**
 * Reads selected columns of one row from a single column family.
 *
 * The whole row is fetched with one `Get`; the requested qualifiers are then
 * looked up in the result. Each requested column is put into the returned
 * object under its qualifier name, with whatever `Result.getValue` yields
 * (presumably `null` for a column the row does not have — confirm).
 *
 * @param tableName    table name in `namespace:table` form
 * @param rowKey       row key to read
 * @param columnFamily column family that holds the requested columns
 * @param columns      qualifiers to extract
 * @return `Some(object)` when the row exists; `None` when the row is absent or
 *         the read failed (the failure's stack trace is printed)
 */
def getRow(tableName:String, rowKey:String, columnFamily:String,columns:Array[String]):Option[JSONObject] = {
  try {
    val table = getTable(tableName)
    try {
      // One round trip: an empty Result means the row does not exist.
      val result = table.get(new Get(Bytes.toBytes(rowKey)))
      if (result.isEmpty) {
        None
      } else {
        val family = Bytes.toBytes(columnFamily)
        val obj = new JSONObject()
        columns.foreach(column => {
          val value = Bytes.toString(result.getValue(family, Bytes.toBytes(column)))
          obj.put(column, value)
        })
        Some(obj)
      }
    } finally {
      // Runs on the not-found path and on failures as well.
      table.close()
    }
  } catch {
    case e: Exception =>
      e.printStackTrace()
      // Report a failed read as "no row" rather than as an empty object.
      None
  }
}
/**
 * Reads a whole row, across all of its column families.
 *
 * Each cell is stored in the returned object under its qualifier only; the
 * family name is not part of the key, so equally named qualifiers in different
 * families overwrite each other.
 *
 * @param tableName table name in `namespace:table` form
 * @param rowKey    row key to read
 * @return `Some(object)` when the row exists; `None` when the row is absent or
 *         the read failed (the failure's stack trace is printed)
 */
def getRow(tableName:String, rowKey:String):Option[JSONObject] = {
  try {
    val table = getTable(tableName)
    try {
      // One round trip: an empty Result means the row does not exist.
      val result = table.get(new Get(Bytes.toBytes(rowKey)))
      if (result.isEmpty) {
        None
      } else {
        val obj = new JSONObject()
        // Read qualifier and value straight from each returned cell.
        result.rawCells().foreach(cell => {
          val column = Bytes.toString(CellUtil.cloneQualifier(cell))
          val value = Bytes.toString(CellUtil.cloneValue(cell))
          obj.put(column, value)
        })
        Some(obj)
      }
    } finally {
      // Runs on the not-found path and on failures as well.
      table.close()
    }
  } catch {
    case e: Exception =>
      e.printStackTrace()
      // Report a failed read as "no row" rather than as an empty object.
      None
  }
}
/**
 * Scans the whole table and returns one object per row.
 *
 * Each cell is stored under its qualifier only: the row key and the family
 * name are not included, so equally named qualifiers in different families
 * overwrite each other. All rows are collected in memory.
 *
 * @param tableName table name in `namespace:table` form
 * @return one object per scanned row, in scan order
 */
def getAllRows(tableName:String):ListBuffer[JSONObject] = {
  val table = getTable(tableName)
  try {
    val resultScanner = table.getScanner(new Scan())
    try {
      val list = new ListBuffer[JSONObject]
      resultScanner.forEach(result => {
        val obj = new JSONObject()
        result.rawCells().foreach(cell => {
          val column = Bytes.toString(CellUtil.cloneQualifier(cell))
          val value = Bytes.toString(CellUtil.cloneValue(cell))
          obj.put(column, value)
        })
        list += obj
      })
      list
    } finally {
      // The scanner is closed explicitly; closing only the table left it open.
      resultScanner.close()
    }
  } finally {
    table.close()
  }
}
/**
 * Deletes one column of one row.
 *
 * Uses `Delete.addColumns`, which targets all versions of the column;
 * `Delete.addColumn` would remove a single version only.
 *
 * @param tableName    table name in `namespace:table` form
 * @param rowKey       row key of the target row
 * @param columnFamily column family that holds the column
 * @param columnName   column qualifier to delete
 */
def deleteCell(tableName:String,rowKey:String,columnFamily:String,columnName:String):Unit = {
  val table = getTable(tableName)
  try {
    val delete = new Delete(Bytes.toBytes(rowKey))
    delete.addColumns(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName))
    table.delete(delete)
  } finally {
    // Release the table handle even when the delete fails.
    table.close()
  }
}
/**
 * Deletes an entire row. Best effort: a failure is printed, not rethrown.
 *
 * @param tableName table name in `namespace:table` form
 * @param rowKey    row key of the row to delete
 */
def deleteRow(tableName:String,rowKey:String):Unit = {
  val table = getTable(tableName)
  try {
    // Encode the key with Bytes.toBytes, like every write path in this class;
    // String.getBytes() depends on the platform default charset.
    table.delete(new Delete(Bytes.toBytes(rowKey)))
  } catch {
    case e: Exception => e.printStackTrace()
  } finally {
    table.close()
  }
}
/**
 * Deletes several rows by row key in one batch call.
 * Best effort: a failure is printed, not rethrown. A null or empty key list is a no-op.
 *
 * @param tableName  table name in `namespace:table` form
 * @param rowKeyList row keys of the rows to delete
 */
def deleteRows(tableName:String,rowKeyList:Array[String]):Unit = {
  if (rowKeyList != null && rowKeyList.nonEmpty) {
    val table = getTable(tableName)
    try {
      // A mutable java.util list is used on purpose: NOTE(review) the HBase client
      // is documented to modify the list passed to Table.delete(List) — confirm.
      val deletes: util.List[Delete] = new util.ArrayList[Delete](rowKeyList.length)
      rowKeyList.foreach(rowkey => {
        // Encode the key with Bytes.toBytes, like every write path in this class;
        // String.getBytes() depends on the platform default charset.
        deletes.add(new Delete(Bytes.toBytes(rowkey)))
      })
      // One batch call instead of one call per row.
      table.delete(deletes)
    } catch {
      case e: Exception => e.printStackTrace()
    } finally {
      table.close()
    }
  }
}
/** Closes the shared HBase connection when it is non-null; call once all work is finished. */
def closeConnection() = {
  Option(connection).foreach(_.close())
}
}
/** Companion factory: `HbaseUtil()` builds a new [[HbaseUtil]] instance on every call. */
object HbaseUtil {
  def apply():HbaseUtil = {
    val instance = new HbaseUtil()
    instance
  }
}
另外定义了连接配置信息(即上面 import 的 com.king.constant.EnvConstant):
/** Connection settings shared by the HBase and Phoenix helpers. */
object EnvConstant {
  // Test environment
  // ZooKeeper quorum used for the HBase client configuration.
  val HBASE_ZOOKEEPER_QUORUM: String = "hadoop200:2181,hadoop201:2181,hadoop202:2181"
  // JDBC URL used to open the Phoenix connection.
  val PHOENIX_URL: String = "jdbc:phoenix:hadoop200:2181:/hbase"
}
Phoenix相关的工具类
package com.king
import java.sql.DriverManager
import java.util.Properties
import com.alibaba.fastjson.JSONObject
import com.king.constant.EnvConstant
import scala.collection.mutable.ListBuffer
/**
* @Author: KingWang
* @Date: 2022/10/16
* @Desc:
**/
/**
 * Small JDBC helper for running Phoenix queries.
 *
 * One connection is opened when the instance is created (URL taken from
 * `EnvConstant.PHOENIX_URL`); call `close()` when finished.
 */
class PhoenixUtil {
  // Loads the Phoenix JDBC driver class, then opens the connection with empty properties.
  val connection = {
    Class.forName("org.apache.phoenix.jdbc.PhoenixDriver")
    val prop = new Properties()
    DriverManager.getConnection(EnvConstant.PHOENIX_URL, prop)
  }

  /** Closes the underlying JDBC connection. */
  def close(): Unit = {
    connection.close()
  }

  /**
   * Runs a query and collects the `rowid` column of every returned row.
   *
   * @param sql query to execute; its result set must contain a column named `rowid`
   * @return the `rowid` values in result order
   */
  def queryObject(sql:String):ListBuffer[String] = {
    val stmt = connection.createStatement
    try {
      val rs = stmt.executeQuery(sql)
      try {
        val list = new ListBuffer[String]
        while (rs.next()) {
          list += rs.getString("rowid")
        }
        list
      } finally {
        // Close the result set even when reading a row fails.
        rs.close()
      }
    } finally {
      // Close the statement even when the query itself fails.
      stmt.close()
    }
  }

  /**
   * Runs a query and converts every returned row into a JSON object.
   *
   * Keys come from `ResultSetMetaData.getColumnName`, values from `ResultSet.getObject`.
   * NOTE(review): `getColumnName` may not reflect `AS` aliases — use
   * `getColumnLabel` if aliased keys are wanted; confirm with the driver.
   *
   * @param sql query to execute
   * @return one object per row, in result order
   */
  def queryList(sql:String):ListBuffer[JSONObject] = {
    val stmt = connection.createStatement
    try {
      val rs = stmt.executeQuery(sql)
      try {
        val list = new ListBuffer[JSONObject]
        val metaData = rs.getMetaData()
        // The column count is fixed for the result set; read it once.
        val columnCount = metaData.getColumnCount
        while (rs.next()) {
          val obj = new JSONObject()
          // JDBC column indexes are 1-based.
          for (i <- 1 to columnCount) {
            obj.put(metaData.getColumnName(i), rs.getObject(i))
          }
          list += obj
        }
        list
      } finally {
        // Close the result set even when reading a row fails.
        rs.close()
      }
    } finally {
      // Close the statement even when the query itself fails.
      stmt.close()
    }
  }
}
/** Companion factory: `PhoenixUtil()` builds a new [[PhoenixUtil]] instance on every call. */
object PhoenixUtil {
  def apply():PhoenixUtil = {
    val instance = new PhoenixUtil()
    instance
  }
}
2. 测试案例
指定测试表,初始化连接
// Target table in `namespace:table` form, shared by all examples below.
val tableName = "DEMO:USER"
// Creating the utility instance opens the HBase connection.
val hbaseUtil = HbaseUtil()
2.1 插入数据
// Write individual cells into column family "info1", one putCell call per (row, column, value).
println("=========插入数据===========")
Seq(
  ("1011", "age", "10"),
  ("1012", "age", "15"),
  ("1013", "sex", "male"),
  ("1013", "age", "20"),
  ("1015", "name", "kingww"),
  ("1015", "age", "22"),
  ("1015", "sex", "male")
).foreach { case (rowKey, column, cellValue) =>
  hbaseUtil.putCell(tableName, rowKey, "info1", column, cellValue)
}
2.2 插入1个对象
// Insert one object as a row; the row key is taken from the object's "rowkey" field.
println("=========插入Object对象===========")
val json = new JSONObject()
json.put("name","wwwww")
json.put("age","111")
json.put("sex","female")
// This field supplies the row key; putRowByRowKeyField writes it as a column as well.
json.put("rowkey","1017")
hbaseUtil.putRowByRowKeyField(tableName,"rowkey","info",json)
2.3 批量插入对象
// Bulk-insert `count` objects; each object's "rowkey" field supplies its row key.
println("=========批量插入Object对象===========")
val count = 10000
val objList = new ListBuffer[JSONObject]
// `1 to count` is inclusive, so exactly `count` rows are generated
// (`1 until count` stopped one short).
for(i <- 1 to count){
  // Named `row` so it does not shadow an outer `json` value.
  val row = new JSONObject()
  row.put("name","wwwww" + "_" + i)
  row.put("age","111")
  row.put("sex","female")
  row.put("rowkey", i)
  objList += row
}
hbaseUtil.putList(tableName,"rowkey","info",objList)
2.4 查询单元格数据
// Read a single cell: row "1003", column family "info", qualifier "name".
println("==========查询单元格数据=============")
println(hbaseUtil.getCells(tableName,"1003","info","name"))
2.5 查询单行数据
// Read a whole row; getRow returns an Option, so both outcomes are handled.
println("=================查询单行数据==============")
val obj = hbaseUtil.getRow(tableName,"1003")
obj match {
  case Some(row) => println(row)
  case None => println("未查到记录")
}
2.6 删除一行
// Delete the entire row "1003".
println("====================删除一行=============")
hbaseUtil.deleteRow(tableName,"1003")
2.7 删除一个单元格
// Delete one column (family "info1", qualifier "age") of row "1003".
println("=============删除一个单元格===============")
hbaseUtil.deleteCell(tableName,"1003","info1","age")
2.8 查询表的所有数据
// Scan the whole table and print one JSON object per row.
val list:ListBuffer[JSONObject] = hbaseUtil.getAllRows(tableName)
list.foreach(elem => println(elem))
所有操作完成后,记得关闭连接请求。
// Close the shared HBase connection once every operation has finished.
hbaseUtil.closeConnection()
本文来自博客园,作者:硅谷工具人,转载请注明原文链接:https://www.cnblogs.com/30go/p/16810490.html
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人