Hbase工具类-API操作

1. HbaseUtil工具类

HBase 虽然提供了原生的 Java API,但直接使用时代码比较繁琐,因此这里基于官方 API 封装了一个工具类,以简化日常开发。

package com.king

import java.math.BigDecimal
import java.util

import com.alibaba.fastjson.JSONObject
import com.king.constant.EnvConstant
import org.apache.commons.lang3.StringUtils
import org.apache.hadoop.hbase.{CellUtil, HBaseConfiguration, HConstants, TableName}
import org.apache.hadoop.hbase.client.{BufferedMutatorParams, ConnectionFactory, Delete, Get, Put, Scan, Table}
import org.apache.hadoop.hbase.util.Bytes

import scala.collection.mutable.ListBuffer

/**
 * @Author: KingWang
 * @Date: 2022/10/12  
 * @Desc:
 **/
class HbaseUtil {

  // Shared HBase connection, created eagerly when an HbaseUtil instance is constructed.
  // Only the ZooKeeper quorum is set explicitly (read from EnvConstant); every other
  // setting is whatever HBaseConfiguration.create() provides.
  val connection = {
    println("初始化connection")
    val hbaseConf = HBaseConfiguration.create()
    hbaseConf.set(HConstants.ZOOKEEPER_QUORUM, EnvConstant.HBASE_ZOOKEEPER_QUORUM)
    ConnectionFactory.createConnection(hbaseConf)
  }

  /**
   * Obtains a [[Table]] handle from the shared connection.
   *
   * @param tableName table name in `namespace:table` form, e.g. `ods:user`
   * @return the table handle; callers close it when they are done with it
   */
  def getTable(tableName:String):Table = {
    val qualifiedName = TableName.valueOf(tableName)
    connection.getTable(qualifiedName)
  }



  /**
   * Writes a single cell (one column of one row).
   *
   * @param tableName    table name in `namespace:table` form
   * @param rowKey       row key of the target row
   * @param columnFamily column family that holds the column
   * @param columnName   column qualifier
   * @param value        cell value, encoded with `Bytes.toBytes`
   */
  def putCell(tableName:String, rowKey:String, columnFamily:String, columnName:String,value:String) = {
    val table = getTable(tableName)
    try {
      val put = new Put(Bytes.toBytes(rowKey))
      put.addColumn(Bytes.toBytes(columnFamily), Bytes.toBytes(columnName), Bytes.toBytes(value))
      table.put(put)
    } finally {
      // Release the table handle even when the write fails.
      table.close()
    }
  }
  /**
   * Renders a value as the text that gets stored in a cell.
   *
   * A `java.math.BigDecimal` is rendered with `toPlainString`; every other value
   * (including `null`) goes through `String.valueOf`.
   *
   * @param value value to render
   * @return textual form of `value`
   */
  def toPlainString(value: Any): String = value match {
    case number: BigDecimal => number.toPlainString
    case other => String.valueOf(other)
  }

  /**
   * Writes one JSON object as a row: each key of `obj` becomes a column in
   * `columnFamily`. Delegates to `putRowByRemoveEmpty` with its default
   * `isRemoveEmpty` setting.
   *
   * @param tableName    table name in `namespace:table` form
   * @param rowKey       row key of the target row
   * @param columnFamily column family that receives every field
   * @param obj          field name -> value pairs to store
   */
  def putRow(tableName:String, rowKey:String,columnFamily:String,obj:JSONObject):Unit =
    putRowByRemoveEmpty(tableName = tableName, rowkey = rowKey, colFamily = columnFamily, obj = obj)

  /**
   * Inserts or updates the row identified by `rowkey`; each key of `obj` becomes a
   * column in `colFamily`.
   *
   * When nothing is left to write (for example every value was filtered out as
   * empty), the call is a no-op instead of failing on an empty `Put`.
   *
   * @param tableName     table name in `namespace:table` form
   * @param rowkey        row key of the target row
   * @param colFamily     column family that receives every field
   * @param obj           field name -> value pairs to store
   * @param isRemoveEmpty when true (the default), fields for which `valueNotEmpty`
   *                      is false are skipped, so existing cells are left untouched
   */
  def putRowByRemoveEmpty(tableName:String, rowkey: String, colFamily: String, obj: JSONObject, isRemoveEmpty: Boolean = true): Unit = {
    val family = Bytes.toBytes(colFamily)
    val put = new Put(Bytes.toBytes(rowkey))
    obj.forEach((k, v) => {
      if (!isRemoveEmpty || valueNotEmpty(v)) {
        put.addColumn(family, Bytes.toBytes(k), Bytes.toBytes(toPlainString(v)))
      }
    })

    // The HBase client rejects a Put that carries no columns, so only contact the
    // table when there is something to write.
    if (!put.isEmpty) {
      val table = getTable(tableName)
      try {
        table.put(put)
      } finally {
        // Release the table handle even when the write fails.
        table.close()
      }
    }
  }



  /**
   * Writes a list of JSON objects, one row per object. Delegates to
   * `putListByRemoveEmpty` with that method's default settings.
   *
   * @param tableName    table name in `namespace:table` form
   * @param rowKeyField  name of the field inside each object that holds its row key
   * @param columnFamily the single column family that receives every field
   * @param list         objects to write
   */
  def putList(tableName:String, rowKeyField:String,columnFamily:String,list:ListBuffer[JSONObject]):Unit =
    putListByRemoveEmpty(tableName = tableName, rowKeyField = rowKeyField, columnFamily = columnFamily, list = list)

  /**
   * Writes a list of JSON objects through a `BufferedMutator`, one row per object,
   * flushing after every `batchSize` rows and once more at the end.
   *
   * Objects that end up with no column to write are skipped, because the HBase
   * client rejects a `Put` without columns.
   *
   * @param tableName     table name in `namespace:table` form
   * @param rowKeyField   name of the field inside each object that holds its row key
   * @param columnFamily  the single column family that receives every field
   * @param list          objects to write
   * @param isRemoveEmpty when true (the default), fields for which `valueNotEmpty`
   *                      is false are not written, so existing cells are left untouched
   * @param batchSize     number of rows handed to the mutator per flush, default 1000
   * @param bufferSize    write buffer size passed to the mutator, default 24 MiB
   * @throws IllegalArgumentException if an object has no usable value in `rowKeyField`
   */
  def putListByRemoveEmpty(tableName:String, rowKeyField:String,columnFamily:String,list:ListBuffer[JSONObject],isRemoveEmpty: Boolean = true, batchSize:Int = 1000, bufferSize:Int = 1024*1024*24):Unit = {
    val params = new BufferedMutatorParams(TableName.valueOf(tableName))
    params.writeBufferSize(bufferSize)
    val bufferMutator = connection.getBufferedMutator(params)
    try {
      val family = Bytes.toBytes(columnFamily)
      val pending: util.List[Put] = new util.ArrayList[Put]()

      // Hands the pending puts to the mutator and pushes them out immediately.
      def flushPending(): Unit = {
        if (!pending.isEmpty) {
          bufferMutator.mutate(pending)
          bufferMutator.flush()
          pending.clear()
        }
      }

      list.foreach(obj => {
        val rowKey = obj.getString(rowKeyField)
        // Fail with a clear message instead of a bare NullPointerException.
        if (rowKey == null || rowKey.isEmpty) {
          throw new IllegalArgumentException("请指定obj中rowKey的字段名")
        }

        val put = new Put(Bytes.toBytes(rowKey))
        obj.forEach((k, v) => {
          if (!isRemoveEmpty || valueNotEmpty(v)) {
            put.addColumn(family, Bytes.toBytes(k), Bytes.toBytes(toPlainString(v)))
          }
        })

        if (!put.isEmpty) {
          pending.add(put)
          if (pending.size() >= batchSize) {
            flushPending()
          }
        }
      })

      // Write whatever is left over from the last, partially filled batch.
      flushPending()
    } finally {
      // Release the mutator even when a write fails part-way through.
      bufferMutator.close()
    }
  }

  /**
   * Tells whether a value is worth writing.
   *
   * @param value value to inspect
   * @return false for `null`, for text equal to "null" (ignoring case) and for
   *         blank text; true otherwise
   */
  def valueNotEmpty(value: Any): Boolean = value match {
    case null => false
    case present =>
      val text = present.toString
      !"null".equalsIgnoreCase(text) && StringUtils.isNotBlank(text)
  }

  /**
   * Writes one JSON object as a row, taking the row key from a field of the object
   * itself. Every field, including the row-key field, is stored as a column in
   * `columnFamily`; values are not filtered for emptiness.
   *
   * @param tableName    table name in `namespace:table` form
   * @param rowKeyField  name of the field inside `obj` that holds the row key
   * @param columnFamily column family that receives every field
   * @param obj          field name -> value pairs to store
   * @throws Exception if `obj` has no non-empty value in `rowKeyField`
   */
  def putRowByRowKeyField(tableName:String, rowKeyField:String,columnFamily:String,obj:JSONObject):Unit = {
    // Validate before opening the table so that a bad argument cannot leak a handle.
    val rowKey = obj.getString(rowKeyField)
    if(null == rowKey || "".equals(rowKey)){
      throw new Exception("请指定obj中rowKey的字段名")
    }

    val family = Bytes.toBytes(columnFamily)
    val put = new Put(Bytes.toBytes(rowKey))
    obj.forEach((k,v)=> {
      put.addColumn(family,Bytes.toBytes(k),Bytes.toBytes(toPlainString(v)))
    })

    val table = getTable(tableName)
    try {
      table.put(put)
    } finally {
      // Release the table handle even when the write fails.
      table.close()
    }
  }

  /**
   * Reads the value of one column of one row.
   *
   * @param tableName    table name in `namespace:table` form
   * @param rowKey       row key to read
   * @param columnFamily column family that holds the column
   * @param columnName   column qualifier
   * @return the cell value decoded with `Bytes.toString`; `Bytes.toString` passes a
   *         missing value through as `null`
   */
  def getCells(tableName:String, rowKey:String, columnFamily:String,columnName:String):String = {
    val family = Bytes.toBytes(columnFamily)
    val qualifier = Bytes.toBytes(columnName)

    val table = getTable(tableName)
    try {
      // A bare Get fetches the whole row; narrow it to the single requested column.
      // Only the newest value is returned, so no extra versions are requested.
      val get = new Get(Bytes.toBytes(rowKey))
      get.addColumn(family, qualifier)

      val result = table.get(get)
      Bytes.toString(result.getValue(family, qualifier))
    } finally {
      // Release the table handle even when the read fails.
      table.close()
    }
  }

  /**
   * Reads selected columns of one row from a single column family.
   *
   * @param tableName    table name in `namespace:table` form
   * @param rowKey       row key to read
   * @param columnFamily column family that holds the requested columns
   * @param columns      column qualifiers to extract
   * @return `Some(obj)` mapping each requested column to its value (`null` for a
   *         column the row does not have) when the row exists; `None` when the row
   *         does not exist or the read failed (the exception is printed, not rethrown)
   */
  def getRow(tableName:String, rowKey:String, columnFamily:String,columns:Array[String]):Option[JSONObject] = {
    try{
      val table = getTable(tableName)
      try {
        // One round trip: an empty Result means the row does not exist.
        val result = table.get(new Get(Bytes.toBytes(rowKey)))
        if(result.isEmpty){
          None
        }else{
          val family = Bytes.toBytes(columnFamily)
          val obj = new JSONObject()
          columns.foreach(column=>{
            obj.put(column, Bytes.toString(result.getValue(family,Bytes.toBytes(column))))
          })
          Some(obj)
        }
      } finally {
        // Runs on every path, including "row not found" and read failures.
        table.close()
      }
    }catch {
      case e:Exception =>
        e.printStackTrace()
        // A failed read must not look like an existing row with no data.
        None
    }
  }


  /**
   * Reads every column of one row, across all column families.
   *
   * The result is keyed by column qualifier only, so two families that contain
   * the same qualifier collide and the cell visited last wins.
   *
   * @param tableName table name in `namespace:table` form
   * @param rowKey    row key to read
   * @return `Some(obj)` mapping qualifier -> value when the row exists; `None` when
   *         the row does not exist or the read failed (the exception is printed,
   *         not rethrown)
   */
  def getRow(tableName:String, rowKey:String):Option[JSONObject] = {
    try{
      val table = getTable(tableName)
      try {
        // One round trip: an empty Result means the row does not exist.
        val result = table.get(new Get(Bytes.toBytes(rowKey)))
        if(result.isEmpty){
          None
        }else{
          val obj = new JSONObject()
          result.rawCells().foreach(cell=>{
            val column = Bytes.toString(CellUtil.cloneQualifier(cell))
            val value = Bytes.toString(CellUtil.cloneValue(cell))
            obj.put(column, value)
          })
          Some(obj)
        }
      } finally {
        // Runs on every path, including "row not found" and read failures.
        table.close()
      }
    }catch {
      case e:Exception =>
        e.printStackTrace()
        // A failed read must not look like an existing row with no data.
        None
    }
  }


  /**
   * Scans the whole table and returns every row as a JSON object keyed by column
   * qualifier. The row key and the column family are not included in the objects.
   * All rows are collected in memory before the method returns.
   *
   * @param tableName table name in `namespace:table` form
   * @return one JSON object per row, in scan order
   */
  def getAllRows(tableName:String):ListBuffer[JSONObject] = {
    val table = getTable(tableName)
    try {
      val resultScanner = table.getScanner(new Scan())
      try {
        val list = new ListBuffer[JSONObject]
        resultScanner.forEach(result=>{
          val obj = new JSONObject()
          result.rawCells().foreach(cell=>{
            val column = Bytes.toString(CellUtil.cloneQualifier(cell))
            val value = Bytes.toString(CellUtil.cloneValue(cell))
            obj.put(column, value)
          })
          list += obj
        })
        list
      } finally {
        // Close the scanner explicitly; closing the table alone is not relied on for this.
        resultScanner.close()
      }
    } finally {
      table.close()
    }
  }

  /**
   * Deletes one column of one row, removing all of its versions.
   *
   * @param tableName    table name in `namespace:table` form
   * @param rowKey       row key of the target row
   * @param columnFamily column family that holds the column
   * @param columnName   column qualifier to delete
   */
  def deleteCell(tableName:String,rowKey:String,columnFamily:String,columnName:String):Unit = {
    val table = getTable(tableName)
    try {
      val delete = new Delete(Bytes.toBytes(rowKey))
      // addColumns (plural) removes every version; addColumn would remove only the latest.
      delete.addColumns(Bytes.toBytes(columnFamily),Bytes.toBytes(columnName))
      table.delete(delete)
    } finally {
      // Release the table handle even when the delete fails.
      table.close()
    }
  }

  /**
   * Deletes an entire row. Failures are printed and swallowed (best effort).
   *
   * @param tableName table name in `namespace:table` form
   * @param rowKey    row key of the row to delete
   */
  def deleteRow(tableName:String,rowKey:String):Unit = {
    val table = getTable(tableName)
    try{
      // Encode the key with Bytes.toBytes, exactly as the write paths do;
      // String.getBytes() would depend on the platform default charset.
      table.delete(new Delete(Bytes.toBytes(rowKey)))
    }catch {
      case e:Exception => e.printStackTrace()
    }finally {
      table.close()
    }
  }

  /**
   * Deletes several rows by row key in one batch call. Failures are printed and
   * swallowed (best effort).
   *
   * @param tableName  table name in `namespace:table` form
   * @param rowKeyList row keys of the rows to delete; an empty array is a no-op
   */
  def deleteRows(tableName:String,rowKeyList:Array[String]):Unit = {
    val table = getTable(tableName)
    try{
      if(rowKeyList.nonEmpty){
        // Encode keys with Bytes.toBytes, exactly as the write paths do, and submit
        // them together instead of issuing one request per row.
        val deletes = new util.ArrayList[Delete](rowKeyList.length)
        rowKeyList.foreach(rowkey=> deletes.add(new Delete(Bytes.toBytes(rowkey))))
        table.delete(deletes)
      }
    }catch {
      case e:Exception => e.printStackTrace()
    }finally {
      table.close()
    }
  }

  // Closes the shared HBase connection; does nothing when the connection is null.
  def closeConnection() = {
    Option(connection).foreach(conn => conn.close())
  }
}

// Companion factory so callers can write `HbaseUtil()`.
// Every call builds a new HbaseUtil instance.
object HbaseUtil {
  def apply():HbaseUtil = {
    val instance = new HbaseUtil()
    instance
  }
}

另外,连接配置信息单独定义在 EnvConstant 中(注意:上面的工具类是通过 com.king.constant.EnvConstant 导入的,因此该对象需要放在 com.king.constant 包下):

// Connection settings used by the HBase and Phoenix helpers.
// The values below are for the test environment.
object EnvConstant {
  // JDBC URL handed to DriverManager by the Phoenix helper.
  val PHOENIX_URL = "jdbc:phoenix:hadoop200:2181:/hbase"

  // Comma-separated host:port list used as the HBase ZooKeeper quorum.
  val HBASE_ZOOKEEPER_QUORUM = "hadoop200:2181,hadoop201:2181,hadoop202:2181"
}

Phoenix相关的工具类

package com.king

import java.sql.DriverManager
import java.util.Properties

import com.alibaba.fastjson.JSONObject
import com.king.constant.EnvConstant

import scala.collection.mutable.ListBuffer

/**
 * @Author: KingWang
 * @Date: 2022/10/16  
 * @Desc:
 **/
class PhoenixUtil {

  // JDBC connection to Phoenix, opened eagerly when a PhoenixUtil instance is constructed.
  // The driver class is loaded explicitly first; no extra connection properties are set.
  val connection = {
    Class.forName("org.apache.phoenix.jdbc.PhoenixDriver")
    DriverManager.getConnection(EnvConstant.PHOENIX_URL, new Properties())
  }


  // Closes the underlying JDBC connection.
  def close(): Unit = connection.close()

  /**
   * Runs a query and collects one string column from every returned row.
   *
   * @param sql    query to execute; it is sent to Phoenix as-is
   * @param column label of the column to read from each row, `rowid` by default
   * @return the column's value for each row, in result order
   */
  def queryObject(sql:String, column:String = "rowid"):ListBuffer[String] = {
    val stmt = connection.createStatement
    try {
      val rs = stmt.executeQuery(sql)
      try {
        val list = new ListBuffer[String]
        while (rs.next()) {
          list += rs.getString(column)
        }
        list
      } finally {
        rs.close()
      }
    } finally {
      // Release the statement even when the query or the row iteration fails.
      stmt.close()
    }
  }

  /**
   * Runs a query and returns every row as a JSON object keyed by column name.
   *
   * @param sql query to execute; it is sent to Phoenix as-is
   * @return one JSON object per row, in result order
   */
  def queryList(sql:String):ListBuffer[JSONObject] = {
    val stmt = connection.createStatement
    try {
      val rs = stmt.executeQuery(sql)
      try {
        val list = new ListBuffer[JSONObject]
        val metaData = rs.getMetaData()
        // The column count is fixed for the whole result set; read it once.
        val columnCount = metaData.getColumnCount
        while (rs.next()) {
          val obj = new JSONObject()
          for(i <- 1 to columnCount){
            // NOTE(review): keys come from getColumnName; if queries use `AS` aliases,
            // getColumnLabel may be what callers expect - confirm before changing.
            obj.put(metaData.getColumnName(i), rs.getObject(i))
          }
          list += obj
        }
        list
      } finally {
        rs.close()
      }
    } finally {
      // Release the statement even when the query or the row iteration fails.
      stmt.close()
    }
  }
}

// Companion factory so callers can write `PhoenixUtil()`.
// Every call builds a new PhoenixUtil instance.
object PhoenixUtil{
  def apply():PhoenixUtil = {
    val instance = new PhoenixUtil()
    instance
  }
}

2. 测试案例

指定测试表,初始化连接

    // Target table in "namespace:table" form, plus the helper instance used by the examples.
    val tableName = "DEMO:USER"
    val hbaseUtil = HbaseUtil()

2.1 插入数据

    println("=========插入数据===========")
    // (rowKey, qualifier, value) triples; each one is written to column family "info1", in order.
    Seq(
      ("1011", "age", "10"),
      ("1012", "age", "15"),
      ("1013", "sex", "male"),
      ("1013", "age", "20"),
      ("1015", "name", "kingww"),
      ("1015", "age", "22"),
      ("1015", "sex", "male")
    ).foreach { case (rowKey, qualifier, cellValue) =>
      hbaseUtil.putCell(tableName, rowKey, "info1", qualifier, cellValue)
    }

2.2 插入1个对象

    println("=========插入Object对象===========")
    // Build one record; its "rowkey" field doubles as the HBase row key.
    val json = new JSONObject()
    json.put("rowkey","1017")
    json.put("name","wwwww")
    json.put("age","111")
    json.put("sex","female")
    hbaseUtil.putRowByRowKeyField(tableName, rowKeyField = "rowkey", columnFamily = "info", obj = json)

2.3 批量插入对象

    println("=========批量插入Object对象===========")
    val count = 10000
    val objList = new ListBuffer[JSONObject]
    // `to` is inclusive, so exactly `count` records (row keys 1..count) are generated.
    for(i <- 1 to count){
      val json = new JSONObject()
      json.put("name", s"wwwww_$i")
      json.put("age","111")
      json.put("sex","female")
      json.put("rowkey", i)
      objList += json
    }
    // Each record's "rowkey" field supplies its row key; all fields go to family "info".
    hbaseUtil.putList(tableName,"rowkey","info",objList)

2.4 查询单元格数据

    println("==========查询单元格数据=============")
    // Read column "name" in family "info" of row "1003" and print its value.
    println(hbaseUtil.getCells(tableName,"1003","info","name"))

2.5 查询单行数据

    println("=================查询单行数据==============")
    // getRow returns an Option: print the row when present, a notice otherwise.
    val obj = hbaseUtil.getRow(tableName,"1003")
    obj match {
      case Some(row) => println(row)
      case None => println("未查到记录")
    }

2.6 删除一行

    println("====================删除一行=============")
    // Remove the whole row with row key "1003".
    hbaseUtil.deleteRow(tableName,"1003")

2.7 删除一个单元格

    println("=============删除一个单元格===============")
    // Remove column "age" in family "info1" of row "1003".
    hbaseUtil.deleteCell(tableName,"1003","info1","age")

2.8 查询表的所有数据

    // Fetch every row of the table and print one row per line.
    val list:ListBuffer[JSONObject] = hbaseUtil.getAllRows(tableName)
    list.foreach(elem => println(elem))

所有操作完成后,记得调用 closeConnection() 关闭 HBase 连接。

    // Release the shared HBase connection once all operations are finished.
    hbaseUtil.closeConnection()
posted @   硅谷工具人  阅读(143)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
成功之道,在于每个人生阶段都要有不同的目标,并且通过努力实现自己的目标,毕竟人生不过百年! 所有奋斗的意义在于为个人目标实现和提升家庭幸福,同时能推进社会进步和国家目标! 正如古人讲的正心诚意格物致知,修身齐家治国平天下。
点击右上角即可分享
微信分享提示