|NO.Z.00018|——————————|BigDataEnd|——|Hadoop & Real-Time Data Warehouse.V18|——|Project.v18|DIM Layer Processing|Reading the Area Dimension Table from HBase.V2|

一、Implement the program: sink data from the ODS layer down to the DIM layer
### --- HBaseReader: the class that reads data from HBase

package dim

import java.util

//import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{Cell, TableName}

import scala.collection.JavaConverters._

/**
 * Read yanqi_area from HBase,
 * transform each record into: area id, area name, city id, city name, province id, province name,
 * then write the result back to HBase.
 */
class HBaseReader extends RichSourceFunction[(String,String)]{
  private var conn : Connection = null
  private var table : Table = null
  private var scan : Scan = null


  override def open(parameters: Configuration): Unit = {
    conn = new ConnHBase().connToHbase

    //    val tableName: TableName = TableName.valueOf("yanqi_trade_orders")
    val tableName: TableName = TableName.valueOf("yanqi_area")
    val cf1: String = "f1"
    table = conn.getTable(tableName)
    scan = new Scan()

    scan.addFamily(Bytes.toBytes(cf1))
  }

  override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
    val rs: ResultScanner = table.getScanner(scan)
    val iterator: util.Iterator[Result] = rs.iterator()
    while(iterator.hasNext) {
      val result: Result = iterator.next()
      val rowKey: String = Bytes.toString(result.getRow)
      val buffer: StringBuffer = new StringBuffer()
      for(cell: Cell <- result.listCells().asScala) {
        val value: String = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
        buffer.append(value).append("-")
      }

      val valueString: String = buffer.replace(buffer.length() - 1, buffer.length(), "").toString
      ctx.collect((rowKey,valueString))

    }

  }

  override def cancel(): Unit = {

  }

  override def close(): Unit = {
    try {
      if(table != null) {
        table.close()
      }
      if(conn != null) {
        conn.close()
      }
    } catch {
      case e:Exception => println(e.getMessage)
    }
  }
}
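
Both the HBaseReader above and the HBaseWriterSink below obtain their connection from a ConnHBase helper that is not shown in this post (its myutils import is commented out). A minimal sketch of such a helper, assuming it lives in the dim package; the ZooKeeper quorum and client port are placeholders for the actual cluster:

package dim

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}

class ConnHBase {
  // Build an HBase Connection; the quorum/port values below are placeholders
  // and must match the actual cluster configuration.
  def connToHbase: Connection = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "hadoop1,hadoop2,hadoop3")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    ConnectionFactory.createConnection(conf)
  }
}
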
### --- HBaseWriterSink: writes the DIM dimension table data into HBase

package dim

//import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Put, Table}

class HBaseWriterSink extends RichSinkFunction[String]{
  var connection : Connection = _
  var hbTable : Table = _
  override def open(parameters: Configuration): Unit = {
    connection = new ConnHBase().connToHbase
    hbTable = connection.getTable(TableName.valueOf("dim_yanqi_area"))
  }

  override def close(): Unit = {
    if(hbTable != null) {
      hbTable.close()
    }
    if (connection != null) {
      connection.close()
    }
  }

  def insertDimArea(hbTable: Table, value: String): Unit = {
    val infos: Array[String] = value.split(",")
    val areaId: String = infos(0).trim
    val aname: String = infos(1).trim
    val cid: String = infos(2).trim
    val city: String = infos(3).trim
    val proid: String = infos(4).trim
    val province: String = infos(5).trim

    val put = new Put(areaId.getBytes())
    put.addColumn("f1".getBytes(),"aname".getBytes(),aname.getBytes())
    put.addColumn("f1".getBytes(),"cid".getBytes(),cid.getBytes())
    put.addColumn("f1".getBytes(),"city".getBytes(),city.getBytes())
    put.addColumn("f1".getBytes(),"proId".getBytes(),proid.getBytes())
    put.addColumn("f1".getBytes(),"province".getBytes(),province.getBytes())

    hbTable.put(put)
  }

  override def invoke(value: String, context: SinkFunction.Context[_]): Unit = {
    // print the incoming value for debugging
    // println(value)
    insertDimArea(hbTable,value)
  }

}
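
The sink writes into the dim_yanqi_area table with column family f1, so that table must exist before the job runs. It can be created in the hbase shell with create 'dim_yanqi_area','f1', or programmatically; the sketch below assumes an HBase 2.x client and reuses the ConnHBase helper shown earlier:

package dim

import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{ColumnFamilyDescriptorBuilder, TableDescriptorBuilder}

object CreateDimAreaTable {
  def main(args: Array[String]): Unit = {
    val conn = new ConnHBase().connToHbase
    val admin = conn.getAdmin
    val tableName = TableName.valueOf("dim_yanqi_area")
    // Create the target table with column family f1 if it does not exist yet.
    if (!admin.tableExists(tableName)) {
      val desc = TableDescriptorBuilder.newBuilder(tableName)
        .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f1"))
        .build()
      admin.createTable(desc)
    }
    admin.close()
    conn.close()
  }
}
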
### --- AreaDetailInfo (this code could equally be placed in the DWD layer)

package dim

import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row

// Case class for one area record: id, name, and parent id (pid).
// Assumed here because it is used below but not shown elsewhere in this post.
case class AreaDetail(id: Int, name: String, pid: Int)

object AreaDetailInfo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(5000)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    /**
     * (110107,010-39.9056-3-116.223-中国,北京,北京市,石景山区-石景山区-110100-Shijingshan-石景山-100043)
     * (110116,010-40.316-3-116.632-中国,北京,北京市,怀柔区-怀柔区-110100-Huairou-怀柔-101400)
     */
    val data: DataStream[(String, String)] = env.addSource(new HBaseReader)

//    data.print()

    // extract the required fields: id, name, pid
    val dataStream: DataStream[AreaDetail] = data.map(x => {
      val id: Int = x._1.toInt
      val datas: Array[String] = x._2.split("-")
      val name: String = datas(5).trim
      val pid: Int = datas(6).trim.toInt

      AreaDetail(id, name, pid)
    })

    // transform into: area id, area name, city id, city name, province id, province name
    // Flink Table API
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)

    // register a temporary view
    tableEnv.createTemporaryView("yanqi_area",dataStream)

    // SQL: build a wide detail table joining the district, city, and province levels
    val sql : String =
      """
        |select a.id as areaid,a.name as aname,a.pid as cid,b.name as city, c.id as proid,c.name as province
        |from yanqi_area as a
        |inner join yanqi_area as b on a.pid = b.id
        |inner join yanqi_area as c on b.pid = c.id
        |""".stripMargin


    val areaTable: Table = tableEnv.sqlQuery(sql)

    val resultStream: DataStream[String] = tableEnv.toRetractStream[Row](areaTable).map(x => {
      val row: Row = x._2
      val areaId: String = row.getField(0).toString
      val aname: String = row.getField(1).toString
      val cid: String = row.getField(2).toString
      val city: String = row.getField(3).toString
      val proid: String = row.getField(4).toString
      val province: String = row.getField(5).toString
      areaId + "," + aname + "," + cid + "," + city + "," + proid + "," + province
    })
    resultStream.addSink(new HBaseWriterSink)

    env.execute()
  }
}
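
After the job has run, the result can be spot-checked by reading a single row back from dim_yanqi_area. A minimal sketch using the assumed ConnHBase helper; the row key 110107 is the Shijingshan district id taken from the sample data above, and any area id present in yanqi_area works:

package dim

import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.Get
import org.apache.hadoop.hbase.util.Bytes

object CheckDimArea {
  def main(args: Array[String]): Unit = {
    val conn = new ConnHBase().connToHbase
    val table = conn.getTable(TableName.valueOf("dim_yanqi_area"))
    // 110107 (Shijingshan district) appears in the sample data; substitute any area id.
    val result = table.get(new Get(Bytes.toBytes("110107")))
    for (col <- Seq("aname", "cid", "city", "proId", "province")) {
      val v = result.getValue(Bytes.toBytes("f1"), Bytes.toBytes(col))
      println(s"$col = ${if (v == null) "null" else Bytes.toString(v)}")
    }
    table.close()
    conn.close()
  }
}
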
