|NO.Z.00029|——————————|BigDataEnd|——|Hadoop & Real-Time Data Warehouse.V09|——|Project.v09|DIM Layer Processing|Reading Region Dimension Data from HBase.V2|
1. Implementing the program: sinking data from the ODS layer to the DIM layer
The job reads the raw region table yanqi_area from HBase, resolves each district to its city and province by self-joining the table, and writes the resulting wide rows into the HBase table dim_yanqi_area.
### --- HBaseReader: source class that scans the HBase table yanqi_area
package dim

import java.util
import myutils.ConnHBase
import org.apache.flink.configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{Cell, TableName}
import scala.collection.JavaConverters._

/**
 * Reads yanqi_area from HBase, so that downstream operators can transform
 * each record into (area id, area name, city id, city name, province id,
 * province name) and write it back to HBase.
 */
class HBaseReader extends RichSourceFunction[(String, String)] {
  private var conn: Connection = _
  private var table: Table = _
  private var scan: Scan = _

  override def open(parameters: configuration.Configuration): Unit = {
    conn = new ConnHBase().connToHbase
    val tableName: TableName = TableName.valueOf("yanqi_area")
    val cf1: String = "f1"
    table = conn.getTable(tableName)
    scan = new Scan()
    scan.addFamily(Bytes.toBytes(cf1))
  }

  override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
    val rs: ResultScanner = table.getScanner(scan)
    val iterator: util.Iterator[Result] = rs.iterator()
    while (iterator.hasNext) {
      val result: Result = iterator.next()
      val rowKey: String = Bytes.toString(result.getRow)
      val buffer: StringBuffer = new StringBuffer()
      // Concatenate all cell values of the row, separated by "-"
      for (cell: Cell <- result.listCells().asScala) {
        val value: String = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
        buffer.append(value).append("-")
      }
      // Strip the trailing "-" and emit (rowKey, concatenated values)
      val valueString: String = buffer.replace(buffer.length() - 1, buffer.length(), "").toString
      ctx.collect((rowKey, valueString))
    }
  }

  override def cancel(): Unit = {
    // Nothing to cancel: run() terminates after one full scan
  }

  override def close(): Unit = {
    try {
      if (table != null) {
        table.close()
      }
      if (conn != null) {
        conn.close()
      }
    } catch {
      case e: Exception => println(e.getMessage)
    }
  }
}
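The ConnHBase helper imported from myutils is defined in an earlier part of this series and is not shown here. For completeness, a minimal sketch of what it plausibly looks like; the ZooKeeper quorum hosts below are placeholder assumptions, not values from the original, and should be replaced with your own cluster's ensemble:

package myutils

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}

class ConnHBase {
  // Builds an HBase Connection from client configuration.
  // The quorum address is an assumption -- substitute your own hosts.
  def connToHbase: Connection = {
    val conf: Configuration = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "hadoop01,hadoop02,hadoop03") // assumed hosts
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    ConnectionFactory.createConnection(conf)
  }
}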
### --- HBaseWriterSink: sink class that writes the transformed dimension rows into the HBase table dim_yanqi_area
package dim

import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Put, Table}

class HBaseWriterSink extends RichSinkFunction[String] {
  var connection: Connection = _
  var hbTable: Table = _

  override def open(parameters: Configuration): Unit = {
    connection = new ConnHBase().connToHbase
    hbTable = connection.getTable(TableName.valueOf("dim_yanqi_area"))
  }

  override def close(): Unit = {
    if (hbTable != null) {
      hbTable.close()
    }
    if (connection != null) {
      connection.close()
    }
  }

  // Parses "areaId,aname,cid,city,proid,province" and writes one row,
  // using areaId as the rowkey and column family f1 for all columns
  def insertDimArea(hbTable: Table, value: String): Unit = {
    val infos: Array[String] = value.split(",")
    val areaId: String = infos(0).trim
    val aname: String = infos(1).trim
    val cid: String = infos(2).trim
    val city: String = infos(3).trim
    val proid: String = infos(4).trim
    val province: String = infos(5).trim
    val put = new Put(areaId.getBytes())
    put.addColumn("f1".getBytes(), "aname".getBytes(), aname.getBytes())
    put.addColumn("f1".getBytes(), "cid".getBytes(), cid.getBytes())
    put.addColumn("f1".getBytes(), "city".getBytes(), city.getBytes())
    put.addColumn("f1".getBytes(), "proId".getBytes(), proid.getBytes())
    put.addColumn("f1".getBytes(), "province".getBytes(), province.getBytes())
    hbTable.put(put)
  }

  override def invoke(value: String, context: SinkFunction.Context[_]): Unit = {
    insertDimArea(hbTable, value)
  }
}
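The sink assumes the target table dim_yanqi_area already exists with column family f1. If it does not, it can be created once before the job runs; a sketch using the HBase 2.x Admin API (the one-off driver object CreateDimTable is introduced here for illustration):

package dim

import myutils.ConnHBase
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Admin, ColumnFamilyDescriptorBuilder, Connection, TableDescriptorBuilder}

object CreateDimTable {
  def main(args: Array[String]): Unit = {
    val conn: Connection = new ConnHBase().connToHbase
    val admin: Admin = conn.getAdmin
    val table: TableName = TableName.valueOf("dim_yanqi_area")
    // Create the table with column family f1 only if it is missing
    if (!admin.tableExists(table)) {
      admin.createTable(
        TableDescriptorBuilder.newBuilder(table)
          .setColumnFamily(ColumnFamilyDescriptorBuilder.of("f1"))
          .build())
    }
    admin.close()
    conn.close()
  }
}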
### --- AreaDetailInfo: the driver job (this code could equally live in the DWD layer)
package dim

import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row

// id/name/pid triple parsed from one yanqi_area row; pid points at the parent region
case class AreaDetail(id: Int, name: String, pid: Int)

object AreaDetailInfo {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(5000)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    /**
     * Sample records emitted by HBaseReader:
     * (110107,010-39.9056-3-116.223-中国,北京,北京市,石景山区-石景山区-110100-Shijingshan-石景山-100043)
     * (110116,010-40.316-3-116.632-中国,北京,北京市,怀柔区-怀柔区-110100-Huairou-怀柔-101400)
     */
    val data: DataStream[(String, String)] = env.addSource(new HBaseReader)

    // Extract the required fields: id, name, pid
    val dataStream: DataStream[AreaDetail] = data.map(x => {
      val id: Int = x._1.toInt
      val datas: Array[String] = x._2.split("-")
      val name: String = datas(5).trim
      val pid: Int = datas(6).trim.toInt
      AreaDetail(id, name, pid)
    })

    // Transform into (area id, area name, city id, city name, province id, province name)
    // with the Flink Table API
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    // Register a temporary view over the stream
    tableEnv.createTemporaryView("yanqi_area", dataStream)
    // SQL -- self-join the area table twice to build a district/city/province wide table:
    // a district finds its city via a.pid = b.id, and the city finds its province via b.pid = c.id
    val sql: String =
      """
        |select a.id as areaid, a.name as aname, a.pid as cid, b.name as city, c.id as proid, c.name as province
        |from yanqi_area as a
        |inner join yanqi_area as b on a.pid = b.id
        |inner join yanqi_area as c on b.pid = c.id
        |""".stripMargin
    val areaTable: Table = tableEnv.sqlQuery(sql)
    val resultStream: DataStream[String] = tableEnv.toRetractStream[Row](areaTable)
      .filter(_._1) // keep only accumulate messages; retractions carry stale join results
      .map(x => {
        val row: Row = x._2
        val areaId: String = row.getField(0).toString
        val aname: String = row.getField(1).toString
        val cid: String = row.getField(2).toString
        val city: String = row.getField(3).toString
        val proid: String = row.getField(4).toString
        val province: String = row.getField(5).toString
        areaId + "," + aname + "," + cid + "," + city + "," + proid + "," + province
      })
    resultStream.addSink(new HBaseWriterSink)
    env.execute()
  }
}
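Once the job has run, a single row can be fetched back from dim_yanqi_area to spot-check the sink. A minimal sketch reusing the ConnHBase helper; the rowkey 110107 comes from the sample records above, and the object CheckDimArea is hypothetical:

package dim

import myutils.ConnHBase
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Get, Result, Table}
import org.apache.hadoop.hbase.util.Bytes

object CheckDimArea {
  def main(args: Array[String]): Unit = {
    val conn: Connection = new ConnHBase().connToHbase
    val table: Table = conn.getTable(TableName.valueOf("dim_yanqi_area"))
    // Fetch the row written for district 110107 (Shijingshan, per the sample data)
    val result: Result = table.get(new Get(Bytes.toBytes("110107")))
    val city: String = Bytes.toString(result.getValue(Bytes.toBytes("f1"), Bytes.toBytes("city")))
    val province: String = Bytes.toString(result.getValue(Bytes.toBytes("f1"), Bytes.toBytes("province")))
    println(s"city=$city, province=$province")
    table.close()
    conn.close()
  }
}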
Walter Savage Landor: I strove with none, for none was worth my strife. Nature I loved and, next to Nature, Art: I warm'd both hands before the fire of life; It sinks, and I am ready to depart.
——W.S. Landor