|NO.Z.00034|——————————|BigDataEnd|——|Hadoop & Real-time Data Warehouse.V14|——|Project.v14|DWD Layer Processing|Data Warehouse Layer Data Processing.V4|
1. Implementing the DWD layer in code (the AreaDetailInfo code could just as well be placed in the DIM layer): reference code
### --- Implementing the DWD-layer data: AreaDetailInfo
package dim

import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row

object AreaDetailInfo {

  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(5000)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    /**
     * Sample (rowkey, value) records emitted by HBaseReader:
     * (110107,010-39.9056-3-116.223-中国,北京,北京市,石景山区-石景山区-110100-Shijingshan-石景山-100043)
     * (110116,010-40.316-3-116.632-中国,北京,北京市,怀柔区-怀柔区-110100-Huairou-怀柔-101400)
     */
    val data: DataStream[(String, String)] = env.addSource(new HBaseReader)
    // data.print()

    // Extract the fields we need: id, name, pid
    val dataStream: DataStream[AreaDetail] = data.map(x => {
      val id: Int = x._1.toInt
      val datas: Array[String] = x._2.split("-")
      val name: String = datas(5).trim
      val pid: Int = datas(6).trim.toInt
      AreaDetail(id, name, pid)
    })

    // Target layout: area id, area name, city id, city name, province id, province name
    // Flink Table API
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    // Register the stream as a temporary view
    tableEnv.createTemporaryView("yanqi_area", dataStream)

    // SQL -- self-join the area view twice to build the district/city/province wide table
    val sql: String =
      """
        |select a.id as areaid, a.name as aname, a.pid as cid, b.name as city, c.id as proid, c.name as province
        |from yanqi_area as a
        |inner join yanqi_area as b on a.pid = b.id
        |inner join yanqi_area as c on b.pid = c.id
        |""".stripMargin
    val areaTable: Table = tableEnv.sqlQuery(sql)

    // toRetractStream emits (flag, row) pairs; the flag is ignored here because the
    // HBase sink performs idempotent puts keyed by areaid.
    val resultStream: DataStream[String] = tableEnv.toRetractStream[Row](areaTable).map(x => {
      val row: Row = x._2
      val areaId: String = row.getField(0).toString
      val aname: String = row.getField(1).toString
      val cid: String = row.getField(2).toString
      val city: String = row.getField(3).toString
      val proid: String = row.getField(4).toString
      val province: String = row.getField(5).toString
      areaId + "," + aname + "," + cid + "," + city + "," + proid + "," + province
    })

    resultStream.addSink(new HBaseWriterSink)
    env.execute()
  }
}
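### --- AreaDetail and HBaseReader (referenced above, defined elsewhere in the project)
The job above compiles against an AreaDetail case class and an HBaseReader source that this section does not list. What follows is only a minimal sketch of the shape they would need to have: the source table name "yanqi_area_source", the reuse of ConnHBase, and the "-" concatenation of column values are assumptions inferred from how the classes are used above, not the project's actual code.

package dim

import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.{CellUtil, TableName}
import org.apache.hadoop.hbase.client.{Connection, Result, ResultScanner, Scan, Table}
import org.apache.hadoop.hbase.util.Bytes

// Plain case class backing the yanqi_area temporary view.
case class AreaDetail(id: Int, name: String, pid: Int)

// Sketch of a bounded HBase source: scan the raw area table once and emit
// (rowkey, "col1-col2-...") tuples. "yanqi_area_source" is a placeholder table name.
class HBaseReader extends RichSourceFunction[(String, String)] {

  private var connection: Connection = _
  private var table: Table = _

  override def open(parameters: Configuration): Unit = {
    connection = new ConnHBase().connToHbase
    table = connection.getTable(TableName.valueOf("yanqi_area_source"))
  }

  override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
    val scanner: ResultScanner = table.getScanner(new Scan())
    val results = scanner.iterator()
    while (results.hasNext) {
      val result: Result = results.next()
      val rowKey: String = Bytes.toString(result.getRow)
      // Join all cell values with "-" so the map() above can split them again.
      val sb = new StringBuilder
      val cells = result.listCells().iterator()
      while (cells.hasNext) {
        val cell = cells.next()
        if (sb.nonEmpty) sb.append("-")
        sb.append(Bytes.toString(CellUtil.cloneValue(cell)))
      }
      ctx.collect((rowKey, sb.toString()))
    }
    scanner.close()
  }

  override def cancel(): Unit = {}

  override def close(): Unit = {
    if (table != null) table.close()
    if (connection != null) connection.close()
  }
}

Note that the real reader has to emit the columns in a fixed order, so that datas(5) and datas(6) in the map above reliably correspond to the area name and the parent id.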
### --- HBaseWriterSink:
package dim

import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.sink.{RichSinkFunction, SinkFunction}
import org.apache.hadoop.hbase.TableName
import org.apache.hadoop.hbase.client.{Connection, Put, Table}

class HBaseWriterSink extends RichSinkFunction[String] {

  var connection: Connection = _
  var hbTable: Table = _

  override def open(parameters: Configuration): Unit = {
    connection = new ConnHBase().connToHbase
    hbTable = connection.getTable(TableName.valueOf("dim_yanqi_area"))
  }

  override def close(): Unit = {
    if (hbTable != null) {
      hbTable.close()
    }
    if (connection != null) {
      connection.close()
    }
  }

  // Write one comma-separated record into dim_yanqi_area, using the area id as
  // the rowkey and f1 as the column family.
  def insertDimArea(hbTable: Table, value: String): Unit = {
    val infos: Array[String] = value.split(",")
    val areaId: String = infos(0).trim
    val aname: String = infos(1).trim
    val cid: String = infos(2).trim
    val city: String = infos(3).trim
    val proid: String = infos(4).trim
    val province: String = infos(5).trim

    val put = new Put(areaId.getBytes())
    put.addColumn("f1".getBytes(), "aname".getBytes(), aname.getBytes())
    put.addColumn("f1".getBytes(), "cid".getBytes(), cid.getBytes())
    put.addColumn("f1".getBytes(), "city".getBytes(), city.getBytes())
    put.addColumn("f1".getBytes(), "proId".getBytes(), proid.getBytes())
    put.addColumn("f1".getBytes(), "province".getBytes(), province.getBytes())
    hbTable.put(put)
  }

  override def invoke(value: String, context: SinkFunction.Context[_]): Unit = {
    // println(value)
    insertDimArea(hbTable, value)
  }
}
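### --- ConnHBase helper and the dim_yanqi_area table
Both HBaseReader and HBaseWriterSink get their connection from myutils.ConnHBase, which was defined in an earlier part of the project and is not listed here. Below is a minimal sketch of such a helper; the ZooKeeper quorum and client port are placeholders for your own cluster, not the project's real configuration.

package myutils

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}

class ConnHBase {
  // Build an HBase connection from the ZooKeeper quorum; replace the hosts and
  // port with the values of your own cluster.
  def connToHbase: Connection = {
    val conf = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "hadoop01,hadoop02,hadoop03")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    ConnectionFactory.createConnection(conf)
  }
}

The sink writes into the dim_yanqi_area table under column family f1, so that table must exist before the job starts, for example created once in the HBase shell with: create 'dim_yanqi_area', 'f1'.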