|NO.Z.00041|——————————|BigDataEnd|——|Hadoop & Real-Time Data Warehouse.V21|——|Project.v21 Requirement 1: Data Processing & Full Query.V5|——|DWS: hbase:yanqi_trade_orders + dim_yanqi_area registered as temporary tables in Flink Table|
1. Implementing the fourth layer (DWS): aggregating total order amount by city and province
### --- Implementing the fourth layer (DWS): total order amount and order count by city and province
~~~ DWS: compute the total order amount and order count per city and province
~~~ fields involved: orderNo, userId, status, totalMoney, areaId (see the case class sketch after this list)
~~~ ODS——mysql:yanqi_trade_orders is sunk into hbase:yanqi_trade_orders
~~~ ODS——mysql:yanqi_area is sunk into hbase:yanqi_area
~~~ DIM——hbase:yanqi_area produces the wide area table hbase:dim_yanqi_area
~~~ DWS——hbase:yanqi_trade_orders + dim_yanqi_area are registered as temporary tables in the Flink Table API
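The main program below constructs DimArea and TradeOrder objects, but neither case class is listed in this post. The sketch below is a hypothetical reconstruction: field names that also appear in the SQL query (areaId, aname, city, province, totalMoney) are fixed by that query, while the remaining names (cid, proId, orderId, orderNo, userId, status) are assumptions inferred from the local variable names in TotalCityOrder.scala.
// dim/DimArea.scala (hypothetical; referenced via `import dim.DimArea`)
case class DimArea(areaId: Int, aname: String, cid: Int, city: String, proId: Int, province: String)
// dw/dws/TradeOrder.scala (hypothetical; same package as TotalCityOrder, so no import is needed)
case class TradeOrder(orderId: Int, orderNo: String, userId: Int, status: Int, totalMoney: Double, areaId: Int)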
2. Implementing and running the program
### --- Implementation: TotalCityOrder.scala
package dw.dws
import dim.DimArea
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.table.api.Table
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row
/**
 * Requirement 1: total order amount and order count per city and province (full query).
 *
 * Two inputs are read:
 *   1. dim_yanqi_area    - the DIM dimension table (HBase)
 *   2. yanqi_trade_orders - the incremental order data (HBase)
 *
 * Both streams are registered as temporary tables in the Flink Table API
 * and then joined and aggregated with SQL.
 */
object TotalCityOrder {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.enableCheckpointing(5000)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE)

    // 1. dim_yanqi_area: DIM dimension data read from HBase as (rowKey, comma-joined columns)
    val dimAreaStream: DataStream[(String, String)] = env.addSource(new ReadDimArea)
    // 2. yanqi_trade_orders: incremental order data read from HBase
    val tradeOrderStream: DataStream[(String, String)] = env.addSource(new ReadTradeOrder)
    // tradeOrderStream.print()

    // Parse the dimension records into DimArea objects
    val areaStream: DataStream[DimArea] = dimAreaStream.map(x => {
      val areaId: Int = x._1.toInt
      val datas: Array[String] = x._2.split(",")
      val aname: String = datas(0).trim
      val cid: Int = datas(1).trim.toInt
      val city: String = datas(2).trim
      val proId: Int = datas(3).trim.toInt
      val province: String = datas(4).trim
      DimArea(areaId, aname, cid, city, proId, province)
    })
    // areaStream.print()

    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)
    tableEnv.createTemporaryView("dim_yanqi_area", areaStream)
    /**
     * Sample record from yanqi_trade_orders (rowKey, comma-joined columns), e.g.:
     * (1,370203,2020-06-28 18:14:01,2,0,1,2020-10-21 22:54:31,1,23a0b124546,2,2020-06-28 18:14:01,0.12,2,10468.0,0,0,98)
     * Fields extracted below: orderId, orderNo, userId, status, totalMoney, areaId
     */
    // Parse the order records into TradeOrder objects
    val orderStream: DataStream[TradeOrder] = tradeOrderStream.map(x => {
      val orderId: Int = x._1.toInt
      val datas: Array[String] = x._2.split(",")
      // datas.foreach(println)
      val orderNo: String = datas(7).trim
      val userId: Int = datas(15).trim.toInt
      val status: Int = datas(11).toInt
      val totalMoney: Double = datas(12).toDouble
      val areaId: Int = datas(0).toInt
      // println(orderId, orderNo, userId, status, totalMoney, areaId)
      TradeOrder(orderId, orderNo, userId, status, totalMoney, areaId)
    })
    orderStream.print()
    tableEnv.createTemporaryView("yanqi_orders", orderStream)

    // Aggregate per district first, then roll the districts up to city/province totals
    val sql: String =
      """
        |select f.city, f.province, sum(f.qusum) as orderMoney, sum(f.qucount) as orderCount from
        |(select r.aname as qu, r.city as city, r.province as province, sum(k.totalMoney) as qusum, count(k.totalMoney) as qucount
        |from yanqi_orders as k
        |inner join dim_yanqi_area as r
        |on k.areaId = r.areaId
        |group by r.aname, r.city, r.province) as f
        |group by f.city, f.province
        |""".stripMargin

    val resultTable: Table = tableEnv.sqlQuery(sql)
    // A grouped aggregation produces a retract stream; keep only the insert (true) records
    val result: DataStream[(Boolean, Row)] = tableEnv.toRetractStream[Row](resultTable)
    // result.print()
    result.filter(x => x._1).print()

    env.execute()
  }
}
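To compile and run the job, the project needs the Flink Scala streaming API, the Table API bridge and planner, and the HBase client on the classpath. A hypothetical build.sbt fragment is shown below; the build tool and the versions are assumptions (the bridge.scala imports above correspond to a Flink 1.11-era API), so align them with your own project and cluster.
// build.sbt (sketch; versions are assumptions)
val flinkVersion = "1.11.2"

libraryDependencies ++= Seq(
  "org.apache.flink" %% "flink-streaming-scala"        % flinkVersion,
  "org.apache.flink" %% "flink-table-api-scala-bridge" % flinkVersion,
  "org.apache.flink" %% "flink-table-planner-blink"    % flinkVersion,
  "org.apache.hbase"  %  "hbase-client"                % "1.3.1"
)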
### --- Implementation: ReadDimArea.scala
package dw.dws
import java.util
import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{Cell, TableName}
import scala.collection.JavaConverters._
class ReadDimArea extends RichSourceFunction[(String, String)] {
  private var conn: Connection = _
  private var table: Table = _
  private var scan: Scan = _
  var flag = false

  // Open the HBase connection and prepare a full scan over column family f1
  override def open(parameters: Configuration): Unit = {
    val tableName: TableName = TableName.valueOf("dim_yanqi_area")
    val cf1: String = "f1"
    conn = new ConnHBase().connToHbase
    table = conn.getTable(tableName)
    scan = new Scan()
    scan.addFamily(Bytes.toBytes(cf1))
  }

  // Emit every row as (rowKey, comma-joined cell values)
  override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
    if (!flag) {
      val rs: ResultScanner = table.getScanner(scan)
      val iterator: util.Iterator[Result] = rs.iterator()
      while (iterator.hasNext) {
        val result: Result = iterator.next()
        val rowKey: String = Bytes.toString(result.getRow)
        val buffer = new StringBuffer()
        for (cell: Cell <- result.listCells().asScala) {
          val value: String = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          buffer.append(value).append(",")
        }
        // Drop the trailing comma
        val valueString: String = buffer.replace(buffer.length() - 1, buffer.length(), "").toString
        ctx.collect((rowKey, valueString))
      }
    }
  }

  override def cancel(): Unit = {
    flag = true
  }

  override def close(): Unit = {
    try {
      if (table != null) {
        table.close()
      }
      if (conn != null) {
        conn.close()
      }
    } catch {
      case e: Exception => println(e.getMessage)
    }
  }
}
### --- Implementation: ReadTradeOrder.scala
package dw.dws
import java.util
import myutils.ConnHBase
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.source.{RichSourceFunction, SourceFunction}
import org.apache.hadoop.hbase.client._
import org.apache.hadoop.hbase.util.Bytes
import org.apache.hadoop.hbase.{Cell, TableName}
import scala.collection.JavaConverters._
class ReadTradeOrder extends RichSourceFunction[(String, String)] {
  private var conn: Connection = _
  private var table: Table = _
  private var scan: Scan = _
  var flag = false

  // Open the HBase connection and prepare a full scan over column family f1
  override def open(parameters: Configuration): Unit = {
    val tableName: TableName = TableName.valueOf("yanqi_trade_orders")
    val cf1: String = "f1"
    conn = new ConnHBase().connToHbase
    table = conn.getTable(tableName)
    scan = new Scan()
    scan.addFamily(Bytes.toBytes(cf1))
  }

  // Emit every order row as (rowKey, comma-joined cell values)
  override def run(ctx: SourceFunction.SourceContext[(String, String)]): Unit = {
    if (!flag) {
      val rs: ResultScanner = table.getScanner(scan)
      val iterator: util.Iterator[Result] = rs.iterator()
      while (iterator.hasNext) {
        val result: Result = iterator.next()
        val rowKey: String = Bytes.toString(result.getRow)
        val buffer = new StringBuffer()
        for (cell: Cell <- result.listCells().asScala) {
          val value: String = Bytes.toString(cell.getValueArray, cell.getValueOffset, cell.getValueLength)
          buffer.append(value).append(",")
        }
        // Drop the trailing comma
        val valueString: String = buffer.replace(buffer.length() - 1, buffer.length(), "").toString
        // println("--" + valueString)
        ctx.collect((rowKey, valueString))
      }
    }
  }

  override def cancel(): Unit = {
    flag = true
  }

  override def close(): Unit = {
    try {
      if (table != null) {
        table.close()
      }
      if (conn != null) {
        conn.close()
      }
    } catch {
      case e: Exception => println(e.getMessage)
    }
  }
}
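Both source functions obtain their HBase connection from a ConnHBase helper in the myutils package, which is not listed in this post. A minimal sketch, assuming a standard ZooKeeper-based client connection (the quorum hosts and port below are placeholders, not values from the original project):
// myutils/ConnHBase.scala (sketch; quorum and port are placeholders)
package myutils

import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory}

class ConnHBase {
  // Build an HBase Connection from ZooKeeper settings; callers are responsible for closing it
  def connToHbase: Connection = {
    val conf: Configuration = HBaseConfiguration.create()
    conf.set("hbase.zookeeper.quorum", "hadoop01,hadoop02,hadoop03")
    conf.set("hbase.zookeeper.property.clientPort", "2181")
    ConnectionFactory.createConnection(conf)
  }
}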