|NO.Z.00056|——————————|BigDataEnd|——|Hadoop&实时数仓.V36|——|项目.v36|需求五:数据处理&渠道来源统计.V2|——|编程实现|
一、编程实现:ChanalStatistics:实时统计各渠道来源用户数量
package dw.dws
import com.alibaba.fastjson.{JSON, JSONObject}
import modes.{ChanalDetail, CountByChannal}
import myutils.SourceKafka
import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.api.scala._
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.function.WindowFunction
import org.apache.flink.streaming.api.scala.{DataStream, KeyedStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.util.Collector
/**
 * Flink streaming job: reads JSON event strings from the Kafka topic
 * "eventlog", extracts the channel and uid found under the "attr" object,
 * counts the records per channel in a 10-second time window and prints one
 * summary line per channel and window.
 *
 * NOTE(review): the aggregation counts records per channel, not distinct
 * uids, although the printed message speaks of a user count — confirm
 * whether de-duplication by uid is expected.
 */
object ChanalStatistics {
  import scala.util.Try

  /** Builds the pipeline (Kafka source -> parse -> key by channel -> window count -> print) and runs it. */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val kafkaConsumer: FlinkKafkaConsumer[String] = new SourceKafka().getKafkaSource("eventlog")
    val data: DataStream[String] = env.addSource(kafkaConsumer)

    // Records that cannot be parsed, or that lack attr/channel/uid, are skipped
    // instead of throwing, so a single bad message does not fail the job.
    val chanalDetailStream: DataStream[ChanalDetail] =
      data.flatMap((raw: String) => parseChanalDetail(raw).toList)

    val keyed: KeyedStream[ChanalDetail, String] = chanalDetailStream.keyBy(_.chanal)
    val value: DataStream[CountByChannal] = keyed
      .timeWindow(Time.seconds(10))
      .aggregate(new ChanalAggFunc, new ChanalWindowFunc)

    val result: DataStream[String] = value.process(new ProcessPrint)
    result.print()
    env.execute()
  }

  /**
   * Extracts a [[ChanalDetail]] (channel, uid) from one raw JSON record.
   *
   * @param raw the JSON text of a single event
   * @return `Some(detail)` when the record is valid JSON containing an "attr"
   *         object with non-null "channel" and "uid" entries; `None` otherwise
   */
  private def parseChanalDetail(raw: String): Option[ChanalDetail] =
    for {
      // parseObject may throw on malformed text and may return null on empty input.
      root    <- Try(JSON.parseObject(raw)).toOption.flatMap(Option(_))
      // getJSONObject may return null (key absent) or throw (value is not an object).
      attr    <- Try(root.getJSONObject("attr")).toOption.flatMap(Option(_))
      channel <- Option(attr.get("channel"))
      uid     <- Option(attr.get("uid"))
    } yield ChanalDetail(channel.toString, uid.toString)

  /** Incremental counter: the accumulator is the number of records seen so far in the window. */
  class ChanalAggFunc extends AggregateFunction[ChanalDetail, Long, Long] {
    override def createAccumulator(): Long = 0L

    override def add(value: ChanalDetail, accumulator: Long): Long = accumulator + 1

    override def getResult(accumulator: Long): Long = accumulator

    override def merge(a: Long, b: Long): Long = a + b
  }

  /** Pairs the pre-aggregated count of a window with the channel key it belongs to. */
  class ChanalWindowFunc extends WindowFunction[Long, CountByChannal, String, TimeWindow] {
    override def apply(key: String, window: TimeWindow, input: Iterable[Long], out: Collector[CountByChannal]): Unit = {
      // Only the first element of the input is used; headOption avoids a
      // NoSuchElementException should the iterable ever be empty.
      input.headOption.foreach(count => out.collect(CountByChannal(key, count)))
    }
  }

  /** Formats each per-channel count as a human-readable message. */
  class ProcessPrint extends ProcessFunction[CountByChannal, String] {
    override def processElement(value: CountByChannal, ctx: ProcessFunction[CountByChannal, String]#Context, out: Collector[String]): Unit = {
      val messag = s"渠道:${value.chanal}的来源用户数量:${value.count}"
      out.collect(messag)
    }
  }
}
Walter Savage Landor: I strove with none, for none was worth my strife. Nature I loved and, next to Nature, Art: I warm'd both hands before the fire of life. It sinks, and I am ready to depart.
——W.S.Landor
分类:
bdv026-EB实时数仓
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通