CoProcessFunction 练习一
package www.zcb.threeweek

import org.apache.flink.api.common.state.ValueStateDescriptor
import org.apache.flink.streaming.api.functions.co.CoProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

/*
 * Exercise:
 *  1) Build the Flink streaming environment, read the order data and the receipt data,
 *     and build two streams.
 *  2) Connect the two streams using the payment id (zid) as the key.
 *  3) Emit orders and receipts that can be matched on the main stream.
 *  4) Emit orders that never received a receipt on a side output.
 *  5) Emit receipts whose order is missing on a side output.
 */

/** One parsed line of the order log; `time1` is in milliseconds (seconds in the file * 1000). */
case class Order(did: String, type1: String, zid: String, time1: Long)

/** One parsed line of the receipt log; `time2` is in milliseconds (seconds in the file * 1000). */
case class Rec(zid: String, qudao: String, time2: Long)

object Test02 {

  /**
   * Reads both CSV files, joins them on `zid` with [[MyCo]] and prints the main stream
   * and both side outputs.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val input1: DataStream[String] = env.readTextFile("data/weekthree/OrderLog.csv")
    val input2: DataStream[String] = env.readTextFile("data/weekthree/ReceiptLog.csv")

    // Keep only order lines with exactly four fields and a non-empty payment id.
    val orderDs: DataStream[Order] = input1
      .filter { line =>
        val fields = line.split(",") // split once instead of once per condition
        fields.length == 4 && fields(2) != ""
      }
      .map { line =>
        val s: Array[String] = line.split(",")
        Order(s(0), s(1), s(2), s(3).trim.toLong * 1000)
      }

    val recDs: DataStream[Rec] = input2.map { line =>
      val s: Array[String] = line.split(",")
      Rec(s(0), s(1), s(2).trim.toLong * 1000)
    }

    // Use the record's own time field as the event-time timestamp, then connect both streams.
    val value2: DataStream[Order] = orderDs.assignAscendingTimestamps(_.time1)
    val value3: DataStream[Rec] = recDs.assignAscendingTimestamps(_.time2)
    val value: ConnectedStreams[Order, Rec] = value2.connect(value3)

    // Key both inputs by zid; key-selector functions are type-checked, unlike string field names.
    val value1: ConnectedStreams[Order, Rec] = value.keyBy(_.zid, _.zid)

    val tag1 = new OutputTag[Order]("celiu1")
    val tag2 = new OutputTag[Rec]("celiu2")

    val value4: DataStream[String] = value1.process(new MyCo(tag1, tag2))
    value4.print("主流:")
    value4.getSideOutput(tag1).print("侧输出流输出有订单但未到账的数据:")
    value4.getSideOutput(tag2).print("侧输出流输出有到账但订单数据缺失的数据:")

    env.execute()
  }
}

/**
 * Joins orders and receipts that share the same key.
 *
 * Whichever side arrives first is buffered in keyed state and an event-time timer is
 * registered `timeoutMs` after its timestamp. If the partner arrives before the timer
 * fires, a match is emitted on the main output and the buffered record and its timer are
 * removed, so a matched record is never reported as unmatched. If the timer fires first,
 * the buffered record is emitted on the corresponding side output.
 *
 * @param t1        side-output tag for orders that received no receipt in time
 * @param t2        side-output tag for receipts that received no order in time
 * @param timeoutMs how long (event time, milliseconds) to wait for the partner record
 */
class MyCo(t1: OutputTag[Order], t2: OutputTag[Rec], timeoutMs: Long = 15000L)
  extends CoProcessFunction[Order, Rec, String] {

  // Buffered order waiting for its receipt (per key).
  lazy val order1 = getRuntimeContext.getState(new ValueStateDescriptor[Order]("o", classOf[Order]))
  // Buffered receipt waiting for its order (per key).
  lazy val rec1 = getRuntimeContext.getState(new ValueStateDescriptor[Rec]("r", classOf[Rec]))

  /** Handles an order: match against a buffered receipt, otherwise buffer the order. */
  override def processElement1(value: Order,
                               ctx: CoProcessFunction[Order, Rec, String]#Context,
                               out: Collector[String]): Unit = {
    Option(rec1.value()) match {
      case Some(rec) =>
        out.collect("匹配成功" + value + "==" + rec)
        // The receipt is consumed: drop it and its pending timer so onTimer
        // does not later report it as a receipt without an order.
        ctx.timerService().deleteEventTimeTimer(rec.time2 + timeoutMs)
        rec1.clear()
      case None =>
        // Receipt not here yet: remember the order and start waiting.
        order1.update(value)
        ctx.timerService().registerEventTimeTimer(value.time1 + timeoutMs)
    }
  }

  /** Handles a receipt: match against a buffered order, otherwise buffer the receipt. */
  override def processElement2(value: Rec,
                               ctx: CoProcessFunction[Order, Rec, String]#Context,
                               out: Collector[String]): Unit = {
    Option(order1.value()) match {
      case Some(order) =>
        out.collect("匹配成功" + value + "==" + order)
        // The order is consumed: drop it and its pending timer so onTimer
        // does not later report it as an order without a receipt.
        ctx.timerService().deleteEventTimeTimer(order.time1 + timeoutMs)
        order1.clear()
      case None =>
        // Order not here yet: remember the receipt and start waiting.
        rec1.update(value)
        ctx.timerService().registerEventTimeTimer(value.time2 + timeoutMs)
    }
  }

  /** Fires when the wait expired: whatever is still buffered never found its partner. */
  override def onTimer(timestamp: Long,
                       ctx: CoProcessFunction[Order, Rec, String]#OnTimerContext,
                       out: Collector[String]): Unit = {
    // Order still buffered, no receipt: side output one.
    Option(order1.value()).foreach(o => ctx.output(t1, o))
    // Receipt still buffered, no order: side output two.
    Option(rec1.value()).foreach(r => ctx.output(t2, r))
    // Release the state for this key.
    rec1.clear()
    order1.clear()
  }
}
效果:
我有一杯酒,足以慰风尘。