Simple Flink CEP Applications

Two consecutive failed logins

import org.apache.flink.cep.PatternSelectFunction
import org.apache.flink.cep.scala.{CEP, PatternStream}
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala._

import java.util

// Sample input line: 001,192.168.0.1,fail,1646038440
case class Login(uid:String,ip:String,status:String,ts:Long)

/**
 * https://nightlies.apache.org/flink/flink-docs-release-1.13/docs/libs/cep/
 */
object LoginCEP {
  def main(args: Array[String]): Unit = {
    // Create the stream execution environment
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    // Read the source data from a socket
    val stream: DataStream[String] = env.socketTextStream("hdp1", 9999)


    val loginDS: DataStream[Login] = stream.map(line => {
      val li: Array[String] = line.split(",")
      Login(li(0), li(1), li(2), li(3).trim.toLong)
    })

    val timeDS: DataStream[Login] = loginDS.assignAscendingTimestamps(_.ts * 1000)
    // 1. Key the input stream by user id
    val eventDS: KeyedStream[Login, String] = timeDS.keyBy(_.uid)
    // 2. Define the pattern: two consecutive failed logins
    val pattern: Pattern[Login, Login] = Pattern
      .begin[Login]("start")
      .where(_.status.equals("fail"))
      .next("next")
      .where(_.status.equals("fail"))
    // 3. Apply the pattern to the keyed stream
    val result: PatternStream[Login] = CEP.pattern(eventDS, pattern)
    // 4. Select matches and emit alerts
    val alertDS: DataStream[String] = result.select(new MyPatternSelectFunction)

    alertDS.print()

    env.execute()

  }
}
class MyPatternSelectFunction extends PatternSelectFunction[Login, String] {
  override def select(map: util.Map[String, util.List[Login]]): String = {
    // The map contains the matched events, keyed by pattern name ("start", "next")
    "Alert: two consecutive failed logins: " + map
  }
}
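To try this out, start a socket source on hdp1 (for example with nc -lk 9999) and paste a few lines in the uid,ip,status,ts format shown in the code comment. The sample lines below are made up for illustration: uid 001 triggers an alert because its two fail records are strictly consecutive, while uid 002 does not, since a success record sits between its failures.

001,192.168.0.1,fail,1646038440
001,192.168.0.2,fail,1646038441
002,192.168.0.3,fail,1646038442
002,192.168.0.3,success,1646038443
002,192.168.0.3,fail,1646038444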
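As a side note, newer Flink releases steer users toward PatternProcessFunction instead of PatternSelectFunction. The sketch below is a minimal variant of the same alert logic written with that API; the class name LoginMatchProcessFunction is made up here, and result.process(...) would take the place of result.select(...).

import org.apache.flink.cep.functions.PatternProcessFunction
import org.apache.flink.util.Collector

import java.util

// Roughly equivalent to MyPatternSelectFunction, but the Collector allows
// emitting zero or more records per match.
class LoginMatchProcessFunction extends PatternProcessFunction[Login, String] {
  override def processMatch(matched: util.Map[String, util.List[Login]],
                            ctx: PatternProcessFunction.Context,
                            out: Collector[String]): Unit = {
    val first = matched.get("start").get(0)
    out.collect("Alert: two consecutive failed logins for uid " + first.uid)
  }
}

// In main: val alertDS: DataStream[String] = result.process(new LoginMatchProcessFunction)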

 

Splitting the stream with a side output

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.cep.{PatternSelectFunction, PatternTimeoutFunction}
import org.apache.flink.cep.scala.{CEP, PatternStream}
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.streaming.connectors.kafka.{FlinkKafkaConsumer, FlinkKafkaProducer}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time

import java.util
import java.util.Properties

object t1 {
  def main(args: Array[String]): Unit = {
    // Flink stream execution environment
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Set parallelism to 1
    env.setParallelism(1)
    // Use event time (this method is deprecated in newer Flink versions)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // Kafka consumer properties
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hdp1:9092")
    properties.setProperty("group.id", "test")
    val stream = env
      .addSource(new FlinkKafkaConsumer[String]("dwd_page_log", new SimpleStringSchema(), properties))
    // Parse the comma-separated log line into a Stu record
    val v1: DataStream[Stu] = stream.map(x => {
      val ss: Array[String] = x.split(",")
      Stu(ss(0), ss(1), ss(2).trim.toLong * 1000L)
    })
    // Assign ascending event-time timestamps and key by the first field
    val ks: KeyedStream[Stu, String] = v1.assignAscendingTimestamps(_.time).keyBy(_.str)
    // CEP pattern: the first event has no last_page_id,
    // strictly followed (next) by another event with no last_page_id,
    // both within 10 seconds
    val pattern: Pattern[Stu, Stu] = Pattern
      .begin[Stu]("start")
      .where(!_.str.equals("last_page_id"))
      .next("next")
      .where(!_.str2.equals("last_page_id"))
      .within(Time.seconds(10))
    // Apply the pattern sequence to the keyed stream
    val ps: PatternStream[Stu] = CEP.pattern(ks, pattern)
    // Output tag for timed-out partial matches
    val lag = new OutputTag[String]("outlag")
    // select: matched events go to the main stream, timeouts go to the side output
    val ds: DataStream[String] = ps.select(lag, new MyTime, new MySel)
    // Print the jump-out (matched) user stream
    ds.print("jump-out users")
    ds.getSideOutput(lag).print("timeout")
    // Sink the jump-out user stream to the Kafka topic dwm_user_jump_detail
    val value = new FlinkKafkaProducer[String]("dwm_user_jump_detail", new SimpleStringSchema(), properties)
    ds.addSink(value)
    env.execute()
  }
}
// Entity class for a page-log record
case class Stu(str: String,str2:String, time: Long)
// Timeout handler: fires for partial matches that exceed the 10-second window
class MyTime extends PatternTimeoutFunction[Stu, String] {
  override def timeout(map: util.Map[String, util.List[Stu]], timeoutTimestamp: Long): String = {
    val value: String = map.get("start").get(0).str
    "Timeout: user " + value + " had no follow-up page view within 10 seconds"
  }
  }
}

// Match handler: called for complete matches of the pattern
class MySel extends PatternSelectFunction[Stu, String] {
  override def select(map: util.Map[String, util.List[Stu]]): String = {
    map.toString
  }
}
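Only the matched stream ds is written to Kafka here; the timed-out partial matches are just printed from the side output. If timed-out users should also be treated as jump-outs (a common choice in this kind of bounce detection, though not what the code above does), one option is to union the side output with the matched stream before adding the sink. The fragment below is a sketch that assumes the ds, lag, and properties values defined above.

    // Assumption (not in the original): timed-out users also count as jump-outs
    val timeoutDS: DataStream[String] = ds.getSideOutput(lag)
    val jumpDS: DataStream[String] = ds.union(timeoutDS)
    jumpDS.addSink(new FlinkKafkaProducer[String]("dwm_user_jump_detail", new SimpleStringSchema(), properties))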

 
