
Java Flink (26): e-commerce blacklist filtering in practice with Flink CEP, covering what CEP is, a CEP combining-pattern demo, and a CEP looping-pattern demo

What is CEP?

1. Complex Event Processing.
2. The library that implements complex event processing in Flink.
3. CEP detects event patterns in an endless stream of events, giving us a chance to grasp the important parts of the data.
4. One or more composite events built from simple events are matched against defined rules, and the data the user wants is then emitted.

The CEP API (a chained sketch follows this list):

begin: defines the first event of the pattern
where: a filter condition on the matched event
next: defines the next event, which must follow immediately (strict contiguity)
subtype: restricts the matched event to a subtype
followedBy: a later event that does not have to follow immediately (relaxed contiguity)
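As a rough illustration of how these calls chain together, here is a minimal sketch; the Event and SubEvent classes and their getters are hypothetical and not part of this post's demos:

    // Hypothetical sketch of chaining the CEP API calls listed above.
    Pattern<Event, ?> pattern = Pattern.<Event>begin("start")        // begin: the first event of the pattern
            .where(new SimpleCondition<Event>() {                    // where: condition the event must satisfy
                @Override
                public boolean filter(Event event) {
                    return event.getId() > 0;
                }
            })
            .next("middle").subtype(SubEvent.class)                  // next: must follow immediately; subtype: restrict the type
            .where(new SimpleCondition<SubEvent>() {
                @Override
                public boolean filter(SubEvent subEvent) {
                    return subEvent.getTemperature() > 100.0;
                }
            })
            .followedBy("end")                                       // followedBy: follows eventually, not necessarily immediately
            .where(new SimpleCondition<Event>() {
                @Override
                public boolean filter(Event event) {
                    return "end".equals(event.getName());
                }
            });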

CEP individual patterns (a quantifier sketch follows this outline)

CEP pattern sequences

CEP timeout handling (a sketch follows after the second demo)
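These items are only headings in the original post. As a minimal sketch of individual-pattern quantifiers, reusing the LoginEvent bean and imports from the demos below (the pattern name and the 10-second window are chosen only for illustration):

    // Quantifiers control how many times the event of an individual pattern may occur.
    Pattern<LoginEvent, LoginEvent> failsPattern = Pattern.<LoginEvent>begin("failEvents")
            .where(new SimpleCondition<LoginEvent>() {
                @Override
                public boolean filter(LoginEvent loginEvent) {
                    return "fail".equals(loginEvent.getLoginState());
                }
            })
            .oneOrMore()               // looping pattern: one or more failures
            .consecutive()             // require the repeated failures to be strictly consecutive
            // .times(2, 4)            // alternatively: between 2 and 4 failures
            // .timesOrMore(2)         // alternatively: 2 or more failures
            // .optional()             // alternatively: also allow zero occurrences
            .within(Time.seconds(10)); // the whole match must complete within 10 seconds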

CEP combining-pattern demo

Problem: we optimize the blacklist filtering from the previous post. There, if an out-of-order successful login was interleaved between two failed logins, those two failures would not be detected.

With CEP, the first pattern event detects the first failed login and the second pattern event detects the second failed login; the matches are then selected and the warning information is printed.

1. Add the flink-cep dependency:
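A minimal sketch of the Maven dependency, assuming the flink-cep artifact that matches your Flink and Scala versions; the version properties are placeholders, not values from the original post (in Flink 1.15+ the artifact has no Scala suffix):

    <!-- Flink CEP library; the version placeholders are assumptions, match them to your project -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-cep_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>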

2. The code:

    package Project;

    import Beans.LoginEvent;
    import Beans.LoginFailWarning;
    import org.apache.flink.cep.CEP;
    import org.apache.flink.cep.PatternSelectFunction;
    import org.apache.flink.cep.PatternStream;
    import org.apache.flink.cep.pattern.Pattern;
    import org.apache.flink.cep.pattern.conditions.SimpleCondition;
    import org.apache.flink.streaming.api.TimeCharacteristic;
    import org.apache.flink.streaming.api.datastream.DataStream;
    import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
    import org.apache.flink.streaming.api.windowing.time.Time;

    import java.net.URL;
    import java.util.List;
    import java.util.Map;

    public class LoginFailWithCep {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.setParallelism(1);
            env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

            // Read the input data and assign event-time timestamps/watermarks (3 s of allowed out-of-orderness)
            URL resource = LoginFailWithCep.class.getResource("/LoginLog.csv");
            DataStream<LoginEvent> logEventStream = env.readTextFile(resource.getPath())
                    .map(line -> {
                        String[] fields = line.split(",");
                        return new LoginEvent(new Long(fields[0]), fields[1], fields[2], new Long(fields[3]));
                    })
                    .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<LoginEvent>(Time.seconds(3)) {
                        @Override
                        public long extractTimestamp(LoginEvent loginEvent) {
                            return loginEvent.getTimestamp() * 1000;
                        }
                    });

            // Define the match pattern: a first failure immediately followed by a second failure, within 2 seconds
            Pattern<LoginEvent, LoginEvent> loginFailPattern = Pattern.<LoginEvent>begin("firstFail")
                    .where(new SimpleCondition<LoginEvent>() {
                        @Override
                        public boolean filter(LoginEvent loginEvent) throws Exception {
                            return "fail".equals(loginEvent.getLoginState());
                        }
                    })
                    .next("secondFail")
                    .where(new SimpleCondition<LoginEvent>() {
                        @Override
                        public boolean filter(LoginEvent loginEvent) throws Exception {
                            return "fail".equals(loginEvent.getLoginState());
                        }
                    })
                    .within(Time.seconds(2)); // within 2 seconds

            // Apply the pattern to the stream, keyed by user id
            PatternStream<LoginEvent> patternStream = CEP.pattern(logEventStream.keyBy(LoginEvent::getUserId), loginFailPattern);

            // Select the matched complex events and convert them into warning records
            SingleOutputStreamOperator<LoginFailWarning> warningStream = patternStream.select(new LoginFailMatchDetectWarning());

            warningStream.print();
            env.execute("login fail detect with cep job");
        }

        // Custom PatternSelectFunction: build a warning from the two matched failure events
        public static class LoginFailMatchDetectWarning implements PatternSelectFunction<LoginEvent, LoginFailWarning> {
            @Override
            public LoginFailWarning select(Map<String, List<LoginEvent>> map) throws Exception {
                LoginEvent firstFailEvent = map.get("firstFail").get(0);
                LoginEvent secondFailEvent = map.get("secondFail").get(0);
                return new LoginFailWarning(firstFailEvent.getUserId(), firstFailEvent.getTimestamp(),
                        secondFailEvent.getTimestamp(), "login fail 2 times");
            }
        }
    }
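The code imports Beans.LoginEvent and Beans.LoginFailWarning, which the post does not show. Below is a minimal sketch reconstructed from how they are used above; the constructor order and the getters follow the demo code, but the field names themselves are assumptions, and only the members the demos need are shown:

    // Beans/LoginEvent.java (separate file)
    package Beans;

    public class LoginEvent {
        private Long userId;
        private String ip;
        private String loginState; // e.g. "success" or "fail"
        private Long timestamp;    // login time in seconds

        public LoginEvent(Long userId, String ip, String loginState, Long timestamp) {
            this.userId = userId;
            this.ip = ip;
            this.loginState = loginState;
            this.timestamp = timestamp;
        }

        public Long getUserId() { return userId; }
        public String getLoginState() { return loginState; }
        public Long getTimestamp() { return timestamp; }
    }

    // Beans/LoginFailWarning.java (separate file)
    package Beans;

    public class LoginFailWarning {
        private Long userId;
        private Long firstFailTime;
        private Long lastFailTime;
        private String warningMsg;

        public LoginFailWarning(Long userId, Long firstFailTime, Long lastFailTime, String warningMsg) {
            this.userId = userId;
            this.firstFailTime = firstFailTime;
            this.lastFailTime = lastFailTime;
            this.warningMsg = warningMsg;
        }

        @Override
        public String toString() {
            return "LoginFailWarning{userId=" + userId + ", firstFailTime=" + firstFailTime
                    + ", lastFailTime=" + lastFailTime + ", warningMsg='" + warningMsg + "'}";
        }
    }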

Problem 2: to detect three consecutive failures, we would have to keep extending the combining pattern with more conditions, which is cumbersome, so we implement it with a CEP looping pattern instead.

    package Project;

    import Beans.LoginEvent;
    import Beans.LoginFailWarning;
    import org.apache.flink.cep.CEP;
    import org.apache.flink.cep.PatternSelectFunction;
    import org.apache.flink.cep.PatternStream;
    import org.apache.flink.cep.pattern.Pattern;
    import org.apache.flink.cep.pattern.conditions.SimpleCondition;
    import org.apache.flink.streaming.api.TimeCharacteristic;
    import org.apache.flink.streaming.api.datastream.DataStream;
    import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
    import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
    import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
    import org.apache.flink.streaming.api.windowing.time.Time;

    import java.net.URL;
    import java.util.List;
    import java.util.Map;

    public class LoginFailWithCep {
        public static void main(String[] args) throws Exception {
            StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
            env.setParallelism(1);
            env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

            // Read the input data and assign event-time timestamps/watermarks (3 s of allowed out-of-orderness)
            URL resource = LoginFailWithCep.class.getResource("/LoginLog.csv");
            DataStream<LoginEvent> logEventStream = env.readTextFile(resource.getPath())
                    .map(line -> {
                        String[] fields = line.split(",");
                        return new LoginEvent(new Long(fields[0]), fields[1], fields[2], new Long(fields[3]));
                    })
                    .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<LoginEvent>(Time.seconds(3)) {
                        @Override
                        public long extractTimestamp(LoginEvent loginEvent) {
                            return loginEvent.getTimestamp() * 1000;
                        }
                    });

            // Define the match pattern: three consecutive failures within 5 seconds, as a looping pattern
            Pattern<LoginEvent, LoginEvent> loginFailPattern = Pattern.<LoginEvent>begin("failEvents")
                    .where(new SimpleCondition<LoginEvent>() {
                        @Override
                        public boolean filter(LoginEvent loginEvent) throws Exception {
                            return "fail".equals(loginEvent.getLoginState());
                        }
                    })
                    // .times(3)             // three failures with relaxed contiguity (other events may occur in between)
                    .times(3).consecutive()  // three strictly consecutive failures
                    .within(Time.seconds(5));

            // Apply the pattern to the stream, keyed by user id
            PatternStream<LoginEvent> patternStream = CEP.pattern(logEventStream.keyBy(LoginEvent::getUserId), loginFailPattern);

            // Select the matched complex events and convert them into warning records
            SingleOutputStreamOperator<LoginFailWarning> warningStream = patternStream.select(new LoginFailMatchDetectWarning());

            warningStream.print();
            env.execute("login fail detect with cep job");
        }

        // Custom PatternSelectFunction: build a warning from the matched failure events
        public static class LoginFailMatchDetectWarning implements PatternSelectFunction<LoginEvent, LoginFailWarning> {
            @Override
            public LoginFailWarning select(Map<String, List<LoginEvent>> map) throws Exception {
                // All events matched by the looping pattern are collected under the "failEvents" key
                List<LoginEvent> failEvents = map.get("failEvents");
                LoginEvent firstFailEvent = failEvents.get(0);
                LoginEvent lastFailEvent = failEvents.get(failEvents.size() - 1); // use the list size, not map.size()
                return new LoginFailWarning(firstFailEvent.getUserId(), firstFailEvent.getTimestamp(),
                        lastFailEvent.getTimestamp(), "login fail 3 times");
            }
        }
    }
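The outline near the top also mentions CEP timeout handling, which neither demo shows. Here is a minimal sketch of one way to do it with the same API level used above: the select overload that routes timed-out partial matches to a side output. It reuses the patternStream and LoginFailMatchDetectWarning from the second demo and additionally needs org.apache.flink.cep.PatternTimeoutFunction and org.apache.flink.util.OutputTag; the tag name and the message format are assumptions:

    // Timeout handling: partial matches that exceed within(...) are routed to a side output.
    OutputTag<String> timeoutTag = new OutputTag<String>("login-fail-timeout") {};

    SingleOutputStreamOperator<LoginFailWarning> resultStream = patternStream.select(
            timeoutTag,
            new PatternTimeoutFunction<LoginEvent, String>() {
                @Override
                public String timeout(Map<String, List<LoginEvent>> pattern, long timeoutTimestamp) {
                    // Called for partial matches that did not complete within the time window
                    LoginEvent firstFail = pattern.get("failEvents").get(0);
                    return "timeout: user " + firstFail.getUserId() + " partial match expired at " + timeoutTimestamp;
                }
            },
            new LoginFailMatchDetectWarning()); // completed matches are handled as before

    resultStream.print("matched");
    resultStream.getSideOutput(timeoutTag).print("timeout");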

 

Original post: https://blog.csdn.net/qq_40771567/article/details/117109794