Flink: consuming two Kafka streams and joining them, with sliding windows, operators, and output to MySQL (examples)

Java example

import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
 
import java.util.Properties;
 
public class KafkaStreamJoin {
 
    public static void main(String[] args) throws Exception {
        // Create the stream execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
         
        // Kafka connection properties
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "test");
 
        // Create a FlinkKafkaConsumer and add source 1
        FlinkKafkaConsumer<String> kafkaConsumer1 = new FlinkKafkaConsumer<>("topic1", new SimpleStringSchema(), props);
        DataStream<String> stream1 = env.addSource(kafkaConsumer1);
 
        // Create a FlinkKafkaConsumer and add source 2
        FlinkKafkaConsumer<String> kafkaConsumer2 = new FlinkKafkaConsumer<>("topic2", new SimpleStringSchema(), props);
        DataStream<String> stream2 = env.addSource(kafkaConsumer2);
 
        // Parse "key,value" records, extract timestamps for event-time windows, and key by the record key
        DataStream<Tuple2<String, Integer>> keyedStream1 = stream1.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String value) throws Exception {
                String[] parts = value.split(",");
                return new Tuple2<>(parts[0], Integer.parseInt(parts[1]));
            }
        }).assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<String, Integer>>() {
            @Override
            public long extractAscendingTimestamp(Tuple2<String, Integer> element) {
                return element.f1;
            }
        }).keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
            }
        });
 
        DataStream<Tuple2<String, Integer>> keyedStream2 = stream2.map(new MapFunction<String, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(String value) throws Exception {
                String[] parts = value.split(",");
                return new Tuple2<>(parts[0], Integer.parseInt(parts[1]));
            }
        }).assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<String, Integer>>() {
            @Override
            public long extractAscendingTimestamp(Tuple2<String, Integer> element) {
                return element.f1;
            }
        }).keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
            @Override
            public String getKey(Tuple2<String, Integer> value) throws Exception {
                return value.f0;
            }
        });
 
        // Join the two streams over a sliding event-time window (30 s size, sliding every 10 s)
        DataStream<Tuple2<String, Integer>> result = keyedStream1.join(keyedStream2)
                .where(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) {
                        return value.f0;
                    }
                })
                .equalTo(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) {
                        return value.f0;
                    }
                })
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .apply(new JoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> join(Tuple2<String, Integer> left, Tuple2<String, Integer> right) {
                        // Combine the values of the two records that share the same key
                        return new Tuple2<>(left.f0, left.f1 + right.f1);
                    }
                });
 
        // Write the joined results to MySQL via the JDBC connector (flink-connector-jdbc)
        result.addSink(JdbcSink.<Tuple2<String, Integer>>sink(
                "INSERT INTO `result` (`key`, `count`) VALUES (?, ?)",
                (ps, value) -> {
                    ps.setString(1, value.f0);
                    ps.setInt(2, value.f1);
                },
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withUrl("jdbc:mysql://localhost:3306/test")
                        .withDriverName("com.mysql.cj.jdbc.Driver")
                        .withUsername("root")
                        .withPassword("password")
                        .build()));
 
        // Execute the job
        env.execute("KafkaStreamJoin");
    }
}
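
The JDBC sink above assumes a `result` table already exists in the `test` database. A minimal MySQL definition matching the two bind parameters could look like this (the table and column names simply mirror the INSERT statement; the column types are assumptions):

CREATE TABLE `result` (
  `key`   VARCHAR(64) NOT NULL,
  `count` INT         NOT NULL
);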

  

Flink Table API example

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
 
import java.util.Properties;
 
import static org.apache.flink.table.api.Expressions.$;
 
public class KafkaStreamJoin {
 
    public static void main(String[] args) throws Exception {
 
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);
 
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "test");
 
        // Records are expected as "key,value,epochMillis"
        DataStream<Tuple3<String, Integer, Long>> stream1 = env
                .addSource(new FlinkKafkaConsumer<>("stream1", new SimpleStringSchema(), props))
                .map(value -> {
                    String[] parts = value.split(",");
                    return Tuple3.of(parts[0], Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
                })
                .returns(Types.TUPLE(Types.STRING, Types.INT, Types.LONG))
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<Tuple3<String, Integer, Long>>forMonotonousTimestamps()
                                .withTimestampAssigner((element, ts) -> element.f2));
 
        DataStream<Tuple3<String, Integer, Long>> stream2 = env
                .addSource(new FlinkKafkaConsumer<>("stream2", new SimpleStringSchema(), props))
                .map(value -> {
                    String[] parts = value.split(",");
                    return Tuple3.of(parts[0], Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
                })
                .returns(Types.TUPLE(Types.STRING, Types.INT, Types.LONG))
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<Tuple3<String, Integer, Long>>forMonotonousTimestamps()
                                .withTimestampAssigner((element, ts) -> element.f2));
 
        // Register the streams as tables; ts1/ts2 become event-time (rowtime) attributes
        Table table1 = tableEnv.fromDataStream(stream1, $("key1"), $("value1"), $("ts1").rowtime());
        Table table2 = tableEnv.fromDataStream(stream2, $("key2"), $("value2"), $("ts2").rowtime());
        tableEnv.createTemporaryView("t1", table1);
        tableEnv.createTemporaryView("t2", table2);
 
        // Interval join (±5 s on the rowtime) plus a sliding (HOP) window: 30 s size, sliding every 10 s
        Table resultTable = tableEnv.sqlQuery(
                "SELECT t1.key1, t1.value1, t2.value2, COUNT(*) AS cnt " +
                "FROM t1 JOIN t2 ON t1.key1 = t2.key2 " +
                "AND t2.ts2 BETWEEN t1.ts1 - INTERVAL '5' SECOND AND t1.ts1 + INTERVAL '5' SECOND " +
                "GROUP BY t1.key1, t1.value1, t2.value2, HOP(t1.ts1, INTERVAL '10' SECOND, INTERVAL '30' SECOND)");
 
        // Register a MySQL sink table through the JDBC SQL connector (flink-connector-jdbc)
        tableEnv.executeSql(
                "CREATE TABLE mysql_sink (" +
                "  key1 STRING," +
                "  value1 INT," +
                "  value2 INT," +
                "  cnt BIGINT," +
                "  PRIMARY KEY (key1, value1, value2) NOT ENFORCED" +
                ") WITH (" +
                "  'connector' = 'jdbc'," +
                "  'url' = 'jdbc:mysql://localhost:3306/test'," +
                "  'table-name' = 'join_result'," +
                "  'driver' = 'com.mysql.cj.jdbc.Driver'," +
                "  'username' = 'root'," +
                "  'password' = '123456'" +
                ")");
 
        // Write the aggregated result to MySQL; executeInsert submits the job, so env.execute() is not needed
        resultTable.executeInsert("mysql_sink");
    }
}
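
The sink DDL above points at a MySQL table named join_result (a name introduced for this example). Because the query is an upsert aggregation, the MySQL table should carry the same primary key; a possible definition (column types are assumptions):

CREATE TABLE join_result (
  key1   VARCHAR(64) NOT NULL,
  value1 INT         NOT NULL,
  value2 INT         NOT NULL,
  cnt    BIGINT      NOT NULL,
  PRIMARY KEY (key1, value1, value2)
);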

Maven dependencies for the Java examples

Group ID           Artifact ID                        Version
org.apache.flink   flink-core                         1.12.5
org.apache.flink   flink-streaming-java_2.12          1.12.5
org.apache.flink   flink-table-api-java-bridge_2.12   1.12.5
org.apache.flink   flink-table-planner-blink_2.12     1.12.5
org.apache.flink   flink-connector-kafka_2.12         1.12.5
org.apache.flink   flink-connector-jdbc_2.12          1.12.5
org.apache.kafka   kafka-clients                      2.4.1
mysql              mysql-connector-java               8.0.23

  

 

Scala example

import java.sql.PreparedStatement
import java.util.Properties
 
import org.apache.flink.api.common.functions.JoinFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.scala._
import org.apache.flink.connector.jdbc.{JdbcConnectionOptions, JdbcSink, JdbcStatementBuilder}
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
 
object FlinkKafkaJoinExample {
  case class SensorReading(id: String, timestamp: Long, temperature: Double)
 
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
 
    // Read the input streams from Kafka
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("group.id", "test-group")
    properties.setProperty("auto.offset.reset", "earliest")
 
    val stream1: DataStream[SensorReading] = env.addSource(
      new FlinkKafkaConsumer[String]("topic1", new SimpleStringSchema(), properties)
    )
      .map(data => {
        val dataArray = data.split(",")
        SensorReading(dataArray(0), dataArray(1).toLong, dataArray(2).toDouble)
      })
      .assignAscendingTimestamps(_.timestamp)
 
    val stream2: DataStream[SensorReading] = env.addSource(
      new FlinkKafkaConsumer[String]("topic2", new SimpleStringSchema(), properties)
    )
      .map(data => {
        val dataArray = data.split(",")
        SensorReading(dataArray(0), dataArray(1).toLong, dataArray(2).toDouble)
      })
      .assignAscendingTimestamps(_.timestamp)
 
    // Join the two streams on the sensor id over a sliding event-time window
    val joinedStream: DataStream[(String, Double, Double)] = stream1.join(stream2)
      .where(_.id)
      .equalTo(_.id)
      .window(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(5)))
      .apply(new JoinFunction[SensorReading, SensorReading, (String, Double, Double)] {
        override def join(first: SensorReading, second: SensorReading): (String, Double, Double) =
          (first.id, first.temperature, second.temperature)
      })
 
    // Apply operators to the joined stream: average the two temperatures and keep readings above 30
    val resultStream: DataStream[(String, Double)] = joinedStream
      .map(data => (data._1, (data._2 + data._3) / 2))
      .filter(data => data._2 > 30)
 
    // Write the result to MySQL via the JDBC connector (flink-connector-jdbc)
    resultStream.addSink(JdbcSink.sink[(String, Double)](
      "INSERT INTO result_table (id, temperature) VALUES (?, ?)",
      new JdbcStatementBuilder[(String, Double)] {
        override def accept(ps: PreparedStatement, v: (String, Double)): Unit = {
          ps.setString(1, v._1)
          ps.setDouble(2, v._2)
        }
      },
      new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
        .withUrl("jdbc:mysql://localhost:3306/test")
        .withDriverName("com.mysql.jdbc.Driver")
        .withUsername("root")
        .withPassword("password")
        .build()
    ))
 
    env.execute("Flink Kafka Join Example")
  }
}
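
The Scala sink writes id/temperature pairs into result_table; a minimal MySQL layout matching the two bind parameters (column types are assumptions) might be:

CREATE TABLE result_table (
  id          VARCHAR(64) NOT NULL,
  temperature DOUBLE      NOT NULL
);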

  

Maven dependencies for the Scala example

<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-jdbc_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.26</version>
    </dependency>
</dependencies>

 
