Flink: consuming two Kafka streams and joining the data, with examples covering sliding windows, operators, and writing results to MySQL
Java example
```java
import org.apache.flink.api.common.functions.JoinFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;

import java.util.Properties;

public class KafkaStreamJoin {

    public static void main(String[] args) throws Exception {
        // Create the execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Kafka connection properties
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "test");

        // Create a FlinkKafkaConsumer for source 1
        FlinkKafkaConsumer<String> kafkaConsumer1 =
                new FlinkKafkaConsumer<>("topic1", new SimpleStringSchema(), props);
        DataStream<String> stream1 = env.addSource(kafkaConsumer1);

        // Create a FlinkKafkaConsumer for source 2
        FlinkKafkaConsumer<String> kafkaConsumer2 =
                new FlinkKafkaConsumer<>("topic2", new SimpleStringSchema(), props);
        DataStream<String> stream2 = env.addSource(kafkaConsumer2);

        // Parse "key,value" records and assign timestamps for event-time windows
        DataStream<Tuple2<String, Integer>> parsedStream1 = stream1
                .map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String value) throws Exception {
                        String[] parts = value.split(",");
                        return new Tuple2<>(parts[0], Integer.parseInt(parts[1]));
                    }
                })
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<String, Integer>>() {
                    @Override
                    public long extractAscendingTimestamp(Tuple2<String, Integer> element) {
                        return element.f1;
                    }
                });

        DataStream<Tuple2<String, Integer>> parsedStream2 = stream2
                .map(new MapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> map(String value) throws Exception {
                        String[] parts = value.split(",");
                        return new Tuple2<>(parts[0], Integer.parseInt(parts[1]));
                    }
                })
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<Tuple2<String, Integer>>() {
                    @Override
                    public long extractAscendingTimestamp(Tuple2<String, Integer> element) {
                        return element.f1;
                    }
                });

        // Join the two streams on the key over a sliding event-time window,
        // then aggregate the joined values per key
        DataStream<Tuple2<String, Integer>> result = parsedStream1.join(parsedStream2)
                .where(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) throws Exception {
                        return value.f0;
                    }
                })
                .equalTo(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) throws Exception {
                        return value.f0;
                    }
                })
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .apply(new JoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {
                    @Override
                    public Tuple2<String, Integer> join(Tuple2<String, Integer> left, Tuple2<String, Integer> right) {
                        return new Tuple2<>(left.f0, left.f1 + right.f1);
                    }
                })
                .keyBy(value -> value.f0)
                .window(SlidingEventTimeWindows.of(Time.seconds(30), Time.seconds(10)))
                .sum(1);

        // Write the results to MySQL via the JDBC connector
        String sql = "INSERT INTO result (`key`, `count`) VALUES (?, ?)";
        result.addSink(JdbcSink.sink(
                sql,
                (ps, value) -> {
                    ps.setString(1, value.f0);
                    ps.setInt(2, value.f1);
                },
                JdbcExecutionOptions.builder().withBatchIntervalMs(5000).build(),
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withUrl("jdbc:mysql://localhost:3306/test")
                        .withDriverName("com.mysql.jdbc.Driver")
                        .withUsername("root")
                        .withPassword("password")
                        .build()));

        // Run the job
        env.execute("KafkaStreamJoin");
    }
}
```
Using the Flink Table API
```java
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.table.api.EnvironmentSettings;
import org.apache.flink.table.api.Slide;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;

import java.util.Properties;

import static org.apache.flink.table.api.Expressions.$;
import static org.apache.flink.table.api.Expressions.and;
import static org.apache.flink.table.api.Expressions.lit;

public class KafkaStreamJoin {

    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        EnvironmentSettings settings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build();
        StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env, settings);

        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "localhost:9092");
        props.setProperty("group.id", "test");

        DataStream<String> stream1 = env
                .addSource(new FlinkKafkaConsumer<>("stream1", new SimpleStringSchema(), props));
        DataStream<String> stream2 = env
                .addSource(new FlinkKafkaConsumer<>("stream2", new SimpleStringSchema(), props));

        // Parse "key,value,timestamp" records and assign event-time timestamps
        // so the rowtime attributes below can be derived from the records
        DataStream<Tuple3<String, Integer, Long>> parsed1 = stream1
                .map(new MapFunction<String, Tuple3<String, Integer, Long>>() {
                    @Override
                    public Tuple3<String, Integer, Long> map(String value) {
                        String[] parts = value.split(",");
                        return new Tuple3<>(parts[0], Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
                    }
                })
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<Tuple3<String, Integer, Long>>forMonotonousTimestamps()
                                .withTimestampAssigner((element, ts) -> element.f2));

        DataStream<Tuple3<String, Integer, Long>> parsed2 = stream2
                .map(new MapFunction<String, Tuple3<String, Integer, Long>>() {
                    @Override
                    public Tuple3<String, Integer, Long> map(String value) {
                        String[] parts = value.split(",");
                        return new Tuple3<>(parts[0], Integer.parseInt(parts[1]), Long.parseLong(parts[2]));
                    }
                })
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy.<Tuple3<String, Integer, Long>>forMonotonousTimestamps()
                                .withTimestampAssigner((element, ts) -> element.f2));

        Table table1 = tableEnv.fromDataStream(parsed1, $("key1"), $("value1"), $("ts1").rowtime());
        Table table2 = tableEnv.fromDataStream(parsed2, $("key2"), $("value2"), $("ts2").rowtime());

        // Interval join on the key, then count per key/value pair
        // in a sliding event-time window (30 s size, 10 s slide)
        Table resultTable = table1.join(table2)
                .where(
                        and(
                                $("key1").isEqual($("key2")),
                                $("ts2").isGreaterOrEqual($("ts1").minus(lit(5).seconds())),
                                $("ts2").isLessOrEqual($("ts1").plus(lit(5).seconds()))))
                .select($("key1"), $("value1"), $("value2"), $("ts1"))
                .window(Slide.over(lit(30).seconds()).every(lit(10).seconds()).on($("ts1")).as("w"))
                .groupBy($("w"), $("key1"), $("value1"), $("value2"))
                .select($("key1"), $("value1"), $("value2"), $("key1").count().as("cnt"));

        // Register a MySQL sink table via the JDBC SQL connector
        tableEnv.executeSql(
                "CREATE TABLE mysql_sink (" +
                "  key1 STRING," +
                "  value1 INT," +
                "  value2 INT," +
                "  cnt BIGINT" +
                ") WITH (" +
                "  'connector' = 'jdbc'," +
                "  'url' = 'jdbc:mysql://localhost:3306/test'," +
                "  'table-name' = 'result'," +
                "  'driver' = 'com.mysql.jdbc.Driver'," +
                "  'username' = 'root'," +
                "  'password' = '123456'" +
                ")");

        // Write the result into MySQL; executeInsert submits the job,
        // so no separate env.execute() call is needed
        resultTable.executeInsert("mysql_sink");
    }
}
```
Maven dependencies for the Java version
Group ID | Artifact ID | Version |
---|---|---|
org.apache.flink | flink-core | 1.12.5 |
org.apache.flink | flink-streaming-java_2.12 | 1.12.5 |
org.apache.flink | flink-table-api-java-bridge_2.12 | 1.12.5 |
org.apache.flink | flink-table-planner-blink_2.12 | 1.12.5 |
org.apache.flink | flink-connector-kafka_2.12 | 1.12.5 |
org.apache.kafka | kafka-clients | 2.4.1 |
mysql | mysql-connector-java | 8.0.23 |
org.apache.flink | flink-connector-jdbc_2.12 | 1.12.5 |
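For convenience, the same dependencies expressed as a pom.xml fragment (a sketch assuming the Scala 2.12 artifacts and versions listed above; adjust them to match your Flink deployment):

```xml
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-java_2.12</artifactId>
        <version>1.12.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-api-java-bridge_2.12</artifactId>
        <version>1.12.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-table-planner-blink_2.12</artifactId>
        <version>1.12.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka_2.12</artifactId>
        <version>1.12.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-jdbc_2.12</artifactId>
        <version>1.12.5</version>
    </dependency>
    <dependency>
        <groupId>org.apache.kafka</groupId>
        <artifactId>kafka-clients</artifactId>
        <version>2.4.1</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.23</version>
    </dependency>
</dependencies>
```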
Scala example
```scala
import java.sql.PreparedStatement
import java.util.Properties

import org.apache.flink.api.common.functions.JoinFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.connector.jdbc.{JdbcConnectionOptions, JdbcSink, JdbcStatementBuilder}
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer

object FlinkKafkaJoinExample {

  case class SensorReading(id: String, timestamp: Long, temperature: Double)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Read the two input streams from Kafka
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "localhost:9092")
    properties.setProperty("group.id", "test-group")
    properties.setProperty("auto.offset.reset", "earliest")

    val stream1: DataStream[SensorReading] = env
      .addSource(new FlinkKafkaConsumer[String]("topic1", new SimpleStringSchema(), properties))
      .map(data => {
        val dataArray = data.split(",")
        SensorReading(dataArray(0), dataArray(1).toLong, dataArray(2).toDouble)
      })
      .assignAscendingTimestamps(_.timestamp)

    val stream2: DataStream[SensorReading] = env
      .addSource(new FlinkKafkaConsumer[String]("topic2", new SimpleStringSchema(), properties))
      .map(data => {
        val dataArray = data.split(",")
        SensorReading(dataArray(0), dataArray(1).toLong, dataArray(2).toDouble)
      })
      .assignAscendingTimestamps(_.timestamp)

    // Join the two streams on the sensor id over a sliding event-time window
    val joinedStream: DataStream[(String, Double, Double)] = stream1.join(stream2)
      .where(_.id)
      .equalTo(_.id)
      .window(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(5)))
      .apply(new JoinFunction[SensorReading, SensorReading, (String, Double, Double)] {
        override def join(first: SensorReading, second: SensorReading): (String, Double, Double) =
          (first.id, first.temperature, second.temperature)
      })

    // Apply operators to the joined stream: average the two readings, then filter
    val resultStream: DataStream[(String, Double)] = joinedStream
      .map(data => (data._1, (data._2 + data._3) / 2))
      .filter(data => data._2 > 30)

    // Write the results to MySQL via the JDBC connector
    resultStream.addSink(JdbcSink.sink[(String, Double)](
      "INSERT INTO result_table (id, temperature) VALUES (?, ?)",
      new JdbcStatementBuilder[(String, Double)] {
        override def accept(ps: PreparedStatement, v: (String, Double)): Unit = {
          ps.setString(1, v._1)
          ps.setDouble(2, v._2)
        }
      },
      new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
        .withUrl("jdbc:mysql://localhost:3306/test")
        .withDriverName("com.mysql.jdbc.Driver")
        .withUsername("root")
        .withPassword("password")
        .build()
    ))

    env.execute("Flink Kafka Join Example")
  }
}
```
Maven dependencies for the Scala version
```xml
<dependencies>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-streaming-scala_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-clients_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-kafka_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-connector-jdbc_2.11</artifactId>
        <version>1.12.2</version>
    </dependency>
    <dependency>
        <groupId>mysql</groupId>
        <artifactId>mysql-connector-java</artifactId>
        <version>8.0.26</version>
    </dependency>
</dependencies>
```