【Flink Learning Series 2】Consuming Kafka data and sinking it to PostgreSQL
import java.util.Properties;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple15;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

/**
 * Demo: Flink Connectors - FlinkKafkaConsumer as the source, with the parsed records written to PostgreSQL.
 */
public class SinkDemoKafkaToPostgresql {
    public static void main(String[] args) throws Exception {
        // TODO 0. env
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        // TODO 1. source
        // Kafka connection properties
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "192.168.78.203:9092,192.168.78.204:9092,192.168.78.205:9092"); // broker list
        props.setProperty("group.id", "flink2"); // consumer group id
        // latest: resume from the committed offset if one exists, otherwise start from the newest messages
        // earliest: resume from the committed offset if one exists, otherwise start from the oldest messages
        props.setProperty("auto.offset.reset", "earliest");
        // a background thread checks Kafka every 5s, so newly added partitions are discovered dynamically
        props.setProperty("flink.partition-discovery.interval-millis", "5000");
        // auto-commit offsets (to the default offsets topic; once checkpointing is covered later, offsets are also stored in checkpoints)
        props.setProperty("enable.auto.commit", "true");
        props.setProperty("auto.commit.interval.ms", "2000"); // auto-commit interval

        // build the FlinkKafkaConsumer / kafkaSource from the connection properties
        FlinkKafkaConsumer<String> kafkaSource =
                new FlinkKafkaConsumer<>("FileToKafka", new SimpleStringSchema(), props);
        // use the kafkaSource
        // DataStream<String> kafkaDS = env.addSource(kafkaSource).setParallelism(4);

        // TODO 2. transformation
        DataStream<Tuple15<String, String, String, String, String, String, String, String, String,
                String, String, String, String, String, String>> messageStream = env.addSource(kafkaSource)
                .flatMap(new FlatMapFunction<String, Tuple15<String, String, String, String, String, String, String,
                        String, String, String, String, String, String, String, String>>() {
                    @Override
                    public void flatMap(String value,
                                        Collector<Tuple15<String, String, String, String, String, String, String,
                                                String, String, String, String, String, String, String, String>> out) throws Exception {
                        // value is one line of input; split it into its pipe-delimited fields
                        String[] arr = value.split("\\|");
                        // System.out.println("==========" + arr.length + "|" + arr[14]);
                        if (arr.length >= 15) { // guard against short records before reading arr[14]
                            out.collect(new Tuple15<>(arr[0], arr[1], arr[2], arr[3], arr[4], arr[5], arr[6], arr[7],
                                    arr[8], arr[9], arr[10], arr[11], arr[12], arr[13], arr[14]));
                        }
                    }
                }).setParallelism(4);

        // TODO 3. sink
        // messageStream.print();
        messageStream.addSink(new PostgreSQLSink()).setParallelism(4);

        // TODO 4. execute
        env.execute();
    }
}
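The job above uses the legacy FlinkKafkaConsumer API. On newer Flink releases (1.14+) the KafkaSource builder is the recommended way to express the same source. The following is only a minimal sketch, assuming the matching flink-connector-kafka dependency is on the classpath; the broker list, topic, group id and offset behaviour are taken from the job above.

import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.connector.kafka.source.KafkaSource;
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.kafka.clients.consumer.OffsetResetStrategy;

// Sketch only: shows the KafkaSource builder equivalent of the FlinkKafkaConsumer setup above.
public class KafkaSourceSketch {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        KafkaSource<String> source = KafkaSource.<String>builder()
                .setBootstrapServers("192.168.78.203:9092,192.168.78.204:9092,192.168.78.205:9092")
                .setTopics("FileToKafka")
                .setGroupId("flink2")
                // resume from committed offsets, fall back to the earliest messages (mirrors auto.offset.reset=earliest)
                .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.EARLIEST))
                // check for new partitions every 5s (mirrors flink.partition-discovery.interval-millis)
                .setProperty("partition.discovery.interval.ms", "5000")
                .setValueOnlyDeserializer(new SimpleStringSchema())
                .build();

        DataStream<String> kafkaDS =
                env.fromSource(source, WatermarkStrategy.noWatermarks(), "Kafka Source").setParallelism(4);

        kafkaDS.print();
        env.execute();
    }
}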
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

import org.apache.flink.api.java.tuple.Tuple15;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;

public class PostgreSQLSink extends RichSinkFunction<Tuple15<String, String, String, String, String, String, String,
        String, String, String, String, String, String, String, String>> {

    private static final long serialVersionUID = 1L;

    private Connection connection;
    private PreparedStatement preparedStatement;

    /**
     * open() is the initialization hook; it runs once per parallel instance, before any invoke() call.
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // JDBC connection settings
        String USERNAME = "test_user";
        String PASSWORD = "abc123";
        String DRIVERNAME = "org.postgresql.Driver";
        String DBURL = "jdbc:postgresql://192.168.1.203/test_db";

        // load the JDBC driver
        Class.forName(DRIVERNAME);
        // open the database connection
        connection = DriverManager.getConnection(DBURL, USERNAME, PASSWORD);
        String sql = "insert into kafka_ms_sffdata(comcode, organcode, deskdate, type, currency, riskcode, clausetype,"
                + " kindcode, businessnature, policyno, insuredtype, amt, agentcode, startdate, sffkindcode)"
                + " values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
        preparedStatement = connection.prepareStatement(sql);
        super.open(parameters);
    }

    /**
     * invoke() binds one tuple to the prepared statement and inserts it into the database.
     *
     * @param data the incoming record
     */
    @Override
    public void invoke(Tuple15<String, String, String, String, String, String, String, String, String, String,
            String, String, String, String, String> data) throws Exception {
        try {
            for (int i = 0; i < 15; i++) {
                String columndata = data.getField(i);
                preparedStatement.setString(i + 1, columndata);
            }
            preparedStatement.executeUpdate();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * close() is the tear-down hook, executed when the task shuts down; it releases the JDBC resources.
     */
    @Override
    public void close() throws Exception {
        if (preparedStatement != null) {
            preparedStatement.close();
        }
        if (connection != null) {
            connection.close();
        }
        super.close();
    }
}
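The hand-written RichSinkFunction above works, but it issues one INSERT per record and offers no batching or retries. If the flink-connector-jdbc dependency (plus the PostgreSQL driver) is available, the same sink can also be built with Flink's JdbcSink. This is not the code from the post, only a sketch under that assumption; the SQL statement, table, URL and credentials are reused from PostgreSQLSink, and the batch sizes are illustrative values.

import java.sql.PreparedStatement;
import java.sql.SQLException;

import org.apache.flink.api.java.tuple.Tuple15;
import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;
import org.apache.flink.connector.jdbc.JdbcStatementBuilder;
import org.apache.flink.streaming.api.functions.sink.SinkFunction;

// Sketch only: a batching JDBC sink equivalent to PostgreSQLSink; use it as messageStream.addSink(buildSink()).
public class PostgreSQLJdbcSinkSketch {

    public static SinkFunction<Tuple15<String, String, String, String, String, String, String, String, String,
            String, String, String, String, String, String>> buildSink() {
        String sql = "insert into kafka_ms_sffdata(comcode, organcode, deskdate, type, currency, riskcode, clausetype,"
                + " kindcode, businessnature, policyno, insuredtype, amt, agentcode, startdate, sffkindcode)"
                + " values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";

        return JdbcSink.sink(
                sql,
                new JdbcStatementBuilder<Tuple15<String, String, String, String, String, String, String, String,
                        String, String, String, String, String, String, String>>() {
                    @Override
                    public void accept(PreparedStatement statement,
                                       Tuple15<String, String, String, String, String, String, String, String,
                                               String, String, String, String, String, String, String> data) throws SQLException {
                        // bind the 15 tuple fields to the 15 placeholders, same as PostgreSQLSink.invoke()
                        for (int i = 0; i < 15; i++) {
                            String columndata = data.getField(i);
                            statement.setString(i + 1, columndata);
                        }
                    }
                },
                JdbcExecutionOptions.builder()
                        .withBatchSize(500)        // flush after 500 records ...
                        .withBatchIntervalMs(200)  // ... or after 200 ms, whichever comes first
                        .withMaxRetries(3)
                        .build(),
                new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                        .withUrl("jdbc:postgresql://192.168.1.203/test_db")
                        .withDriverName("org.postgresql.Driver")
                        .withUsername("test_user")
                        .withPassword("abc123")
                        .build());
    }
}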