[Flink Learning Series 2] Consuming Kafka Data and Sinking It to PostgreSQL
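The job below reads pipe-delimited records from the Kafka topic FileToKafka, splits every line into a 15-field Tuple15, and writes each tuple to the PostgreSQL table kafka_ms_sffdata through a custom sink (PostgreSQLSink, shown after the job).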

import java.util.Properties;

import org.apache.flink.api.common.RuntimeExecutionMode;
import org.apache.flink.api.common.functions.FlatMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.api.java.tuple.Tuple15;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;

/**
 * Desc: Demonstrates the Flink Kafka connector consumer/source, sinking the consumed records to PostgreSQL
 */
public class SinkDemoKafkaToPostgresql {
    public static void main(String[] args) throws Exception {
        //TODO 0.env
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setRuntimeMode(RuntimeExecutionMode.AUTOMATIC);

        //TODO 1.source
        //Prepare the Kafka connection properties
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "192.168.78.203:9092,192.168.78.204:9092,192.168.78.205:9092");//Kafka cluster broker addresses
        props.setProperty("group.id", "flink2");//consumer group id
        props.setProperty("auto.offset.reset","earliest");//latest: resume from the committed offset if one exists, otherwise start from the newest/last message; earliest: resume from the committed offset if one exists, otherwise start from the earliest/first message
        props.setProperty("flink.partition-discovery.interval-millis","5000");//starts a background thread that re-checks the Kafka partitions every 5s, enabling dynamic partition discovery
        props.setProperty("enable.auto.commit", "true");//auto-commit offsets (to the default offsets topic; once checkpointing is introduced later, offsets are stored both in the checkpoint and in the default topic)
        props.setProperty("auto.commit.interval.ms", "2000");//auto-commit interval
        //Create the FlinkKafkaConsumer / Kafka source from the connection properties
        FlinkKafkaConsumer<String> kafkaSource = new FlinkKafkaConsumer<>("FileToKafka", new SimpleStringSchema(), props);
        //Use the Kafka source
//        DataStream<String> kafkaDS = env.addSource(kafkaSource).setParallelism(4);

        //TODO 2.transformation
        DataStream<Tuple15<String, String, String, String, String, String, String, String, String, String, String, String, String, String, String>> messageStream = env.addSource(kafkaSource)
                .flatMap(new FlatMapFunction<String, Tuple15<String, String, String, String, String, String, String, String, String, String, String, String, String, String, String>>() {
            @Override
            public void flatMap(String value, Collector<Tuple15<String, String, String,String, String, String,String, String, String,String, String, String,String, String, String>> out) throws Exception {
                //value is one line of input data
                String[] arr = value.split("\\|");
//                System.out.println("=========="+arr.length+"|"+arr[14]);
                if (arr.length >= 15) {
                    out.collect(new Tuple15<>(arr[0],arr[1],arr[2],arr[3],arr[4],arr[5],arr[6],arr[7],arr[8],arr[9],arr[10],arr[11],arr[12],arr[13],arr[14]));
                }
            }
        }).setParallelism(4);
        
        //TODO 3.sink
//        messageStream.print();
        messageStream.addSink(new PostgreSQLSink()).setParallelism(4);
        //TODO 4.execute
        env.execute();
    }
}
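For a quick local test you can push a pipe-delimited line into the FileToKafka topic with a plain Kafka producer. The sketch below is only an illustration: the class name TestProducer and the 15 placeholder field values are made up here, and only one broker from the cluster above is listed.

import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class TestProducer {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.setProperty("bootstrap.servers", "192.168.78.203:9092");//same cluster as the Flink consumer
        props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        //15 pipe-separated placeholder fields, matching what the flatMap above expects
        String line = "f1|f2|f3|f4|f5|f6|f7|f8|f9|f10|f11|f12|f13|f14|f15";

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            producer.send(new ProducerRecord<>("FileToKafka", line));
            producer.flush();
        }
    }
}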

 

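The PostgreSQLSink used above is a custom RichSinkFunction: open() creates one JDBC connection and a prepared INSERT per parallel sink instance, invoke() binds the 15 tuple fields and writes one row per record, and close() releases the connection. The target table kafka_ms_sffdata must already exist with the 15 columns named in the INSERT statement.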
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;

import org.apache.flink.api.java.tuple.Tuple15;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;


public class PostgreSQLSink extends RichSinkFunction<Tuple15<String,String,String,String,String,String,String,String,String,String,String,String,String,String,String>> {

    private static final long serialVersionUID = 1L;

    private Connection connection;
    private PreparedStatement preparedStatement;
    /**
     * open() is the initialization method; it runs once, before any call to invoke().
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        // JDBC connection settings
        String USERNAME = "test_user";
        String PASSWORD = "abc123";
        String DRIVERNAME = "org.postgresql.Driver";
        String DBURL = "jdbc:postgresql://192.168.1.203/test_db";
        // Load the JDBC driver
        Class.forName(DRIVERNAME);
        // Open the database connection and prepare the parameterized insert statement
        connection = DriverManager.getConnection(DBURL, USERNAME, PASSWORD);
        String sql = "insert into kafka_ms_sffdata(comcode, organcode, deskdate,type,currency,riskcode,clausetype,kindcode,businessnature,policyno,insuredtype,amt,agentcode,startdate,sffkindcode) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)";
        preparedStatement = connection.prepareStatement(sql);
        super.open(parameters);
    }

    /**
     * invoke() is called once per incoming record; it binds the tuple's fields and inserts one row into the database.
     * @param data the incoming record
     * @throws Exception
     */

    @Override
    public void invoke(Tuple15<String,String,String,String,String,String,String,String,String,String,String,String,String,String,String> data) throws Exception{
        try {
            // Bind the 15 string fields to the 15 placeholders (JDBC parameter indexes are 1-based)
            for (int i = 0; i < 15; i++) {
                String columndata = data.getField(i);
                preparedStatement.setString(i + 1, columndata);
            }
            preparedStatement.executeUpdate();
        }catch (Exception e){
            e.printStackTrace();
        }

    }

    /**
     * close() is the teardown method; it runs once when the sink is disposed and releases the JDBC resources.
     */
    @Override
    public void close() throws Exception {
        if(preparedStatement != null){
            preparedStatement.close();
        }
        if(connection != null){
            connection.close();
        }
        super.close();
    }
}
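The hand-written sink above issues a single-row INSERT per record. As an alternative sketch (not part of the original code), newer Flink versions ship a JDBC connector whose JdbcSink batches and retries writes; assuming the flink-connector-jdbc and postgresql driver dependencies are on the classpath, the addSink call in the job could be replaced roughly like this, reusing the same INSERT statement and connection details:

import org.apache.flink.connector.jdbc.JdbcConnectionOptions;
import org.apache.flink.connector.jdbc.JdbcExecutionOptions;
import org.apache.flink.connector.jdbc.JdbcSink;

// Inside SinkDemoKafkaToPostgresql, instead of messageStream.addSink(new PostgreSQLSink()):
messageStream.addSink(JdbcSink.sink(
        "insert into kafka_ms_sffdata(comcode, organcode, deskdate,type,currency,riskcode,clausetype,kindcode,businessnature,policyno,insuredtype,amt,agentcode,startdate,sffkindcode) values (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
        (statement, tuple) -> {
            // Bind the 15 string fields, 1-based, exactly as PostgreSQLSink does
            for (int i = 0; i < 15; i++) {
                String columndata = tuple.getField(i);
                statement.setString(i + 1, columndata);
            }
        },
        JdbcExecutionOptions.builder()
                .withBatchSize(100)         // flush after 100 buffered rows ...
                .withBatchIntervalMs(2000)  // ... or after 2 seconds, whichever comes first
                .withMaxRetries(3)
                .build(),
        new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
                .withUrl("jdbc:postgresql://192.168.1.203/test_db")
                .withDriverName("org.postgresql.Driver")
                .withUsername("test_user")
                .withPassword("abc123")
                .build()
)).setParallelism(4);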

 
