Flink 1.13.1

mongo-flink   connector :    https://github.com/mongo-flink/mongo-flink

mongodb-cdc

官文: 

       https://ververica.github.io/flink-cdc-connectors/master/content/connectors/mongodb-cdc.html

依赖:

        <!-- https://mvnrepository.com/artifact/com.ververica/flink-connector-mongodb-cdc -->
        <!-- doc: https://ververica.github.io/flink-cdc-connectors/master/content/connectors/mongodb-cdc.html -->
        <dependency>
            <groupId>com.ververica</groupId>
            <artifactId>flink-connector-mongodb-cdc</artifactId>
            <version>2.2.0</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.2.79</version>
        </dependency>

 

package com.sea.flink.sea.cdc;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import com.ververica.cdc.connectors.mongodb.MongoDBSource;
import com.ververica.cdc.debezium.DebeziumDeserializationSchema;
import com.ververica.cdc.debezium.DebeziumSourceFunction;
import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema;
import com.ververica.cdc.debezium.StringDebeziumDeserializationSchema;
import lombok.Data;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.source.SourceFunction;
import org.apache.flink.util.Collector;
import org.apache.kafka.connect.data.Struct;
import org.apache.kafka.connect.source.SourceRecord;

import java.util.Date;

/**
 * Demo Flink job that reads MongoDB change events through the Flink CDC MongoDB
 * connector, reshapes each event into a small JSON envelope and prints it.
 *
 * <pre>
 * Package : com.sea.flink.sea.cdc
 * Author  : Sea
 * Date    : 6/13/22 5:40 PM
 * </pre>
 */
public class MongoCDCTest {

    /**
     * Builds the CDC source, wires the pipeline (source -> _id flattening -> print) and runs it.
     *
     * @param args unused
     * @throws Exception if the Flink job fails to build or execute
     */
    public static void main(String[] args) throws Exception {
        // NOTE: host and credentials are hard-coded because this is a demo; do not ship them.
        DebeziumSourceFunction<String> sourceFunction = MongoDBSource.<String>builder()
                .hosts("192.160.45.160:27017")
                .username("root")
                .password("root")
                .databaseList("seappool") // captured databases, regex supported
                .collectionList("seappool.test") // captured collections, regex supported
                // Alternative: .deserializer(new JsonDebeziumDeserializationSchema()) emits the raw
                // change-stream JSON (fullDocument, operationType, ns.db, ns.coll); the downstream
                // mapper would then have to read those fields instead of the envelope used here.
                .deserializer(new MyMongoCDCDeserialazation()) // custom envelope, see below
                .copyExisting(true) // also copy the data that already exists (default is true)
                .build();

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.addSource(sourceFunction)
                .map(new ObjectIdFlattener())
                .print().setParallelism(1); // use parallelism 1 for sink to keep message ordering
        env.execute();
    }


    /**
     * Example POJO for binding the "data" payload, e.g. via
     * {@code jsonObject.toJavaObject(SeaBean.class)}. Not used by the job itself.
     */
    @Data
    public static  class  SeaBean{
        private String _id;
        private String ss;
        private Date time;
    }


    /**
     * Parses the envelope produced by {@link MyMongoCDCDeserialazation} and, when the
     * document's {@code _id} is an object (for example {@code {"$oid": "..."}}), replaces
     * it with the plain {@code $oid} string.
     */
    private static final class ObjectIdFlattener implements MapFunction<String, JSONObject> {

        private static final long serialVersionUID = 1L;

        /**
         * @param source envelope JSON string emitted by the deserializer
         * @return the parsed envelope, with {@code data._id} flattened when applicable
         */
        @Override
        public JSONObject map(String source) throws Exception {
            JSONObject event = JSONObject.parseObject(source);
            JSONObject document = event.getJSONObject("data");
            if (document == null) {
                // Events without a fullDocument (e.g. deletes) have no payload to normalize;
                // pass them through instead of failing the job with a NullPointerException.
                return event;
            }
            Object id = document.get("_id");
            if (id instanceof JSONObject) {
                document.put("_id", ((JSONObject) id).getString("$oid"));
            }
            // "data" may have been stored as a JSON string; write back the parsed object.
            event.put("data", document);
            return event;
        }
    }


    /**
     * Converts a change record into a JSON string with this shape:
     * <pre>
     * {
     *   "database":  "name taken from the record topic",
     *   "tableName": "collection name taken from the record topic",
     *   "type":      "the record's operationType, with 'replace' reported as 'update'",
     *   "data":      "the record's fullDocument value (may be absent)",
     *   "copy":      "the 'copy' entry of the source offset, or 'false' when missing"
     * }
     * </pre>
     */
    public static class MyMongoCDCDeserialazation implements DebeziumDeserializationSchema<String> {

        private static final long serialVersionUID = 1L;

        /**
         * Builds the envelope for one record and emits it as a JSON string.
         *
         * @param record change record; its topic is expected to look like {@code database.collection}
         * @param out    collector receiving the envelope JSON string
         * @throws Exception if the record value cannot be read
         */
        @Override
        public void deserialize(SourceRecord record, Collector<String> out) throws Exception {
            Object copy = record.sourceOffset().get("copy");
            if (copy == null) {
                copy = "false";
            }

            // Split on the first dot only, so collection names that themselves contain
            // dots are kept whole; tolerate a topic without any dot.
            String[] fields = record.topic().split("\\.", 2);
            String database = fields[0];
            String tableName = fields.length > 1 ? fields[1] : "";

            Struct value = (Struct) record.value();
            Object data = value.get("fullDocument");

            String type = value.getString("operationType");
            if ("replace".equalsIgnoreCase(type)) {
                type = "update";
            }

            JSONObject result = new JSONObject();
            result.put("database", database);
            result.put("tableName", tableName);
            result.put("type", type);
            result.put("data", data);
            result.put("copy", copy);
            out.collect(result.toJSONString());
        }

        /** @return type information for the emitted {@code String} records */
        @Override
        public TypeInformation<String> getProducedType() {
            return BasicTypeInfo.STRING_TYPE_INFO;
        }
    }

}

 

posted on 2022-06-14 10:00  lshan  阅读(1187)  评论(1编辑  收藏  举报