Flink 1.13.1
mongo-flink connector : https://github.com/mongo-flink/mongo-flink
mongodb-cdc
官文:
https://ververica.github.io/flink-cdc-connectors/master/content/connectors/mongodb-cdc.html
依赖:
<!-- https://mvnrepository.com/artifact/com.ververica/flink-connector-mongodb-cdc -->
<!-- doc: https://ververica.github.io/flink-cdc-connectors/master/content/connectors/mongodb-cdc.html -->
<dependency>
    <groupId>com.ververica</groupId>
    <artifactId>flink-connector-mongodb-cdc</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>fastjson</artifactId>
    <version>1.2.79</version>
</dependency>
package com.sea.flink.sea.cdc; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONObject; import com.ververica.cdc.connectors.mongodb.MongoDBSource; import com.ververica.cdc.debezium.DebeziumDeserializationSchema; import com.ververica.cdc.debezium.DebeziumSourceFunction; import com.ververica.cdc.debezium.JsonDebeziumDeserializationSchema; import com.ververica.cdc.debezium.StringDebeziumDeserializationSchema; import lombok.Data; import org.apache.flink.api.common.functions.MapFunction; import org.apache.flink.api.common.typeinfo.BasicTypeInfo; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.source.SourceFunction; import org.apache.flink.util.Collector; import org.apache.kafka.connect.data.Struct; import org.apache.kafka.connect.source.SourceRecord; import java.util.Date; /*************************** *<pre> * @PACKAGE : com.sea.flink.sea.cdc * * @Author : Sea * * @Date : 6/13/22 5:40 PM * * @Desc : *</pre> ***************************/ public class MongoCDCTest { public static void main(String[] args) throws Exception { DebeziumSourceFunction<String> sourceFunction = MongoDBSource.<String>builder() .hosts("192.160.45.160:27017") .username("root") .password("root") .databaseList("seappool") // set captured database, support regex .collectionList("seappool.test") //set captured collections, support regex // .deserializer(new JsonDebeziumDeserializationSchema()) .deserializer(new MyMongoCDCDeserialazation()) //自定义解析器 .copyExisting(true) //是否拷贝之前的所有的数据,默认为true .build(); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.addSource(sourceFunction) /*.map(new MapFunction<String, JSONObject>() { @Override public Object map(String line) throws Exception { JSONObject source = JSON.parseObject(line); JSONObject data = source.getJSONObject("fullDocument"); Object id = data.get("_id"); 
if(!(id instanceof String)){ data.put("_id",((JSONObject)id).getString("$oid")); } String type = source.getString("operationType"); JSONObject ns = source.getJSONObject("ns"); String database = ns.getString("db"); String table = ns.getString("coll"); JSONObject result = new JSONObject(); result.put("database",database); result.put("table",database); result.put("data",data); result.put("type","replace".equalsIgnoreCase(type)?"update":type); return result; } })*/ //自定义解析 .map(new MapFunction<String, JSONObject>() { @Override public JSONObject map(String source) throws Exception { JSONObject data= JSONObject.parseObject(source); JSONObject info = data.getJSONObject("data"); Object id = info.get("_id"); if((id instanceof JSONObject)){ System.err.println(id.getClass()); info.put("_id",((JSONObject)id).getString("$oid")); } data.put("data",info); //SeaBean seaBean = info.toJavaObject(SeaBean.class); return data; } }) .print().setParallelism(1); // use parallelism 1 for sink to keep message ordering env.execute(); } @Data public static class SeaBean{ private String _id; private String ss; private Date time; } /** * 封装的数据格式 * { * "database":"", * "tableName":"", * "data":{"id":"","tm_name":""....}, * "type":"c u d", * "ts":156456135615 * } */ public static class MyMongoCDCDeserialazation implements DebeziumDeserializationSchema<String> { public void deserialize(SourceRecord record, Collector<String> out) throws Exception { JSONObject result = new JSONObject(); Object copy = record.sourceOffset().get("copy"); if(copy==null) {copy="false";} //1.获取库名&表名 String topic = record.topic(); String[] fields = topic.split("\\."); String database = fields[0]; String tableName = fields[1]; //3.获取"fullDocument"数据 Struct value = (Struct) record.value(); Object data = value.get("fullDocument"); //4.获取操作类型 String type = value.getString("operationType"); type = "replace".equalsIgnoreCase(type)?"update":type; result.put("database", database); result.put("tableName", tableName); 
result.put("type",type); result.put("data",data); result.put("copy",copy); out.collect(result.toJSONString()); } public TypeInformation<String> getProducedType() { return BasicTypeInfo.STRING_TYPE_INFO; } } }