Syncing Kafka to Iceberg with Flink (COS storage)
一、Flink to logger
1、source
create table source_table (
id bigint comment 'unique ID'
,order_number bigint comment 'order number'
,update_timestamp timestamp_ltz(3) metadata from 'timestamp'
,primary key (id, order_number) not enforced
) with (
'connector' = 'kafka'
,'topic' = 'topic'
,'properties.bootstrap.servers' = '127.0.0.1:9092'
,'scan.startup.mode' = 'latest-offset'
,'key.format' = 'json'
,'key.json.fail-on-missing-field' = 'false'
,'key.fields' = 'id;order_number'
,'key.json.ignore-parse-errors' = 'true'
,'value.format' = 'debezium-json'
,'value.debezium-json.ignore-parse-errors' = 'true'
,'value.debezium-json.encode.decimal-as-plain-number' = 'true'
);
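With 'value.format' = 'debezium-json', each Kafka record value is expected to be a Debezium changelog envelope, which Flink maps to INSERT/UPDATE/DELETE rows. An update message for this schema would look roughly like the following (field values are illustrative):

{"before": {"id": 1, "order_number": 100}, "after": {"id": 1, "order_number": 101}, "op": "u"}

The primary-key columns are read from the record key via 'key.format' and 'key.fields', while the value carries the before/after row images.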
2、Logger sink
CREATE TABLE sink_test_wang2(
id bigint comment 'unique ID'
,order_number bigint comment 'order number'
,update_timestamp timestamp_ltz(3) comment 'update timestamp'
,primary key (id, order_number) not enforced
) WITH (
'connector' = 'logger',
'all-changelog-mode' = 'true'
);
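The logger connector is a debugging sink available on Tencent Oceanus: it prints every incoming row to the TaskManager logs, and 'all-changelog-mode' = 'true' appears to make it accept all changelog kinds (insert/update/delete) rather than inserts only. On open-source Flink, the built-in print connector is a rough stand-in, e.g.:

CREATE TABLE sink_print (
id bigint
,order_number bigint
,update_timestamp timestamp_ltz(3)
) WITH (
'connector' = 'print'
);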
3、Insert
insert into sink_test_wang2
select id
,order_number
,update_timestamp
from source_table /*+ OPTIONS('properties.group.id'='testwang') */;
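The /*+ OPTIONS(...) */ hint overrides connector options for this query only, so each job can use its own consumer group without touching the DDL. Other options can be overridden the same way; for instance, a hypothetical variant that also rewinds to the earliest offset:

insert into sink_test_wang2
select id
,order_number
,update_timestamp
from source_table /*+ OPTIONS('properties.group.id'='testwang', 'scan.startup.mode'='earliest-offset') */;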
二、Kafka to Iceberg
1、Sink table
create table sink_cos_table (
id bigint comment 'unique ID'
,order_number bigint comment 'order number'
,update_timestamp timestamp_ltz(3) comment 'update timestamp'
,primary key (id, order_number) not enforced
)
with (
'connector' = 'iceberg'
,'warehouse'='cosn://<cos-bucket-name>/test_wang'
,'catalog-type' = 'hadoop'
,'catalog-name'='hadoop'
,'catalog-database' = 'data_lake_ods_test'
,'catalog-table' = 'test_kafka_table'
,'format-version' = '2'
,'write.upsert.enabled' = 'true'
,'table.drop.base-path.enabled' = 'true'
,'engine.hive.enabled' = 'true'
);
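Note that the Iceberg sink only commits data files when a Flink checkpoint completes, so checkpointing must be enabled or the table will never show any data. When submitting from the SQL client, a minimal sketch (the interval here is an assumption; tune it against your latency and small-file trade-off):

SET 'execution.checkpointing.interval' = '1min';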
2、Insert
insert into sink_cos_table
select id
,order_number
,update_timestamp
from source_table /*+ OPTIONS('properties.group.id'='read-oceanus-wangshida') */;
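Once a checkpoint has committed data, the result can be verified by reading the same table back in a separate batch session, for example:

SET 'execution.runtime-mode' = 'batch';
select id, order_number, update_timestamp from sink_cos_table limit 10;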