mongoshake:2.6.5 in Docker
1. Pull the image
sudo docker pull happysea/mongoshake:2.6.5
2. Sync MongoDB data (192.168.18.176:27018,192.168.18.176:27019,192.168.18.176:27020) to Kafka (192.168.18.51:9092), topic seatest_topic
sudo docker run -itd --restart=always --name=mongoshake -p 9101:9101 -p 9100:9100 \
  -e tunnel_address=seatest_topic@192.168.18.51:9092 \
  -e log_dir=./logs/ \
  -e sync_mode=incr \
  -e mongo_urls=mongodb://root:root@192.168.18.176:27018,192.168.18.176:27019,192.168.18.176:27020 \
  -e tunnel_kafka_partition_number=1 \
  -e tunnel_message=json \
  -e incr_sync_mongo_fetch_method=oplog \
  -v /opt/docker/mongoshake/logs:/mongo-shake-v2.6.5/logs:rw \
  happysea/mongoshake:2.6.5
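To verify that changes are flowing, watch the container log and tail the topic. A minimal check, assuming the stock Kafka console-consumer script is reachable on the broker host (its path varies by installation) and that MongoShake's status endpoint answers on the mapped incr_sync.http_port (the /repl path follows MongoShake's documented REST API; treat it as an assumption if your build differs):
sudo docker logs -f mongoshake
curl http://127.0.0.1:9100/repl
kafka-console-consumer.sh --bootstrap-server 192.168.18.51:9092 --topic seatest_topic --from-beginning
Each consumed message should be a JSON-formatted oplog entry, since tunnel_message=json.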
2.1 To sync to another destination, adjust the configuration yourself (details omitted); one possible variant is sketched below.
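For example, MongoShake's direct tunnel writes straight into a destination MongoDB instead of Kafka. A sketch using the same variable-override convention; the target address 192.168.18.200:27017 and its credentials are hypothetical placeholders:
sudo docker run -itd --restart=always --name=mongoshake-direct -p 9101:9101 -p 9100:9100 \
  -e tunnel=direct \
  -e tunnel_address=mongodb://root:root@192.168.18.200:27017 \
  -e sync_mode=incr \
  -e mongo_urls=mongodb://root:root@192.168.18.176:27018,192.168.18.176:27019,192.168.18.176:27020 \
  -v /opt/docker/mongoshake/logs:/mongo-shake-v2.6.5/logs:rw \
  happysea/mongoshake:2.6.5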
3. Variable parameters
Take a setting name from collector.conf and replace each dot "." with an underscore "_".
e.g. tunnel.address becomes tunnel_address
Then pass -e tunnel_address=topic@192.168.18.51:9092 to docker run and the setting is overridden.
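The same rule applies to every setting in the template below. For instance, log_level overrides log.level and incr_sync_worker overrides incr_sync.worker; the values here are illustrative:
sudo docker run -itd --restart=always --name=mongoshake -p 9101:9101 -p 9100:9100 \
  -e log_level=debug \
  -e incr_sync_worker=16 \
  -e tunnel_address=seatest_topic@192.168.18.51:9092 \
  happysea/mongoshake:2.6.5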
For the full list of settings and their defaults, see the collector.conf template:
master_quorum = '${master_quorum:=false}'
full_sync.http_port = 9101
incr_sync.http_port = 9100
# profiling on net/http/profile
# profiling port, used to inspect internal Go stacks.
system_profile_port = 9200

# global log level: debug, info, warning, error. lower-level messages are filtered out.
log.level = '${log_level:=info}'
log.dir = '${log_dir}'
log.flush = '${log_flush:=false}'

sync_mode = '${sync_mode:=incr}'
mongo_urls = '${mongo_urls:=mongodb://root:root@192.168.18.176:27018,192.168.18.176:27019,192.168.18.176:27020}'
# please fill in the source config server url if the source mongodb is sharded.
mongo_cs_url = '${mongo_cs_url}'
mongo_s_url = '${mongo_s_url}'
# enable source ssl
mongo_ssl_root_ca_file = '${mongo_ssl_root_ca_file}'

tunnel = '${tunnel:=kafka}'
# e.g. tunnel.address = mgtest1@192.168.18.51:9092
tunnel.address = '${tunnel_address:=mongosheketest@192.168.18.51:9092}'
tunnel.message = '${tunnel_message:=json}'
tunnel.kafka.partition_number = '${tunnel_kafka_partition_number:=1}'
# canonical_extended_json
tunnel.json.format = '${tunnel_json_format}'
tunnel.mongo_ssl_root_ca_file = '${tunnel_mongo_ssl_root_ca_file}'

mongo_connect_mode = '${mongo_connect_mode:=secondaryPreferred}'

filter.namespace.black = '${filter_namespace_black}'
filter.namespace.white = '${filter_namespace_white}'
filter.pass.special.db = '${filter_pass_special_db}'
filter.ddl_enable = '${filter_ddl_enable:=false}'
filter.oplog.gids = '${filter_oplog_gids:=false}'

# since v2.4 this no longer needs to be set to the source config server address.
checkpoint.storage.url = '${checkpoint_storage_url}'
checkpoint.storage.db = '${checkpoint_storage_db:=mongoshake}'
# checkpoint collection's name; if several mongoshake instances pull from the
# same source, change this name to avoid conflicts.
checkpoint.storage.collection = '${checkpoint_storage_collection:=ckpt_default}'
# set if ssl is enabled
checkpoint.storage.url.mongo_ssl_root_ca_file = '${checkpoint_storage_url_mongo_ssl_root_ca_file}'
# if the oldest oplog entry is newer than the given time, mongoshake errors out and exits.
checkpoint.start_position = '${checkpoint_start_position:=1970-01-01T00:00:00Z}'

transform.namespace = '${transform_namespace}'

full_sync.reader.collection_parallel = '${full_sync_reader_collection_parallel:=6}'
# the number of document writer threads in each collection; e.g. 8 means 8 threads
# write into the same collection concurrently.
full_sync.reader.write_document_parallel = '${full_sync_reader_write_document_parallel:=8}'
# number of documents per batch insert on the target; e.g. 128 means one thread
# aggregates 128 documents before writing them.
full_sync.reader.document_batch_size = '${full_sync_reader_document_batch_size:=128}'
# max number of fetch threads per collection, default 1 (single-threaded fetch);
# requires the splitVector privilege.
# note: for a single collection this only works when the indexed values share one
# type; do not enable this option if the types are mixed!
full_sync.reader.parallel_thread = '${full_sync_reader_parallel_thread:=1}'
# the index scanned for parallel fetching when full_sync.reader.parallel_thread is
# set; its values must share one type. _id is recommended for a replica set, the
# shard key for a sharded cluster. the index may only have 1 field.
full_sync.reader.parallel_index = '${full_sync_reader_parallel_index:=_id}'
# whether to drop a collection of the same name in the dest mongodb before full
# synchronization: true drops it first, false does not.
full_sync.collection_exist_drop = '${full_sync_collection_exist_drop:=true}'
# create index option. background means indexes are created in the background.
full_sync.create_index = '${full_sync_create_index:=none}'
# convert insert to update when a duplicate key is found, i.e. when the _id
# already exists in the destination.
full_sync.executor.insert_on_dup_update = '${full_sync_executor_insert_on_dup_update:=false}'
# whether to filter orphan documents when the source is a sharded cluster.
full_sync.executor.filter.orphan_document = '${full_sync_executor_filter_orphan_document:=false}'
# enable majority write in full sync; performance degrades if enabled.
full_sync.executor.majority_enable = '${full_sync_executor_majority_enable:=false}'

# --------------------------- incremental sync configuration ---------------------------
incr_sync.mongo_fetch_method = '${incr_sync_mongo_fetch_method:=oplog}'
incr_sync.change_stream.watch_full_document = '${incr_sync_change_stream_watch_full_document:=false}'
incr_sync.oplog.gids = '${incr_sync_oplog_gids}'
incr_sync.shard_key = '${incr_sync_shard_key:=collection}'
incr_sync.shard_by_object_id_whitelist = '${incr_sync_shard_by_object_id_whitelist}'
incr_sync.worker = '${incr_sync_worker:=8}'
incr_sync.tunnel.write_thread = '${incr_sync_tunnel_write_thread:=8}'
# delay applied on the destination, like mongodb's secondary slaveDelay parameter
# (e.g. lag the source by 20 minutes). unit: seconds; 0 disables it.
incr_sync.target_delay = '${incr_sync_target_delay:=0}'
# memory queue configuration, please see the FAQ document for more details.
# do not modify these variables if performance and resource usage already meet
# your needs.
incr_sync.worker.batch_queue_size = '${incr_sync_worker_batch_queue_size:=64}'
incr_sync.adaptive.batching_max_size = '${incr_sync_adaptive_batching_max_size:=1024}'
incr_sync.fetcher.buffer_capacity = '${incr_sync_fetcher_buffer_capacity:=256}'
incr_sync.executor.upsert = '${incr_sync_executor_upsert:=false}'
# change an oplog insert to an update when a duplicated key (_id or unique index)
# already exists in the destination.
incr_sync.executor.insert_on_dup_update = '${incr_sync_executor_insert_on_dup_update:=false}'
incr_sync.conflict_write_to = '${incr_sync_conflict_write_to:=none}'
incr_sync.executor.majority_enable = '${incr_sync_executor_majority_enable:=false}'

special.source.db.flag = '${special_source_db_flag}'
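Per checkpoint.storage.db and checkpoint.storage.collection above, incremental progress is checkpointed in the mongoshake.ckpt_default collection; with checkpoint.storage.url left empty, MongoShake typically stores it on the source for a replica-set source. A quick way to inspect sync progress, assuming the legacy mongo shell is installed on the host:
mongo "mongodb://root:root@192.168.18.176:27018" --eval 'db.getSiblingDB("mongoshake").ckpt_default.find().forEach(printjson)'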