Waterdrop 同步 MySQL 数据到 Hive
一、Shell 类型任务,提交到 YARN 集群
#!/bin/bash
# Sync one MySQL table (task_info) into a Hive table using Waterdrop 2.0.1
# (Spark engine, YARN cluster mode). The script renders a Waterdrop config
# from the variables below, prints it for auditing, then submits the job.
#
# Fix: original shebang was `#!bin/bash` (missing leading slash), which makes
# direct execution fail with "bad interpreter".
set -euo pipefail

#========================= Source config (read-only MySQL account) =========================
jdbc_url="数据库ip:3306"
database="数据名"
username="账号"
password="密码"

#========================= Target Hive table config =========================
target_table="hive库.hive表"
# Partition value written into the `dt` column (yyyyMMdd).
target_partition_dt="20220322"

#========================= Render the Waterdrop config =========================
config_file="temp_clickhouse.config"

# Unquoted heredoc: ${...} variables are expanded, everything else (including
# the double quotes) is literal — no backslash-escaping needed, and the output
# is byte-identical to the previous echo-based version.
# NOTE(review): CHAR(10)/CHAR(13) (newline/CR) are replaced with CHAR(3) in all
# free-text columns so embedded line breaks cannot corrupt Hive text rows.
cat > "${config_file}" <<EOF
env {
spark.app.name="sync_mysql_to_hive_test"
spark.executor.instances=4
spark.executor.cores=1
spark.executor.memory="1024m"
spark.executor.memoryOverhead="154m"
spark.executor.extraJavaOptions="-XX:MaxDirectMemorySize=1G"
spark.sql.catalogImplementation="hive"
hive.exec.dynamic.partition="true"
hive.exec.dynamic.partition.mode="nonstrict"
}
source {
mysql {
table="task_info"
url="jdbc:mysql://${jdbc_url}/${database}?zeroDateTimeBehavior=convertToNull&useServerPrepStmts=false&rewriteBatchedStatements=true&useUnicode=true&characterEncoding=utf8&tinyInt1isBit=false&serverTimezone=Asia/Shanghai"
user="${username}"
password="${password}"
result_table_name="input_table"
}
}
transform {
sql {
sql="select id as id,menu_id as menu_id,REPLACE(REPLACE(task_name,CHAR(10),CHAR(3)),CHAR(13), CHAR(3)) as task_name,REPLACE(REPLACE(task_desc,CHAR(10),CHAR(3)),CHAR(13), CHAR(3)) as task_desc,ds_type as ds_type,ds_id as ds_id,REPLACE(REPLACE(ds_table_name,CHAR(10),CHAR(3)),CHAR(13), CHAR(3)) as ds_table_name,pipeline_type as pipeline_type,init_status as init_status,df_type as df_type,df_id as df_id,REPLACE(REPLACE(df_table_name,CHAR(10),CHAR(3)),CHAR(13), CHAR(3)) as df_table_name,first_exec_time as first_exec_time,end_exec_time as end_exec_time,schedule_cycle as schedule_cycle,schedule_type as schedule_type,REPLACE(REPLACE(creator,CHAR(10),CHAR(3)),CHAR(13), CHAR(3)) as creator,REPLACE(REPLACE(creator_no,CHAR(10),CHAR(3)),CHAR(13), CHAR(3)) as creator_no,audit_status as audit_status,task_status as task_status,is_del as is_del,nezha_task_id as nezha_task_id,nezha_init_id as nezha_init_id,task_version as task_version,audit_pass_time as audit_pass_time,release_status as release_status,project_id as project_id,task_init_step_time as task_init_step_time,task_init_step as task_init_step,create_time as create_time,update_time as update_time,${target_partition_dt} as dt from input_table"
}
}
sink {
hive {
table="${target_table}"
save_mode="overwrite"
}
}
EOF

echo "配置文件内容:"
cat "${config_file}"

#========================= Submit the job =========================
# Under `set -e`, a non-zero exit from the launcher aborts the script,
# so "执行完成" is only printed on successful submission.
sh /apps/scripts/waterdrop-2.0.1/bin/start-waterdrop-spark.sh \
  --master yarn \
  --deploy-mode cluster \
  --config "${config_file}"

#========================= Done ====================
echo "执行完成"
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】凌霞软件回馈社区,博客园 & 1Panel & Halo 联合会员上线
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】博客园社区专享云产品让利特惠,阿里云新客6.5折上折
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· CSnakes vs Python.NET:高效嵌入与灵活互通的跨语言方案对比
· 【.NET】调用本地 Deepseek 模型
· Plotly.NET 一个为 .NET 打造的强大开源交互式图表库
· 上周热点回顾(2.17-2.23)
· 如何使用 Uni-app 实现视频聊天(源码,支持安卓、iOS)