DataX批量增量同步pg库表数据
批量pg2pg增量同步
DataX.json配置文件:pg2pg_increment_sync.json
{
"job": {
"setting": {
"speed": {
"channel": 1
}
},
"content": [
{
"reader": {
"name": "postgresqlreader",
"parameter": {
"username": "$USERNAME",
"password": "$PASSWORD",
"connection": [
{
"jdbcUrl": ["jdbc:postgresql://$HOST:$PORT/$DATABASE"],
"querySql": ["select * from ysyt.$TABLE where data_time >= current_date - interval '1 day' and data_time < current_date"]
}
]
}
},
"writer": {
"name": "postgresqlwriter",
"parameter": {
"username": "$USERNAME",
"password": "$PASSWORD",
"preSql": ["delete from data.$TABLE where data_time >= current_date - interval '1 day' and data_time < current_date"],
"connection": [
{
"jdbcUrl": "jdbc:postgresql://$HOST:$PORT/$DATABASE",
"table": ["data.$TABLE"]
}
]
}
}
}
]
}
}
存放迁移表名文件:temp.txt
table1
table2
table3
Shell脚本:temp.sh
$USERNAME='xxx'
$PASSWORD='xxx'
$HOST='xxx'
$PORT='xxx'
$DATABASE='xxx'
for i in $*
do
echo "开始增量同步${i}表数据,start `date '+%Y-%m-%d %H:%M:%S'`" >> /.../temp.log
datax.py pg2pg_increment_sync.json -p "-DUSERNAME='$USERNAME' -DPASSWORD='$PASSWORD' -DHOST='$HOST' -DPORT='$PORT' -DDATABASE='$DATABASE' -DTABLE='$i'" >> /.../temp.log
echo "增量同步${i}表数据完成,end `date '+%Y-%m-%d %H:%M:%S'`" >> /.../temp.log
done
执行shell脚本命令:
cat temp.txt | xargs sh temp.sh
配置crontab,每天定时调度:
0 7 * * * cat /.../temp.txt | xargs bash /.../temp.sh >/dev/null 2>&1 &
暂定这种方法