DataX批量增量同步pg库表数据

批量pg2pg增量同步

DataX.json配置文件:pg2pg_increment_sync.json

{  
    "job": {  
      "setting": {  
        "speed": {  
          "channel": 1  
        }  
      },  
      "content": [  
        {  
          "reader": {  
            "name": "postgresqlreader",  
            "parameter": {  
              "username": "$USERNAME",  
              "password": "$PASSWORD",  
              "connection": [  
                {  
                  "jdbcUrl": ["jdbc:postgresql://$HOST:$PORT/$DATABASE"],  
                  "querySql": ["select * from ysyt.$TABLE where data_time >= current_date - interval '1 day' and data_time < current_date"]  
                }  
              ]  
            }  
          },  
          "writer": {  
            "name": "postgresqlwriter",  
            "parameter": {   
              "username": "$USERNAME",  
              "password": "$PASSWORD",  
              "preSql": ["delete from data.$TABLE where data_time >= current_date - interval '1 day' and data_time < current_date"],  
              "connection": [  
                {  
                  "jdbcUrl": "jdbc:postgresql://$HOST:$PORT/$DATABASE",  
                  "table": ["data.$TABLE"]  
                }  
              ]  
            }  
          }  
        }  
      ]  
    }  
  } 

存放迁移表名文件:temp.txt

table1
table2
table3

Shell脚本:temp.sh

$USERNAME='xxx'
$PASSWORD='xxx'
$HOST='xxx'
$PORT='xxx'
$DATABASE='xxx'

for i in $*
do
	echo "开始增量同步${i}表数据,start `date '+%Y-%m-%d %H:%M:%S'`" >> /.../temp.log
	datax.py pg2pg_increment_sync.json -p "-DUSERNAME='$USERNAME' -DPASSWORD='$PASSWORD' -DHOST='$HOST' -DPORT='$PORT' -DDATABASE='$DATABASE' -DTABLE='$i'" >> /.../temp.log
	echo "增量同步${i}表数据完成,end `date '+%Y-%m-%d %H:%M:%S'`" >> /.../temp.log
done

执行shell脚本命令:

cat temp.txt | xargs sh temp.sh

配置crontab,每天定时调度:

0 7 * * * cat /.../temp.txt | xargs bash /.../temp.sh >/dev/null 2>&1 &

暂定这种方法

posted @ 2024-03-11 12:57  MrSponge  Views(284)  Comments(0Edit  收藏  举报