DataX部署安装

一、准备
系统环境:Windows Server 2008
Python2.7.14.msi
datax.tar.gz
二、安装
Python

 


打开cmd,输入python并回车进行测试;若能进入Python交互界面并显示版本号,说明安装成功。
DataX
解压datax.tar.gz到指定文件夹(本文以D:\datax为例,后文的路径均基于该目录)。
三、配置
JOB
到D:\datax\job创建*.json文件,配置需要进行数据抽取的表
{
    "job":{
        "content":[
            {
                "reader":{
                    "parameter":{
                        "password":"phisphis",
                        "connection":[
                            {
                                "querySql":[
                                    "select PERSON_NAME, HOUSEHOLD_TYPE, ADDR_AREA from phis.EHR_BASE where ID > $stID"
                                ],
                                "jdbcUrl":[
                                    "jdbc:db2://172.16.19.116:50000/phis"
                                ]
                            }
                        ],
                        "username":"phis2"
                    },
                    "name":"db2reader"
                },
                "writer":{
                    "parameter":{
                        "password":"123456",
                        "column":[
                            "`PERSON_NAME`",
                            "`HOUSEHOLD_TYPE`",
                            "`ADDR_AREA`"
                        ],
                        "connection":[
                            {
                                "jdbcUrl":"jdbc:mysql://11.105.26.130:3306/gw?useUnicode=true&characterEncoding=utf-8",
                                "table":[
                                    "EHR_BASE"
                                ]
                            }
                        ],
                        "writeMode":"replace",
                        "batchSize":1000,
                        "username":"root"
                    },
                    "name":"mysqlwriter"
                }
            }
        ],
        "setting":{
            "speed":{
                "byte":1048576
            }
        }
    }
}
BAT脚本
创建D:\datax\bat目录,在目录下创建bat脚本
@echo off
rem Incremental extraction loop for table EHR_BASE.
rem Each round: wait, read the largest ID already present in the local MySQL
rem database gw, then run the DataX job with that ID passed as -DstID so the
rem job's querySql only selects source rows whose ID is greater than stID.
rem The loop continues only while datax.py exits successfully.
setlocal enabledelayedexpansion
rem NOTE(review): this only sets an environment variable named TITLE, which
rem nothing below reads; it does not change the console window title. Confirm intent.
set TITLE=EHR_BASE
rem NOTE(review): relative path, resolved against the directory the script is
rem started from; presumably a working directory for EHR_BASE.txt - confirm it exists.
rem Kept outside the loop so it is executed only once.
cd bin
:0
rem Delay between rounds: 300 pings to localhost, used as a pause of about 5 minutes.
ping -n 300 127.0.0.1>nul
rem Ask MySQL for the current largest ID in gw.EHR_BASE and store the raw output.
rem This is done inside the loop so that stID is refreshed on every round
rem instead of being computed once at startup and reused forever.
(
  echo use gw;
  echo select ID from EHR_BASE order by ID desc LIMIT 1;
) | mysql -uroot -p123456 >EHR_BASE.txt
rem Reset the line counter and stID before parsing the fresh output.
set line=
set stID=
rem The value on the second line of the output is taken as the ID
rem (presumably the first line is the column header printed by the mysql client).
for /f "tokens=1" %%i in (EHR_BASE.txt) do (
set /a line+=1
if !line!==2 set stID=%%i
)
rem Normalise stID to a number; an empty value, e.g. from an empty table, becomes 0.
set /a stID+=0
set p=" -DstID=%stID%"
rem Run the DataX job through Python; on success jump back for the next round.
python D:\datax\bin\datax.py -p %p% D:\datax\job\EHR_BASE.json && goto 0
四、运行
双击编辑好的bat脚本,开始运行

 

posted @ 2019-02-26 10:15  努力学习拼命玩  阅读(429)  评论(0)  编辑  收藏  举报