DataX基本使用
DataX基本使用
1.打印输入流在控制台
获取模板
datax.py -r 输入数据源名字 -w 输出数据源名字(可以到官网上找到准确的名字)
如:datax.py -r mysqlreader -w hbase11xwriter
编写stream2stream.json文件
streamreader --> streamwriter
{
"job": {
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"column": [
{"type":"string","value":"张三"},
{"type":"string","value":"李四"},
],
"sliceRecordCount": "2"
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"encoding": "utf-8",
"print": true
}
}
}
],
"setting": {
"speed": {
"channel": "2"
}
}
}
}
执行:datax.py stream2stream.json
2.MySQL 到 MySQL 同步
先建库建表
编写mysql2mysql.json文件
mysqlreader-->mysqlwriter
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection": [
{
"jdbcUrl": ["jdbc:mysql://master01:3306/students"],
"table": ["student"]
}
],
"password": "123456",
"username": "root"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master01:3306/students2?useUnicode=true&characterEncoding=utf8",
"table": ["datatostudent"]
}
],
"password": "123456",
"username": "root",
"writeMode": "insert"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
执行:datax.py mysql2mysql.json
3.mysql到hdfs同步
先在hdfs上建好文件写入路径hadoop fs -mkdir ...
编写mysql2hdfs.json文件
mysqlreader-->hdfswriter
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection": [
{
"jdbcUrl": ["jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8"],
"table": ["student"]
}
],
"password": "123456",
"username": "root"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{"name":"id","type":"int"},
{"name":"name","type":"string"},
{"name":"age","type":"int"},
{"name":"gender","type":"string"},
{"name":"clazz","type":"string"},
{"name":"last_mod","type":"date"}
],
"defaultFS": "hdfs://master01:9000",
"fieldDelimiter": ",",
"fileName": "datastudent",
"fileType": "text",
"path": "/shujia/bigdata19/dataxoutput",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
执行:datax.py mysql2hdfs.json
4.mysql到hive同步
创建表
CREATE EXTERNAL TABLE IF NOT EXISTS dataxstudent(
id BIGINT,
name STRING,
age INT,
gender STRING,
clazz STRING,
last_mod STRING
)
comment '学生表'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE
编写mysql2hive.json文件
mysqlreader--hive(hdfswriter)
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection": [
{
"jdbcUrl": ["jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8"],
"table": ["student"]
}
],
"password": "123456",
"username": "root"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{ "name":"id",
"type":"bigint"
},
{
"name":"name",
"type":"string"
},
{
"name":"age",
"type":"int"
},
{
"name":"gender",
"type":"string"
},
{
"name":"clazz",
"type":"string"
},
{
"name":"last_mod",
"type":"string"
}
],
"defaultFS": "hdfs://master01:9000",
"fieldDelimiter": ",",
"fileName": "student",
"fileType": "text",
"path": "/user/hive/warehouse/dataxstudent",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": "3"
}
}
}
}
5.Mysql到hbase
在hbase中创建一个表
编写mysql2hbase.json文件
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8"
],
"table": ["student"]
}
],
"password": "123456",
"username": "root",
}
},
"writer": {
"name": "hbase11xwriter",
"parameter": {
"column": [
{"index":1,"name":"cf1:name","type":"string"},
{"index":2,"name":"cf1:age","type":"string"},
{"index":3,"name":"cf1:gender","type":"string"},
{"index":5,"name":"cf1:last_mod","type":"string"}
],
"encoding": "UTF-8",
"hbaseConfig": {
"hbase.zookeeper.quorum": "master01:2181,node1:2181,node2:2181"
},
"mode": "normal",
"rowkeyColumn": [
{"index":0,"type":"string"},
{"index":-1,"type":"string","value":"_"},
{"index":4,"type":"string"}
],
"table": "dataxstudent"
}
}
}
],
"setting": {
"speed": {
"channel": "1"
}
}
}
}
6.hbase到mysql
先清空hbase中原来以id和clazz作为rowkey的表,修改上面配置文件,重新以id为rowkey从MySQL同步到hbase
获取模板
datax.py -r hbase11xreader -w mysqlwriter
新建一个student2表
编写hbase2mysql.json文件
{
"job": {
"content": [
{
"reader": {
"name": "hbase11xreader",
"parameter": {
"column": [
{
"name":"rowkey",
"type":"string"
},
{
"name":"cf1:name",
"type":"string"
},
{
"name":"cf1:age",
"type":"string"
},
{
"name":"cf1:gender",
"type":"string"
},
{
"name":"cf1:clazz",
"type":"string"
},
{
"name":"cf1:last_mod",
"type":"string"
}
],
"encoding": "UTF-8",
"hbaseConfig": {
"hbase.zookeeper.quorum": "master01:2181,node1:2181,node2:2181"
},
"mode": "normal",
"table": "dataxstudent"
}
},
"writer": {
"name": "mysqlwriter",
"parameter": {
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection": [
{
"jdbcUrl": "jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8",
"table": ["student2"]
}
],
"password": "123456",
"preSql": ["truncate student2"],
"username": "root",
"writeMode": "insert"
}
}
}
],
"setting": {
"speed": {
"channel": "3"
}
}
}
}
7.mysql增量同步到hive
需要注意的部分就是:where
(条件筛选)
编写mysqlzheng2hive.json文件
{
"job": {
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"column": [
"id",
"name",
"age",
"gender",
"clazz",
"last_mod"
],
"connection": [
{
"jdbcUrl": [
"jdbc:mysql://master01:3306/students"
],
"table": ["student"]
}
],
"password": "123456",
"username": "root",
"where": "id>3"
}
},
"writer": {
"name": "hdfswriter",
"parameter": {
"column": [
{
"name": "id",
"type": "bigint"
},
{
"name": "name",
"type": "string"
},
{
"name": "age",
"type": "INT"
},
{
"name": "gender",
"type": "string"
},
{
"name": "clazz",
"type": "string"
},
{
"name": "last_mod",
"type": "string"
}
],
"defaultFS": "hdfs://master01:9000",
"fieldDelimiter": ",",
"fileName": "student",
"fileType": "text",
"path": "/user/hive/warehouse/dataxstudent",
"writeMode": "append"
}
}
}
],
"setting": {
"speed": {
"channel": "3"
}
}
}
}
增加一条数据