DataX基本使用

DataX基本使用

1.打印输入流在控制台

获取模板

datax.py -r 输入数据源名字 -w 输出数据源名字(可以到官网上找到准确的名字)

image-20220926211147721

如:datax.py -r mysqlreader -w hbase11xwriter

编写stream2stream.json文件

streamreader --> streamwriter
{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "streamreader", 
                    "parameter": {
                        "column": [
                        {"type":"string","value":"张三"},
                        {"type":"string","value":"李四"},
                        ], 
                        "sliceRecordCount": "2"
                    }
                }, 
                "writer": {
                    "name": "streamwriter", 
                    "parameter": {
                        "encoding": "utf-8", 
                        "print": true
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "2"
            }
        }
    }
}

执行:datax.py stream2stream.json

image-20220926203644722

2.MySQL 到 MySQL 同步

先建库建表

编写mysql2mysql.json文件

mysqlreader-->mysqlwriter

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "gender",
                            "clazz",
                            "last_mod"
                        ], 
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://master01:3306/students"], 
                                "table": ["student"]
                            }
                        ], 
                        "password": "123456", 
                        "username": "root"
                    }
                }, 
                "writer": {
                    "name": "mysqlwriter", 
                    "parameter": {
                        "column": [
                             "id",
                            "name",
                            "age",
                            "gender",
                            "clazz",
                            "last_mod"
                        ], 
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://master01:3306/students2?useUnicode=true&characterEncoding=utf8", 
                                "table": ["datatostudent"]
                            }
                        ], 
                        "password": "123456",
                        "username": "root", 
                        "writeMode": "insert"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}
执行:datax.py mysql2mysql.json

image-20220926203541448

3.mysql到hdfs同步

先在hdfs上建好文件写入路径hadoop fs -mkdir ...

编写mysql2hdfs.json文件

mysqlreader-->hdfswriter

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "gender",
                            "clazz",
                            "last_mod"
                        ], 
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8"], 
                                "table": ["student"]
                            }
                        ], 
                        "password": "123456", 
                        "username": "root"
                    }
                }, 
                "writer": {
                    "name": "hdfswriter", 
                    "parameter": {
                        "column": [
                             {"name":"id","type":"int"},
                             {"name":"name","type":"string"},
                             {"name":"age","type":"int"},
                             {"name":"gender","type":"string"},
                             {"name":"clazz","type":"string"},
                             {"name":"last_mod","type":"date"}
                        ],
                        "defaultFS": "hdfs://master01:9000", 
                        "fieldDelimiter": ",", 
                        "fileName": "datastudent", 
                        "fileType": "text", 
                        "path": "/shujia/bigdata19/dataxoutput", 
                        "writeMode": "append"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}

执行:datax.py mysql2hdfs.json

image-20220926203441235

4.mysql到hive同步

创建表

CREATE EXTERNAL TABLE IF NOT EXISTS dataxstudent(
    id BIGINT,
    name STRING,
    age INT,
    gender STRING,
    clazz STRING,
    last_mod STRING
)
comment '学生表'
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
STORED AS TEXTFILE

编写mysql2hive.json文件

mysqlreader--hive(hdfswriter)

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": [
                         	"id",
                            "name",
                            "age",
                            "gender",
                            "clazz",
                            "last_mod"
                        ], 
                        "connection": [
                            {
                                "jdbcUrl": ["jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8"], 
                                "table": ["student"]
                            }
                        ], 
                        "password": "123456", 
                        "username": "root"
                    }
                }, 
                "writer": {
                    "name": "hdfswriter", 
                    "parameter": {
                        "column": [
                            {	"name":"id",
                            	"type":"bigint"
                            },
                            {
                                "name":"name",
                                "type":"string"
                            },
                            {
                                "name":"age",
                                "type":"int"
                            },
                            {
                                "name":"gender",
                                "type":"string"
                            },
                            {
                                "name":"clazz",
                                "type":"string"
                            },
                            {
                                "name":"last_mod",
                                "type":"string"
                            }
                            
                        ],
                        "defaultFS": "hdfs://master01:9000", 
                        "fieldDelimiter": ",", 
                        "fileName": "student", 
                        "fileType": "text", 
                        "path": "/user/hive/warehouse/dataxstudent", 
                        "writeMode": "append"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "3"
            }
        }
    }
}

image-20220926203403468

image-20220926205011454

5.Mysql到hbase

在hbase中创建一个表

image-20220926210356936

编写mysql2hbase.json文件

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "gender",
                            "clazz",
                            "last_mod"
                        ], 
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8"
                                ], 
                                "table": ["student"]
                            }
                        ], 
                        "password": "123456", 
                        "username": "root",
                    }
                }, 
                "writer": {
                    "name": "hbase11xwriter", 
                    "parameter": {
                        "column": [
                            {"index":1,"name":"cf1:name","type":"string"},
                            {"index":2,"name":"cf1:age","type":"string"},
                            {"index":3,"name":"cf1:gender","type":"string"},
                          {"index":5,"name":"cf1:last_mod","type":"string"}
                        ], 
                        "encoding": "UTF-8", 
                        "hbaseConfig": {
                            "hbase.zookeeper.quorum": "master01:2181,node1:2181,node2:2181"
                        }, 
                        "mode": "normal", 
                        "rowkeyColumn": [
                            {"index":0,"type":"string"},
                            {"index":-1,"type":"string","value":"_"},
                            {"index":4,"type":"string"}
                        ], 
                        "table": "dataxstudent"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "1"
            }
        }
    }
}

image-20220926210655480

image-20220926210748107

image-20220926210836416

6.hbase到mysql

先清空hbase中原来以id和clazz作为rowkey的表,修改上面配置文件,重新以id为rowkey从MySQL同步到hbase

获取模板

datax.py -r hbase11xreader -w mysqlwriter

新建一个student2表

编写hbase2mysql.json文件

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "hbase11xreader", 
                    "parameter": {
                        "column": [
                            {
                                "name":"rowkey",
                                "type":"string"
                            },
                            {
                                "name":"cf1:name",
                                "type":"string"
                            },
                            {
                                "name":"cf1:age",
                                "type":"string"
                            },
                            {
                                "name":"cf1:gender",
                                "type":"string"
                            },
                            {
                                "name":"cf1:clazz",
                                "type":"string"
                            },
                            {
                                "name":"cf1:last_mod",
                                "type":"string"
                            }
                        ], 
                        "encoding": "UTF-8", 
                        "hbaseConfig": {
                            "hbase.zookeeper.quorum": "master01:2181,node1:2181,node2:2181"
                        }, 
                        "mode": "normal", 
                        "table": "dataxstudent"
                    }
                }, 
                "writer": {
                    "name": "mysqlwriter", 
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "gender",
                            "clazz",
                            "last_mod"
                        ], 
                        "connection": [
                            {
                                "jdbcUrl": "jdbc:mysql://master01:3306/students?useUnicode=true&characterEncoding=utf8", 
                                "table": ["student2"]
                            }
                        ], 
                        "password": "123456", 
                        "preSql": ["truncate student2"], 
                        "username": "root", 
                        "writeMode": "insert"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "3"
            }
        }
    }
}

image-20220926215951902

7.mysql增量同步到hive

需要注意的部分就是:where(条件筛选)

编写mysqlzheng2hive.json文件

{
    "job": {
        "content": [
            {
                "reader": {
                    "name": "mysqlreader", 
                    "parameter": {
                        "column": [
                            "id",
                            "name",
                            "age",
                            "gender",
                            "clazz",
                            "last_mod"
                        ], 
                        "connection": [
                            {
                                "jdbcUrl": [
                                    "jdbc:mysql://master01:3306/students"
                                ], 
                                "table": ["student"]
                            }
                        ], 
                        "password": "123456", 
                        "username": "root", 
                        "where": "id>3"
                    }
                }, 
                "writer": {
                    "name": "hdfswriter", 
                    "parameter": {
                        "column": [
                            {
                                "name": "id",
                                "type": "bigint"
                            },
                            {
                                "name": "name",
                                "type": "string"
                            },
                            {
                                "name": "age",
                                "type": "INT"
                            },
                            {
                                "name": "gender",
                                "type": "string"
                            },
                            {
                                "name": "clazz",
                                "type": "string"
                            },
                            {
                                "name": "last_mod",
                                "type": "string"
                            }
                        ],
                        "defaultFS": "hdfs://master01:9000", 
                        "fieldDelimiter": ",", 
                        "fileName": "student", 
                        "fileType": "text", 
                        "path": "/user/hive/warehouse/dataxstudent", 
                        "writeMode": "append"
                    }
                }
            }
        ], 
        "setting": {
            "speed": {
                "channel": "3"
            }
        }
    }
}

增加一条数据

image-20220926221156915

image-20220926221313548

posted @ 2022-09-26 22:30  伍点  阅读(987)  评论(0编辑  收藏  举报