sqoop

1.将mysql---->hdfs

    bin/sqoop import \
    --connect jdbc:mysql://hadoop:3306/test \
    --username root \
    --password root \
    --table my_user \
    --target-dir /user/hive/warehouse/my_user \
    --delete-target-dir \
    --num-mappers 1 \
    --fields-terminated-by "\t"

    target-dir:HDFS输出目录
    delete-target-dir:如果上面输出目录存在,就先删除
    num-mappers:设置map个数为1,默认情况下map个数是4,即会在输出目录生成4个文件
    fields-terminated-by "\t":指定列分隔符为 \t

===========================================================================
                
            1、指定具体列(num-mappers为2,生成2个文件)
            $ bin/sqoop import \
            --connect jdbc:mysql://hadoop-senior.ibeifeng.com:3306/test \
            --username root \
            --password 123456 \
            --table my_user \
            --target-dir /user/hive/warehouse/my_user \
            --delete-target-dir \
            --num-mappers 2 \
            --fields-terminated-by "\t" \
            --columns id,passwd

            ---------------------------

            2、用where指定条件
            $ bin/sqoop import \
            --connect jdbc:mysql://hadoop-senior.ibeifeng.com:3306/test \
            --username root \
            --password 123456 \
            --table my_user \
            --target-dir /user/hive/warehouse/my_user \
            --delete-target-dir \
            --num-mappers 1 \
            --fields-terminated-by "\t" \
            --columns  id,passwd \
            --where "id<=3"

            --------------------------

            3、把select语句的查询结果导入,必需包含'$CONDITIONS'在WHERE子句,否则报错
            --query "select id,name from my_user where id>=3 and $CONDITIONS"

            $ bin/sqoop import \
            --connect jdbc:mysql://hadoop-senior.ibeifeng.com:3306/test \
            --username root \
            --password 123456 \
            --target-dir /user/hive/warehouse/my_user \
            --delete-target-dir \
            --num-mappers 1 \
            --fields-terminated-by "\t" \
            --query 'select id,account from my_user where id>=3 and $CONDITIONS'

=======================================================================================

2.将hdfs----->mysql
    
    1、先创建一个mysql表
    CREATE TABLE `hive2mysql` (
      `id` tinyint(4) PRIMARY KEY AUTO_INCREMENT,
      `account` varchar(255),
      `passwd` varchar(255)
    );

    2、从hive或者hdfs导入到Mysql表
    $ bin/sqoop export \
    --connect jdbc:mysql://hadoop-senior.ibeifeng.com:3306/test \
    --username root \
    --password 123456 \
    --table hive2mysql \
    --num-mappers 1 \
    --export-dir /user/hive/warehouse/test.db/my_user \
    --input-fields-terminated-by "\t"


3.将mysql---->hive

        1、先要创建好Hive表
        hive> create database test;
        hive> use test;
        CREATE TABLE test.my_user (
        id int,
        account string,
        passwd string
        )row format delimited fields terminated by "\t";

        2、导入数据到Hive
        bin/sqoop import \
        --connect jdbc:mysql://hadoop-senior.ibeifeng.com:3306/test \
        --username root \
        --password 123456 \
        --table my_user \
        --num-mappers 1 \
        --hive-import \
        --hive-database test \
        --hive-table my_user \
        --fields-terminated-by "\t" \
        --delete-target-dir \
        --hive-overwrite 

4.将hive----->mysql
    
    
5.sqoop脚本文件的编写
        1、创建一个opt脚本文件(注意:必需要换行)
        vi job1.opt

        export
        --connect
        jdbc:mysql://hadoop-senior.ibeifeng.com:3306/test
        --username
        root
        --password
        123456
        --table
        hive2mysql
        --num-mappers
        1
        --export-dir
        /user/hive/warehouse/test.db/my_user
        --input-fields-terminated-by
        "\t"
            
        2、使用sqoop执行这个文件
        ** 删除掉表中数据,避免主键重复
        $ bin/sqoop  --options-file job1.opt

 

posted @ 2019-04-27 14:54  问题不大1  阅读(248)  评论(0编辑  收藏  举报