Importing MySQL Data into Hive with Sqoop on CDH 5.12.1

Environment: CDH 5.12.1, CentOS 7

Database type: MySQL

1. HDFS permissions

Set dfs.permissions to false (it can be unchecked in the configuration UI).
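
In Hadoop 2.x (which CDH 5 ships) the property is named dfs.permissions.enabled, with dfs.permissions kept as a deprecated alias. As a quick sanity check, you can read the effective value from the client-side configuration:

hdfs getconf -confKey dfs.permissions.enabled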

2. Leave safe mode to allow reads and writes

hdfs dfsadmin -safemode leave
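
You can confirm whether the NameNode is actually in safe mode before and after:

hdfs dfsadmin -safemode get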

3. Create the Hive table

drop table if exists default.opportunity;

create table default.opportunity (
  id BIGINT, consultant_account STRING, first_consultant_account STRING,
  group_id BIGINT, first_group_id BIGINT, sale_department_id BIGINT,
  first_sale_department_id BIGINT, legion_id BIGINT, first_legion_id BIGINT,
  business_id BIGINT, student_id BIGINT, province_id BIGINT, city_id BIGINT,
  create_user STRING, online_group_id BIGINT, online_center_id BIGINT,
  create_time TIMESTAMP, allocate_time TIMESTAMP, apply_time TIMESTAMP,
  auto_apply_time TIMESTAMP
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
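
To double-check that the DDL took effect (including the '\t' field delimiter), you can ask Hive to print the definition back:

show create table default.opportunity;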

4. Full data import with Sqoop

sqoop import \
--connect jdbc:mysql://192.168.75.101:3306/dragnet \
--username root \
--password yang156122 \
--query 'select  id,consultant_account ,first_consultant_account,group_id, first_group_id,sale_department_id,first_sale_department_id,legion_id,first_legion_id,business_id,student_id,province_id,city_id,create_user,online_group_id,online_center_id,create_time,allocate_time,apply_time,auto_apply_time from opportunity where $CONDITIONS' \
--target-dir /user/sqoop2/opportunity \
--delete-target-dir \
--num-mappers 1 \
--compress \
--compression-codec org.apache.hadoop.io.compress.SnappyCodec \
--direct \
--fields-terminated-by '\t'
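
This query import lands as Snappy-compressed text files under the --target-dir rather than directly in Hive. A minimal follow-up sketch (assuming the CDH-bundled Snappy codec, which Hive reads transparently for text tables): inspect the files, then load them into the table from step 3:

hdfs dfs -ls /user/sqoop2/opportunity

hive -e "load data inpath '/user/sqoop2/opportunity' overwrite into table default.opportunity;"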

# Table creation DDL

-- Create the Hive dynamic-partition tables

use advertise_ods;
set hive.exec.dynamic.partition=true;

set hive.exec.dynamic.partition.mode=nonstrict;
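
These two settings only matter for dynamic-partition INSERT statements (the LOAD DATA commands later in this post name their dt partition explicitly). A minimal sketch of what they enable, using a hypothetical staging table stg_ad_user_visit_history with the same columns as the target table defined just below:

INSERT OVERWRITE TABLE ods_ad_user_visit_history PARTITION (dt)
SELECT t.*, to_date(t.create_ts) AS dt
FROM stg_ad_user_visit_history t;

Here Hive derives the dt partition value from each row instead of requiring a literal.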

create external table ods_ad_user_visit_history
(
  `id` bigint,
  `code` string,
  `cookie` string,
  `key_word` string,
  `ip` string,
  `province_id` bigint,
  `province` string,
  `city_id` bigint,
  `city` string,
  `terminal` bigint,
  `system` bigint,
  `site_id` bigint,
  `plan_id` bigint,
  `main_url` string,
  `full_url` string,
  `page_name` string,
  `channel_id` bigint,
  `channel_name` string,
  `project_id` bigint,
  `project_name` string,
  `ocpc_config` string,
  `creative_name` string,
  `tenant_id` bigint,
  `create_ts` string,
  `update_ts` string
) partitioned by (dt STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as textfile
location "/ods/advertise/db/ad_user_visit_history";
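
Because this is an external table, data written under the LOCATION only becomes visible once each day's partition is registered. One way to do that by hand (the dt value is illustrative):

alter table ods_ad_user_visit_history add if not exists
partition (dt='2020-10-23')
location '/ods/advertise/db/ad_user_visit_history/2020-10-23';

The same applies to the other external tables below; alternatively, the LOAD DATA pattern shown later registers the partition as it loads.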



create external table ad_campaign
(
    `id`                   bigint,
    `campaign_number`      string,
    `campaign_name`        string,
    `channel_id`           bigint,
    `channel_name`         string,
    `account_id`           int,
    `account_name`         string,
    `region`               string,
    `pc_site_id`           bigint,
    `pc_site_number`       string,
    `mobile_site_id`       bigint,
    `mobile_site_number`   string,
    `pc_page_id`           bigint,
    `pc_page_number`       string ,
     `mobile_page_id`      bigint ,
    `mobile_page_number`   string,
    `pc_url`               string,
    `mobile_url`           string,
    `obverse_sign`         int,
    `reverse_sign`         int,
    `promoted_campaign_id` string,
    `mini_program_id`      bigint,
    `mini_program_name`    string,
    `start_sign`           int,
    `state`                int,
    `create_user_id`       bigint,
    `create_user_account`  string,
    `create_time`          string,
    `update_user_id`       bigint,
    `update_user_account`  string,
    `update_time`          string,
    `tenant_id`            bigint
) partitioned by (dt STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as  textfile
location "/ods/advertise/db/ad_campaign";



create external table ods_ad_info_card
(
  `id` bigint,
  `card_type` tinyint,
  `data_source` tinyint ,
  `consultant_type` tinyint,
  `owner_user_id` bigint,
  `request_url` string,
  `request_ip` string,
  `site_id` bigint,
  `account_id` int,
  `account_name` string,
  `customer_sign` string,
  `customer_name` string,
  `city_id` int,
  `city_name` string,
  `province_id` int,
  `province_name` string,
  `phone` string,
  `phone1` string,
  `qq` string,
  `wechat` string,
  `project_id` bigint ,
  `project_name` string,
  `remarks` string,
  `date` date ,
  `submit_time` string ,
  `update_time` string ,
  `tenant_id` bigint
) partitioned by (dt STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as  textfile
location "/ods/advertise/db/ad_info_card";



create external table ad_account_bind
(
  `id` int,
  `account_id` int ,
  `platform_user_id` bigint ,
  `platform_user_account` string,
  `platform_user_org_id1` bigint,
  `platform_user_org_id2` bigint,
  `state` tinyint ,
  `create_user_id` bigint ,
  `create_user_account` string,
  `create_time` string ,
  `update_user_id` bigint ,
  `update_user_account` string,
  `update_time` string ,
  `tenant_id` bigint
) partitioned by (dt STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as  textfile
location "/ods/advertise/db/ad_account_bind";



/******************privilege_center***************************/
use privilege_center_ods;
create external table  ods_u_organization
(
  `id` bigint,
  `tenant_id` bigint ,
  `org_name` string,
  `org_level` int ,
  `parent_id` bigint,
  `org_full_id` string,
  `org_full_name` string,
  `defaulted` tinyint ,
  `order_num` int ,
  `delete_flag` tinyint,
  `creator` string,
  `create_time` string,
  `updator` string,
  `update_time` string
) partitioned by (dt STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as  textfile
location "/ods/privilege_center/db/u_organization";


create external table  ods_u_user
(
  `id` bigint,
  `tenant_id` bigint ,
  `tenant_name` string,
  `user_name` string,
  `nick_name` string,
  `user_image` string,
  `user_account` string,
  `user_pass` string,
  `salt_value` string,
  `phone_number` string,
  `job_number` string,
  `delete_flag` tinyint,
  `root_flag` tinyint ,
  `user_status` tinyint ,
  `org_id` bigint,
  `org_position` tinyint ,
  `user_mail` string ,
  `creator` string,
  `create_time` string ,
  `updator` string ,
  `update_time` string
) partitioned by (dt STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' stored as  textfile
location "/ods/privilege_center/db/u_user";

# MySQL-to-HDFS import script

#!/bin/bash
# Use the date passed as the second argument; otherwise default to yesterday
if [ -n "$2" ] ;then
    do_date=$2
else
    do_date=`date -d '-1 day' +%F`
fi

import_data(){
sqoop import \
--connect "jdbc:mysql://localhost:3306/privilege_center?useSSL=false" \
--username root \
--password 'root' \
--target-dir /ods/privilege_center/db/$1/$do_date \
--delete-target-dir \
--query "$2 and \$CONDITIONS" \
--num-mappers 1 \
--fields-terminated-by '\t' \
--compress \
--compression-codec org.apache.hadoop.io.compress.SnappyCodec \
--null-string '\\N' \
--null-non-string '\\N'
}

import_u_user(){
  import_data u_user "select
                                        id, 
                                        tenant_id,
                                        tenant_name,
                                        user_name,
                                        nick_name,
                                        user_image,
                                        user_account,
                                        user_pass,
                                        salt_value,
                                        phone_number,
                                        job_number,
                                        delete_flag,
                                        root_flag,
                                        user_status,
                                        org_id,
                                        org_position,
                                        user_mail,
                                        creator,
                                        DATE_FORMAT(create_time, '%Y-%m-%d %k:%i:%s') as create_time,
                                        updator,
                                        DATE_FORMAT(update_time, '%Y-%m-%d %k:%i:%s') as update_time
                                        from u_user
                                        where 1=1
"
}



import_u_organization(){
  import_data u_organization "select
                                        id, 
                                        tenant_id,
                                        org_name,
                                        org_level,
                                        parent_id,
                                        org_full_id,
                                        org_full_name,
                                        defaulted,
                                        order_num,
                                        delete_flag,
                                        creator,
                                        DATE_FORMAT(create_time, '%Y-%m-%d %k:%i:%s') as create_time,
                                        updator,
                                        DATE_FORMAT(update_time, '%Y-%m-%d %k:%i:%s') as update_time
                                        from u_organization
                                        where 1=1
"
}


case $1 in
        "all")
                import_u_user
                import_u_organization
;;
esac
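
Assuming the script above is saved as, say, mysql_to_hdfs.sh (the file name is a placeholder), it takes the table group and an optional date:

chmod +x mysql_to_hdfs.sh
./mysql_to_hdfs.sh all 2020-10-22    # import data for an explicit day
./mysql_to_hdfs.sh all               # no date given: defaults to yesterday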

# HDFS-to-Hive load script


#!/bin/bash
APP=privilege_center_ods    # must match the database the ods_u_* tables were created in
hive=/usr/bin/hive
# If a date is passed as the second argument, use it; otherwise default to yesterday
if [ -n "$2" ] ;then
    do_date=$2
else 
    do_date=`date -d "-1 day" +%F`
fi

sql1="
load data inpath '/ods/privilege_center/db/u_organization/$do_date' OVERWRITE into table ${APP}.ods_u_organization partition(dt='$do_date');
load data inpath '/ods/privilege_center/db/u_user/$do_date' OVERWRITE into table ${APP}.ods_u_user partition(dt='$do_date');
"
# (a full load is currently used)
case $1 in
"all"){
    $hive -e "$sql1"
};;
esac
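
This is called the same way as the import script (the script name is again a placeholder); afterwards, a quick count verifies that the partition landed:

./hdfs_to_hive.sh all 2020-10-22
hive -e "select count(*) from privilege_center_ods.ods_u_user where dt='2020-10-22';"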

############### Practical example ##############

Notes: the database/table structure used in this example is shown below.

1. Create the table

drop table if exists default.data;
create table default.data (id BIGINT, name STRING, create_time TIMESTAMP) ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

2. Full import

sqoop import \
--connect jdbc:mysql://192.168.75.101:3306/dragnet \
--username root \
--password yang156122 \
--table data \
--hive-import \
--fields-terminated-by ',' \
--m 1
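
--hive-import loads straight into the pre-created default.data table, so the rows should be queryable as soon as the job finishes:

hive -e "select count(*) from default.data;"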

 

3. Incremental import

sqoop import --connect jdbc:mysql://192.168.75.101:3306/dragnet  \
--username root \
--password yang156122 \
--table data \
--target-dir '/user/hive/warehouse/data' \
--check-column create_time \
--incremental lastmodified  \
--last-value '2020-10-23 00:00:00' \
--m 1 \
--merge-key id
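
To avoid hand-editing --last-value between runs, the same import can be wrapped in a saved Sqoop job, which records the new high-water mark in Sqoop's metastore after each execution. A sketch (the job name data_incr is a placeholder; Sqoop prompts for the database password at execution time unless one is stored via --password-file):

sqoop job --create data_incr -- import \
--connect jdbc:mysql://192.168.75.101:3306/dragnet \
--username root \
--table data \
--target-dir '/user/hive/warehouse/data' \
--check-column create_time \
--incremental lastmodified \
--last-value '2020-10-23 00:00:00' \
--merge-key id \
--m 1

sqoop job --exec data_incr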

 

4. Write a cron job and restart the cron service

/bin/systemctl restart  crond.service
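
For example, to run the incremental import nightly at 00:30, add a line via crontab -e (the script path and log path are placeholders):

30 0 * * * /opt/scripts/sqoop_incremental.sh >> /var/log/sqoop_incremental.log 2>&1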