展开
拓展 关闭
订阅号推广码
GitHub
视频
公告栏 关闭

构建ODS层:使用脚本通过Sqoop将Oracle数据采集(导入)到HDFS

  • 启动容器
docker start oracle
docker start hadoop
docker start hive
docker start sqoop
  • 进入容器
docker exec -it sqoop bash

cd /opt/sqoop/one_make

[root@15b0369d3f2a one_make]# ll
total 28
-rwxr--r-- 1 root root 1035 Sep 28  2021 full_import_tables.sh
-rw-r--r-- 1 root root 1237 Sep 28  2021 full_import_tables.txt
-rwxr--r-- 1 root root 1410 Sep 28  2021 incr_import_tables.sh
-rw-r--r-- 1 root root 1791 Sep 28  2021 incr_import_tables.txt
-rwxr--r-- 1 root root 2276 May 14  2021 upload_avro_schema.sh

# 执行脚本
sh upload_avro_schema.sh
  • upload_avro_schema.sh
#!/usr/bin/env bash
# Upload the Sqoop-generated Avro schema (.avsc) files to HDFS, and keep a
# local + HDFS tar.gz backup of them. Idempotent: each step is skipped when
# its target already exists.
# /bin/bash
workhome=/opt/sqoop/one_make
hdfs_schema_dir=/data/dw/ods/one_make/avsc
biz_date=20210101
biz_fmt_date=2021-01-01
# local backup archive name, e.g. schema_20210101.tar.gz
local_schema_backup_filename=schema_${biz_date}.tar.gz
# HDFS path of the backup archive
hdfs_schema_backup_filename=${hdfs_schema_dir}/avro_schema_${biz_date}.tar.gz
# NOTE(review): assumes ${workhome}/log already exists — the script never creates it
log_file=${workhome}/log/upload_avro_schema_${biz_fmt_date}.log
# log MESSAGE...
# Append a "YYYY-MM-DD HH:MM:SS MESSAGE" line to ${log_file} (global,
# defined in the script header).
log() {
    # local: the original leaked cur_time into the global scope;
    # $( ) replaces the legacy backtick command substitution.
    local cur_time
    cur_time=$(date "+%F %T")
    echo "${cur_time} $*" >> "${log_file}"
}

source /etc/profile
# FIX: ensure the local log directory exists before the first log() call;
# log() appends to ${workhome}/log/... which fails if the directory is absent.
mkdir -p "${workhome}/log"
cd "${workhome}" || exit 1

#  hadoop fs [generic options] [-test -[defsz] <path>]
# -test -[defsz] <path> :
#   Answer various questions about <path>, with result via exit status.
#     -d  return 0 if <path> is a directory.
#     -e  return 0 if <path> exists.
#     -f  return 0 if <path> is a file.
#     -s  return 0 if file <path> is greater than zero bytes in size.
#     -z  return 0 if file <path> is zero bytes in size, else return 1.

log "Check if the HDFS Avro schema directory ${hdfs_schema_dir} exists..."
if ! hdfs dfs -test -e "${hdfs_schema_dir}" > /dev/null; then
    log "Path: ${hdfs_schema_dir} does not exist. Create a new one."
    log "hdfs dfs -mkdir -p ${hdfs_schema_dir}"
    hdfs dfs -mkdir -p "${hdfs_schema_dir}"
fi

log "Check if the file ${hdfs_schema_dir}/CISS4_CISS_BASE_AREAS.avsc has been uploaded to HDFS..."
# BUG FIX: the original tested ...CISS_BASE_AREAS.avsc.avsc (doubled
# extension), so this check always failed and every run re-uploaded all
# the .avsc files.
if ! hdfs dfs -test -e "${hdfs_schema_dir}/CISS4_CISS_BASE_AREAS.avsc" > /dev/null; then
    log "Upload all the .avsc schema files."
    log "hdfs dfs -put ${workhome}/java_code/*.avsc ${hdfs_schema_dir}"
    hdfs dfs -put "${workhome}"/java_code/*.avsc "${hdfs_schema_dir}"
fi

# backup
log "Check if the backup tar.gz file has been generated on the local server..."
if [ ! -e "${local_schema_backup_filename}" ]; then
    log "Package and compress the schema files."
    log "tar -czf ${local_schema_backup_filename} ./java_code/*.avsc"
    tar -czf "${local_schema_backup_filename}" ./java_code/*.avsc
fi

log "Check if the backup tar.gz file has been uploaded to HDFS..."
if ! hdfs dfs -test -e "${hdfs_schema_backup_filename}" > /dev/null; then
    log "Upload the schema package file to HDFS."
    log "hdfs dfs -put ${local_schema_backup_filename} ${hdfs_schema_backup_filename}"
    hdfs dfs -put "${local_schema_backup_filename}" "${hdfs_schema_backup_filename}"
fi
  • 生成如下

  • 进入hadoop容器查看

[root@f35c2d30005b /]# hdfs dfs -cat /data/dw/ods/one_make/avsc/CISS4_CISS_BASE_DEVICE.avsc
{
  "type" : "record",
  "name" : "CISS4_CISS_BASE_DEVICE",
  "doc" : "Sqoop import of CISS4.CISS_BASE_DEVICE",
  "fields" : [ {
    "name" : "CODE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "CODE",
    "sqlType" : "12"
  }, {
    "name" : "TYPE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "TYPE",
    "sqlType" : "12"
  }, {
    "name" : "NAME",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "NAME",
    "sqlType" : "12"
  }, {
    "name" : "MODEL",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "MODEL",
    "sqlType" : "12"
  }, {
    "name" : "OILGUN_NUM",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "OILGUN_NUM",
    "sqlType" : "12"
  }, {
    "name" : "CONFIGURATION",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "CONFIGURATION",
    "sqlType" : "12"
  }, {
    "name" : "PUMP_TYPE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "PUMP_TYPE",
    "sqlType" : "12"
  }, {
    "name" : "SOFT_TYPE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "SOFT_TYPE",
    "sqlType" : "12"
  } ],
  "tableName" : "CISS4.CISS_BASE_DEVICE"
  • 执行如下
sh full_import_tables.sh
  • 查看脚本
#!/usr/bin/env bash
# Full import: pull every table listed in full_import_tables.txt from Oracle
# into HDFS (one Avro data directory per table per biz date) via Sqoop, one
# background job per table.
# /bin/bash
biz_date=20210101
biz_fmt_date=2021-01-01
dw_parent_dir=/data/dw/ods/one_make/full_imp
workhome=/opt/sqoop/one_make
full_imp_tables=${workhome}/full_import_tables.txt

# FIX: -p so a re-run does not abort on "File exists".
mkdir -p "${workhome}/log"

orcl_srv=oracle.bigdata.cn
orcl_port=1521
orcl_sid=helowin
orcl_user=ciss
# SECURITY NOTE(review): plaintext DB password in the script; prefer Sqoop's
# --password-file (or an env var) in production.
orcl_pwd=123456

# Deliberately kept as scalars and expanded unquoted below: word-splitting
# turns each option into a separate Sqoop argument.
sqoop_import_params="sqoop import -Dmapreduce.job.user.classpath.first=true --outdir ${workhome}/java_code --as-avrodatafile"
sqoop_jdbc_params="--connect jdbc:oracle:thin:@${orcl_srv}:${orcl_port}:${orcl_sid} --username ${orcl_user} --password ${orcl_pwd}"

# load hadoop/sqoop env
source /etc/profile

while read -r p; do
    # parallel execution import; ${p^^} upper-cases the name because Oracle
    # stores table identifiers in upper case
    ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} -m 1 &
    cur_time=$(date "+%F %T")
    # BUG FIX: log the command actually executed (--table ${p^^}, not ${p}).
    echo "${cur_time}: ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} -m 1 &" >> "${workhome}/log/${biz_fmt_date}_full_imp.log"
    sleep 30
done < "${full_imp_tables}"

# FIX: block until every background Sqoop import has finished, so the script's
# exit does not race the imports still running.
wait
查看详情
  • full_import_tables.txt
ciss4.ciss_base_areas
ciss4.ciss_base_baseinfo
ciss4.ciss_base_csp
ciss4.ciss_base_customer
ciss4.ciss_base_device
ciss4.ciss_base_device_detail
ciss4.ciss_base_device_factory_info
ciss4.ciss_base_device_model
ciss4.ciss_base_device_name
ciss4.ciss_base_device_sys_def_fac
ciss4.ciss_base_device_type
ciss4.ciss_base_fault_category
ciss4.ciss_base_material
ciss4.ciss_base_material_detail
ciss4.ciss_base_nobusiness_subsidy
ciss4.ciss_base_no_subsidy_temp
ciss4.ciss_base_oilstation
ciss4.ciss_base_oilstation_contract
ciss4.ciss_base_oilstation_user
ciss4.ciss_base_reasoncode
ciss4.ciss_base_servicestation
ciss4.ciss_base_subsidy
ciss4.ciss_base_sub_accounitem
ciss4.ciss_base_warehouse
ciss4.ciss_base_warehouse_location
ciss4.ciss_csp_device_category_money
ciss4.ciss_csp_device_fault
ciss4.ciss_csp_device_inst_money
ciss4.ciss_csp_exchanged_m_dtl
ciss4.ciss_csp_oil_station
ciss4.ciss_csp_oil_station_device
ciss4.ciss_csp_warehouse_code
ciss4.ciss_device_inst_money
ciss4.ciss_fault_hour
ciss4.ciss_r_serstation_warehouse
ciss4.ciss_s_device_install_type
ciss4.ciss_user_call_center_user
ciss4.eos_dict_entry
ciss4.eos_dict_type
ciss4.org_employee
ciss4.org_emporg
ciss4.org_empposition
ciss4.org_organization
ciss4.org_position
  • 查看hdfs

  • 执行如下
sh incr_import_tables.sh
  • 查看脚本
#!/usr/bin/env bash
# Incremental import: pull only the rows whose CREATE_TIME falls on the biz
# date for every table listed in incr_import_tables.txt, from Oracle into
# HDFS via Sqoop, one background job per table.
# NOTE: be very careful with whitespace when building SQL conditions in
# shell — a stray space breaks the generated command line.
# /bin/bash
biz_date=20210101
biz_fmt_date=2021-01-01
dw_parent_dir=/data/dw/ods/one_make/incr_imp
workhome=/opt/sqoop/one_make
incr_imp_tables=${workhome}/incr_import_tables.txt

orcl_srv=oracle.bigdata.cn
orcl_port=1521
orcl_sid=helowin
orcl_user=ciss
# SECURITY NOTE(review): plaintext DB password in the script; prefer Sqoop's
# --password-file (or an env var) in production.
orcl_pwd=123456

# FIX: -p so a re-run does not abort on "File exists".
mkdir -p "${workhome}/log"

# BUG FIX: keep the --where clause in an array so it reaches Sqoop as one
# clean argument. The original scalar with escaped quotes word-split into
# an argument that still contained literal double-quote characters.
sqoop_condition_params=(--where "'${biz_fmt_date}'=to_char(CREATE_TIME,'yyyy-mm-dd')")
# Deliberately scalars, expanded unquoted below so they word-split into
# separate Sqoop arguments.
sqoop_import_params="sqoop import -Dmapreduce.job.user.classpath.first=true --outdir ${workhome}/java_code --as-avrodatafile"
sqoop_jdbc_params="--connect jdbc:oracle:thin:@${orcl_srv}:${orcl_port}:${orcl_sid} --username ${orcl_user} --password ${orcl_pwd}"

# load hadoop/sqoop env
source /etc/profile

while read -r p; do
    # clean old directory in HDFS; -f: no error when it does not exist yet
    hdfs dfs -rm -r -f ${dw_parent_dir}/${p}/${biz_date}

    # parallel execution import; ${p^^} upper-cases the name because Oracle
    # stores table identifiers in upper case
    ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} "${sqoop_condition_params[@]}" -m 1 &
    cur_time=$(date "+%F %T")
    # BUG FIX: log the command actually executed (target-dir uses ${p}, table
    # uses ${p^^} — the original logged ${p,,}/${p} instead).
    echo "${cur_time}: ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} --where \"'${biz_fmt_date}'=to_char(CREATE_TIME,'yyyy-mm-dd')\" -m 1 &" >> "${workhome}/log/${biz_fmt_date}_incr_imp.log"
    sleep 30

done < "${incr_imp_tables}"

# FIX: block until every background Sqoop import has finished, so the script's
# exit does not race the imports still running.
wait
查看详情
  • incr_import_tables.txt
ciss4.ciss_csp_m_bnblp_hx_sqd
ciss4.ciss_csp_m_bnblp_hx_sqd_dtl
ciss4.ciss_csp_m_bnlp_hx_sqd
ciss4.ciss_csp_m_bnlp_hx_sqd_dtl
ciss4.ciss_csp_m_wl_db_sqd
ciss4.ciss_csp_m_wl_db_sqd_dtl
ciss4.ciss_csp_m_wl_sqd
ciss4.ciss_csp_m_wl_sqd_dtl
ciss4.ciss_csp_m_xhp_hx_sqd
ciss4.ciss_csp_m_xhp_hx_sqd_dtl
ciss4.ciss_csp_m_xhp_sqd
ciss4.ciss_csp_m_xhp_sqd_dtl
ciss4.ciss_csp_workorder
ciss4.ciss_csp_workorder_device
ciss4.ciss_material_bnblp_hx_sqd
ciss4.ciss_material_bnblp_hx_sqd_dtl
ciss4.ciss_material_bnlp_hx_sqd
ciss4.ciss_material_bnlp_hx_sqd_dtl
ciss4.ciss_material_sx_repair_dtl
ciss4.ciss_material_sx_sqd
ciss4.ciss_material_wdwl_db_sqd
ciss4.ciss_material_wdwl_db_sqd_dtl
ciss4.ciss_material_wdwl_sqd
ciss4.ciss_material_wdwl_sqd_dtl
ciss4.ciss_material_wdwl_sq_dtl_seq
ciss4.ciss_material_wldzmx_sqd
ciss4.ciss_material_xhp_hx_sqd
ciss4.ciss_material_xhp_hx_sqd_dtl
ciss4.ciss_material_xhp_sqd
ciss4.ciss_material_xhp_sqd_dtl
ciss4.ciss_service_callaccept
ciss4.ciss_service_changeuser_record
ciss4.ciss_service_cstm_evaluation
ciss4.ciss_service_exchanged_m_dtl
ciss4.ciss_service_expense_report
ciss4.ciss_service_exp_report_dtl
ciss4.ciss_service_fault_dtl
ciss4.ciss_service_inspection
ciss4.ciss_service_install
ciss4.ciss_service_install_validate
ciss4.ciss_service_order
ciss4.ciss_service_order_device
ciss4.ciss_service_other
ciss4.ciss_service_remould
ciss4.ciss_service_repair
ciss4.ciss_service_repaired_m_dtl
ciss4.ciss_service_return_visit
ciss4.ciss_service_travel_expense
ciss4.ciss_service_trvl_exp_dtl
ciss4.ciss_service_trvl_exp_sum
ciss4.ciss_service_workorder
ciss4.ciss_service_workorder_back
ciss4.ciss_service_workorder_user
ciss4.ciss_s_exp_report_wo_payment
ciss4.ciss_s_install_exp_report
ciss4.ciss_s_install_exp_report_dtl
ciss4.ciss_s_install_exp_rep_02_dtl
  • 查看hdfs

posted @ 2024-01-18 11:08  DogLeftover  阅读(22)  评论(0编辑  收藏  举报