Build the ODS layer: export the data to HDFS with scripts

  • Start the containers
docker start oracle
docker start hadoop
docker start hive
docker start sqoop
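A quick sanity check that all four containers are actually up (a minimal sketch; adjust the names if yours differ):
docker ps --format '{{.Names}}\t{{.Status}}' | grep -E 'oracle|hadoop|hive|sqoop'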
  • Enter the sqoop container
docker exec -it sqoop bash
cd /opt/sqoop/one_make
[root@15b0369d3f2a one_make]# ll
total 28
-rwxr--r-- 1 root root 1035 Sep 28 2021 full_import_tables.sh
-rw-r--r-- 1 root root 1237 Sep 28 2021 full_import_tables.txt
-rwxr--r-- 1 root root 1410 Sep 28 2021 incr_import_tables.sh
-rw-r--r-- 1 root root 1791 Sep 28 2021 incr_import_tables.txt
-rwxr--r-- 1 root root 2276 May 14 2021 upload_avro_schema.sh
# run the upload script
sh upload_avro_schema.sh
  • upload_avro_schema.sh
#!/usr/bin/env bash
# upload the Avro schema files to HDFS
workhome=/opt/sqoop/one_make
hdfs_schema_dir=/data/dw/ods/one_make/avsc
biz_date=20210101
biz_fmt_date=2021-01-01
local_schema_backup_filename=schema_${biz_date}.tar.gz
hdfs_schema_backup_filename=${hdfs_schema_dir}/avro_schema_${biz_date}.tar.gz
log_file=${workhome}/log/upload_avro_schema_${biz_fmt_date}.log
# append a timestamped message to the log file
log() {
    cur_time=`date "+%F %T"`
    echo "${cur_time} $*" >> ${log_file}
}
source /etc/profile
cd ${workhome}
# hadoop fs [generic options] [-test -[defsz] <path>]
# -test -[defsz] <path> :
#   Answer various questions about <path>, with result via exit status.
#     -d  return 0 if <path> is a directory.
#     -e  return 0 if <path> exists.
#     -f  return 0 if <path> is a file.
#     -s  return 0 if file <path> is greater than zero bytes in size.
#     -z  return 0 if file <path> is zero bytes in size, else return 1.
log "Check if the HDFS Avro schema directory ${hdfs_schema_dir} exists..."
hdfs dfs -test -e ${hdfs_schema_dir} > /dev/null
if [ $? != 0 ]; then
    log "Path: ${hdfs_schema_dir} does not exist. Create a new one."
    log "hdfs dfs -mkdir -p ${hdfs_schema_dir}"
    hdfs dfs -mkdir -p ${hdfs_schema_dir}
fi
log "Check if the file ${hdfs_schema_dir}/CISS4_CISS_BASE_AREAS.avsc has been uploaded to HDFS..."
hdfs dfs -test -e ${hdfs_schema_dir}/CISS4_CISS_BASE_AREAS.avsc > /dev/null
if [ $? != 0 ]; then
    log "Upload all the .avsc schema files."
    log "hdfs dfs -put ${workhome}/java_code/*.avsc ${hdfs_schema_dir}"
    hdfs dfs -put ${workhome}/java_code/*.avsc ${hdfs_schema_dir}
fi
# backup
log "Check if the backup tar.gz file has been generated on the local server..."
if [ ! -e ${local_schema_backup_filename} ]; then
    log "package and compress the schema files"
    log "tar -czf ${local_schema_backup_filename} ./java_code/*.avsc"
    tar -czf ${local_schema_backup_filename} ./java_code/*.avsc
fi
log "Check if the backup tar.gz file has been uploaded to HDFS..."
hdfs dfs -test -e ${hdfs_schema_backup_filename} > /dev/null
if [ $? != 0 ]; then
    log "upload the schema package file to HDFS"
    log "hdfs dfs -put ${local_schema_backup_filename} ${hdfs_schema_backup_filename}"
    hdfs dfs -put ${local_schema_backup_filename} ${hdfs_schema_backup_filename}
fi
  • The following files are generated
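The generated schema files on HDFS can be listed from the command line (a minimal check, using the hdfs_schema_dir path from the script):
hdfs dfs -ls /data/dw/ods/one_make/avsc | head -n 10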

  • Enter the hadoop container to inspect

[root@f35c2d30005b /]# hdfs dfs -cat /data/dw/ods/one_make/avsc/CISS4_CISS_BASE_DEVICE.avsc
{
  "type" : "record",
  "name" : "CISS4_CISS_BASE_DEVICE",
  "doc" : "Sqoop import of CISS4.CISS_BASE_DEVICE",
  "fields" : [ {
    "name" : "CODE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "CODE",
    "sqlType" : "12"
  }, {
    "name" : "TYPE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "TYPE",
    "sqlType" : "12"
  }, {
    "name" : "NAME",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "NAME",
    "sqlType" : "12"
  }, {
    "name" : "MODEL",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "MODEL",
    "sqlType" : "12"
  }, {
    "name" : "OILGUN_NUM",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "OILGUN_NUM",
    "sqlType" : "12"
  }, {
    "name" : "CONFIGURATION",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "CONFIGURATION",
    "sqlType" : "12"
  }, {
    "name" : "PUMP_TYPE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "PUMP_TYPE",
    "sqlType" : "12"
  }, {
    "name" : "SOFT_TYPE",
    "type" : [ "null", "string" ],
    "default" : null,
    "columnName" : "SOFT_TYPE",
    "sqlType" : "12"
  } ],
  "tableName" : "CISS4.CISS_BASE_DEVICE"
}
  • Run the full import as follows
sh full_import_tables.sh
  • View the script
#!/usr/bin/env bash
biz_date=20210101
biz_fmt_date=2021-01-01
dw_parent_dir=/data/dw/ods/one_make/full_imp
workhome=/opt/sqoop/one_make
full_imp_tables=${workhome}/full_import_tables.txt
mkdir -p ${workhome}/log
orcl_srv=oracle.bigdata.cn
orcl_port=1521
orcl_sid=helowin
orcl_user=ciss
orcl_pwd=123456
sqoop_import_params="sqoop import -Dmapreduce.job.user.classpath.first=true --outdir ${workhome}/java_code --as-avrodatafile"
sqoop_jdbc_params="--connect jdbc:oracle:thin:@${orcl_srv}:${orcl_port}:${orcl_sid} --username ${orcl_user} --password ${orcl_pwd}"
# load hadoop/sqoop env
source /etc/profile
while read p; do
    # launch each table's import in the background (parallel execution);
    # ${p^^} upper-cases the table name for Oracle
    ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} -m 1 &
    cur_time=`date "+%F %T"`
    echo "${cur_time}: ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} -m 1 &" >> ${workhome}/log/${biz_fmt_date}_full_imp.log
    sleep 30
done < ${full_imp_tables}
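Before launching the whole batch, it can help to verify connectivity with a single table. A minimal sketch assembled from the same parameters as the script (values as configured above; ciss_base_areas chosen arbitrarily from the list):
sqoop import -Dmapreduce.job.user.classpath.first=true \
  --connect jdbc:oracle:thin:@oracle.bigdata.cn:1521:helowin \
  --username ciss --password 123456 \
  --table CISS4.CISS_BASE_AREAS \
  --target-dir /data/dw/ods/one_make/full_imp/ciss4.ciss_base_areas/20210101 \
  --outdir /opt/sqoop/one_make/java_code --as-avrodatafile -m 1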
  • full_import_tables.txt
ciss4.ciss_base_areas
ciss4.ciss_base_baseinfo
ciss4.ciss_base_csp
ciss4.ciss_base_customer
ciss4.ciss_base_device
ciss4.ciss_base_device_detail
ciss4.ciss_base_device_factory_info
ciss4.ciss_base_device_model
ciss4.ciss_base_device_name
ciss4.ciss_base_device_sys_def_fac
ciss4.ciss_base_device_type
ciss4.ciss_base_fault_category
ciss4.ciss_base_material
ciss4.ciss_base_material_detail
ciss4.ciss_base_nobusiness_subsidy
ciss4.ciss_base_no_subsidy_temp
ciss4.ciss_base_oilstation
ciss4.ciss_base_oilstation_contract
ciss4.ciss_base_oilstation_user
ciss4.ciss_base_reasoncode
ciss4.ciss_base_servicestation
ciss4.ciss_base_subsidy
ciss4.ciss_base_sub_accounitem
ciss4.ciss_base_warehouse
ciss4.ciss_base_warehouse_location
ciss4.ciss_csp_device_category_money
ciss4.ciss_csp_device_fault
ciss4.ciss_csp_device_inst_money
ciss4.ciss_csp_exchanged_m_dtl
ciss4.ciss_csp_oil_station
ciss4.ciss_csp_oil_station_device
ciss4.ciss_csp_warehouse_code
ciss4.ciss_device_inst_money
ciss4.ciss_fault_hour
ciss4.ciss_r_serstation_warehouse
ciss4.ciss_s_device_install_type
ciss4.ciss_user_call_center_user
ciss4.eos_dict_entry
ciss4.eos_dict_type
ciss4.org_employee
ciss4.org_emporg
ciss4.org_empposition
ciss4.org_organization
ciss4.org_position
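The full-import list above contains 44 tables; since the loop sleeps 30 seconds between launches, dispatching all of the imports takes roughly 22 minutes. To count the list and watch progress (the log path follows the script's variables):
wc -l full_import_tables.txt            # 44 tables
tail -f log/2021-01-01_full_imp.log     # watch the launch log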
  • Check the HDFS output
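A quick way to inspect the imported Avro data (the avro-tools jar path and version below are assumptions; part-m-00000.avro is Sqoop's usual single-mapper output name):
hdfs dfs -ls /data/dw/ods/one_make/full_imp/ciss4.ciss_base_device/20210101
# pull one data file locally and read its embedded schema
hdfs dfs -get /data/dw/ods/one_make/full_imp/ciss4.ciss_base_device/20210101/part-m-00000.avro /tmp/
java -jar /opt/avro-tools-1.8.2.jar getschema /tmp/part-m-00000.avro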

  • Run the incremental import as follows
sh incr_import_tables.sh
  • View the script
#!/usr/bin/env bash
# Be careful when writing shell scripts, especially the SQL condition below:
# a stray space inside it will make the command fail.
biz_date=20210101
biz_fmt_date=2021-01-01
dw_parent_dir=/data/dw/ods/one_make/incr_imp
workhome=/opt/sqoop/one_make
incr_imp_tables=${workhome}/incr_import_tables.txt
orcl_srv=oracle.bigdata.cn
orcl_port=1521
orcl_sid=helowin
orcl_user=ciss
orcl_pwd=123456
mkdir -p ${workhome}/log
# only pull rows created on the business date
sqoop_condition_params="--where \"'${biz_fmt_date}'=to_char(CREATE_TIME,'yyyy-mm-dd')\""
sqoop_import_params="sqoop import -Dmapreduce.job.user.classpath.first=true --outdir ${workhome}/java_code --as-avrodatafile"
sqoop_jdbc_params="--connect jdbc:oracle:thin:@${orcl_srv}:${orcl_port}:${orcl_sid} --username ${orcl_user} --password ${orcl_pwd}"
# load hadoop/sqoop env
source /etc/profile
while read p; do
    # clean the old directory in HDFS
    hdfs dfs -rm -r ${dw_parent_dir}/${p}/${biz_date}
    # launch each table's import in the background (parallel execution)
    ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} ${sqoop_condition_params} -m 1 &
    cur_time=`date "+%F %T"`
    echo "${cur_time}: ${sqoop_import_params} ${sqoop_jdbc_params} --target-dir ${dw_parent_dir}/${p}/${biz_date} --table ${p^^} ${sqoop_condition_params} -m 1 &" >> ${workhome}/log/${biz_fmt_date}_incr_imp.log
    sleep 30
done < ${incr_imp_tables}
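Both import scripts rely on bash 4 case-conversion expansions: ${p^^} upper-cases each line from the table list for Oracle's --table argument, while the lower-case ${p} is kept for the HDFS path. A minimal demo:
p=ciss4.ciss_service_workorder
echo "${p^^}"   # CISS4.CISS_SERVICE_WORKORDER -> Oracle table name
echo "${p}"     # ciss4.ciss_service_workorder -> HDFS directory name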
  • incr_import_tables.txt
ciss4.ciss_csp_m_bnblp_hx_sqd
ciss4.ciss_csp_m_bnblp_hx_sqd_dtl
ciss4.ciss_csp_m_bnlp_hx_sqd
ciss4.ciss_csp_m_bnlp_hx_sqd_dtl
ciss4.ciss_csp_m_wl_db_sqd
ciss4.ciss_csp_m_wl_db_sqd_dtl
ciss4.ciss_csp_m_wl_sqd
ciss4.ciss_csp_m_wl_sqd_dtl
ciss4.ciss_csp_m_xhp_hx_sqd
ciss4.ciss_csp_m_xhp_hx_sqd_dtl
ciss4.ciss_csp_m_xhp_sqd
ciss4.ciss_csp_m_xhp_sqd_dtl
ciss4.ciss_csp_workorder
ciss4.ciss_csp_workorder_device
ciss4.ciss_material_bnblp_hx_sqd
ciss4.ciss_material_bnblp_hx_sqd_dtl
ciss4.ciss_material_bnlp_hx_sqd
ciss4.ciss_material_bnlp_hx_sqd_dtl
ciss4.ciss_material_sx_repair_dtl
ciss4.ciss_material_sx_sqd
ciss4.ciss_material_wdwl_db_sqd
ciss4.ciss_material_wdwl_db_sqd_dtl
ciss4.ciss_material_wdwl_sqd
ciss4.ciss_material_wdwl_sqd_dtl
ciss4.ciss_material_wdwl_sq_dtl_seq
ciss4.ciss_material_wldzmx_sqd
ciss4.ciss_material_xhp_hx_sqd
ciss4.ciss_material_xhp_hx_sqd_dtl
ciss4.ciss_material_xhp_sqd
ciss4.ciss_material_xhp_sqd_dtl
ciss4.ciss_service_callaccept
ciss4.ciss_service_changeuser_record
ciss4.ciss_service_cstm_evaluation
ciss4.ciss_service_exchanged_m_dtl
ciss4.ciss_service_expense_report
ciss4.ciss_service_exp_report_dtl
ciss4.ciss_service_fault_dtl
ciss4.ciss_service_inspection
ciss4.ciss_service_install
ciss4.ciss_service_install_validate
ciss4.ciss_service_order
ciss4.ciss_service_order_device
ciss4.ciss_service_other
ciss4.ciss_service_remould
ciss4.ciss_service_repair
ciss4.ciss_service_repaired_m_dtl
ciss4.ciss_service_return_visit
ciss4.ciss_service_travel_expense
ciss4.ciss_service_trvl_exp_dtl
ciss4.ciss_service_trvl_exp_sum
ciss4.ciss_service_workorder
ciss4.ciss_service_workorder_back
ciss4.ciss_service_workorder_user
ciss4.ciss_s_exp_report_wo_payment
ciss4.ciss_s_install_exp_report
ciss4.ciss_s_install_exp_report_dtl
ciss4.ciss_s_install_exp_rep_02_dtl
  • Check the HDFS output
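As with the full import, the incremental output can be checked from the command line (a minimal sketch; ciss_service_workorder is one of the 57 tables in the list):
hdfs dfs -ls /data/dw/ods/one_make/incr_imp/ciss4.ciss_service_workorder/20210101
hdfs dfs -du -h /data/dw/ods/one_make/incr_imp | head -n 10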
