046. Hive Data Migration Scripts
-- =================== Hive migration: export/import data workflow =========================
-- ========================= Export steps =================================
1. Create the export database:
create database export_dts_1215;

2. Set the export database as the default database for the hdfs user (the Hive CLI runs ~/.hiverc on startup):
su hdfs
cd ~
vim ~/.hiverc          # add the line: use export_dts_1215;

3. Create an HQL file, create_export1215_table.sql, that creates the export tables in the export database:
CREATE TABLE IF NOT EXISTS export_dts_1215.master_n_product_group like dtsaas.master_n_product_group ;
CREATE TABLE IF NOT EXISTS export_dts_1215.sd_sales_cup_detail like dtsaas.sd_sales_cup_detail ;
CREATE TABLE IF NOT EXISTS export_dts_1215.sd_sales_cup_detail_v2 like dtsaas.sd_sales_cup_detail_v2 ;
Execute it:
hive -f create_export1215_table.sql

4. Create a shell script, copy_dtsaastoexport.sh, that copies the HDFS data files into the export database's warehouse directories:
#!/bin/bash
hadoop fs -cp /apps/hive/warehouse/dtsaas.db/master_n_product_group/* /apps/hive/warehouse/export_dts_1215.db/master_n_product_group/
hadoop fs -cp /apps/hive/warehouse/dtsaas.db/sd_sales_cup_detail/* /apps/hive/warehouse/export_dts_1215.db/sd_sales_cup_detail/
hadoop fs -cp /apps/hive/warehouse/dtsaas.db/sd_sales_cup_detail_v2/* /apps/hive/warehouse/export_dts_1215.db/sd_sales_cup_detail_v2/
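# Equivalent loop form of the copies above, in case more tables are added later
# (a minimal sketch; it assumes the same source/target warehouse paths and that the
# target table directories already exist from the CREATE TABLE ... LIKE statements in step 3):
# for t in master_n_product_group sd_sales_cup_detail sd_sales_cup_detail_v2; do
#     hadoop fs -cp "/apps/hive/warehouse/dtsaas.db/${t}/*" "/apps/hive/warehouse/export_dts_1215.db/${t}/"
# done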
Execute it in the background:
nohup ./copy_dtsaastoexport.sh >~/copy_dtsaastoexport20221215.log 2>&1 &

Check the background process:
[hdfs@xxxxx01 ~]$ ps -aux|grep copy_dtsaastoexport.sh
[hdfs@xxxxx01 ~]$ jobs -l

5. Repair partitions so the metastore registers the copied partition directories:
vim mack_table_20221215.sql
MSCK REPAIR TABLE export_dts_1215.master_n_product_group ;
MSCK REPAIR TABLE export_dts_1215.sd_sales_cup_detail ;
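-- Optional check after this file has run: "show partitions export_dts_1215.sd_sales_cup_detail;"
-- should now list the copied partitions (assuming the table is partitioned, which is what makes MSCK REPAIR necessary).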
Execute it in the background:
nohup hive -f mack_table_20221215.sql >~/mack_table_20221215.log 2>&1 &

6. Empty export.hql and generate the EXPORT statements (sample statements are shown after step 9):
echo '' > export.hql
hive -e "show tables " | awk '{printf "export table %s to |/tmp/ds10_20221215/%s|;\n",$1,$1}' | sed "s/|/'/g" > ~/export.hql

7. Create a temporary export directory on HDFS:
hadoop fs -mkdir /tmp/ds10_20221215

8. Export to HDFS:
nohup hive -f ~/export.hql >~/export_20221213.log 2>&1 &
[hdfs@xxxxx01 ~]$ ps -aux|grep export.hql
[hdfs@xxxxx01 ~]$ jobs -l

9. Pull the data down to local disk.
Create the local directory:
cd /data01
mkdir ds10_20221215
chown hdfs:hadoop ds10_20221215
Pull the data:
nohup hdfs dfs -get /tmp/ds10_20221215 /data01/ds10_20221215/ >~/hdfs_get_ds10_20221215.log 2>&1 &
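For reference, with the three tables created in step 3, the generated ~/export.hql from step 6 should contain one statement per table along these lines:
export table master_n_product_group to '/tmp/ds10_20221215/master_n_product_group';
export table sd_sales_cup_detail to '/tmp/ds10_20221215/sd_sales_cup_detail';
export table sd_sales_cup_detail_v2 to '/tmp/ds10_20221215/sd_sales_cup_detail_v2';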
Check the exported sizes:
hdfs dfs -du -h /apps/hive/warehouse/export_dts_1215.db
hdfs dfs -du -h /tmp/ds10_20221215
hdfs dfs -du -h /tmp/ | grep ds10_20221215
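A quick completeness check: each EXPORT statement in step 8 writes one directory under the temp path, so listing it should show one entry per exported table:
hdfs dfs -ls /tmp/ds10_20221215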
-- ========================= Import steps =================================
List files in an HDFS directory:
hdfs dfs -ls /apps/
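The exported directory first has to reach the import server's local disk; the steps below assume it already sits at /data01/migration/ds10_20221215. A typical transfer would be something like the following (hypothetical host name):
scp -r /data01/ds10_20221215/ds10_20221215 hdfs@<import-server>:/data01/migration/    # <import-server> is a placeholder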
With the data on the import server's local disk:

1. Create a temporary import directory on HDFS:
hadoop fs -mkdir /tmp/ds10_02_02

2. Upload the local files to HDFS:
nohup hdfs dfs -put /data01/migration/ds10_20221215 /tmp/ds10_02_02 > ~/hdfs_put_20221215_02_02.log 2>&1 &

3. Check the file sizes:
[hdfs@xxxxxxx002 ~]$ hdfs dfs -du -h /tmp/ds10_02_02
51.6 M  /tmp/ds10_02_02/ds10_20221215
[hdfs@xxxxxxx002 ~]$ hdfs dfs -du -h /tmp/ds10_02_02/ds10_20221215

4. Create a temporary import database and set it as the default:
create database import_dts_1215;
vim ~/.hiverc          # add the line: use import_dts_1215;

5. Generate the IMPORT statements into import.hql, pointing at the HDFS directory from step 2:
hive -e "show tables " | awk '{printf "import table %s from |/tmp/ds10_02_02/ds10_20221215/%s|;\n",$1,$1}' | sed "s/|/'/g" > ~/import.hql
import table sales_order_payment_item_klnm from '/tmp/ds10_02_02/ds10_20221215/sales_order_payment_item_klnm';
import table sales_order_payment_klnm from '/tmp/ds10_02_02/ds10_20221215/sales_order_payment_klnm';
import table sales_order_subitem from '/tmp/ds10_02_02/ds10_20221215/sales_order_subitem';
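Before running the file, it is worth confirming that every IMPORT statement points at a directory that actually exists on HDFS (the file name follows step 5; step 6 runs it as import_1215.hql):
grep -c "^import table" ~/import.hql
hdfs dfs -ls /tmp/ds10_02_02/ds10_20221215 | grep -c "^d"
The two counts should match: one IMPORT statement per exported table directory.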
6. Run the import into the import database:
nohup hive -f ~/import_1215.hql >~/import_1215.log 2>&1 &
hdfs dfs -du -h /apps/hive/warehouse/import_dts_1215.db
ps -aux|grep import_1215.hql

7. When done, clear the default-database setting:
vim ~/.hiverc          # remove the line: use import_dts_1215;
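After the import finishes, a simple spot check is to compare row counts with the source cluster (the table name below is just one of the imported tables; run the equivalent count on the source side too):
hive -e "select count(*) from import_dts_1215.sales_order_subitem;"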
Common HDFS commands

Show the size of each database:
hdfs dfs -du -h /apps/hive/warehouse/ | grep .db

Show the size of each table in a database:
hdfs dfs -du -h /apps/hive/warehouse/export_dts.db
hdfs dfs -du -h /tmp/ds10_20221215

Delete an HDFS path:
hadoop fs -rm -r /tmp/ds10_02

Copy files within HDFS:
hadoop fs -cp /apps/hive/warehouse/dtsaas.db/master_n_product_group/* /apps/hive/warehouse/export_dts_1215.db/master_n_product_group/

Upload a local file to HDFS:
hdfs dfs -put /data01/klnm-migration/ds10_20221215 /tmp/ds10_02_02

Download an HDFS file to local disk:
hdfs dfs -get /tmp/ds10_20221215 /data01/ds10_20221215/

Check cluster usage (per-DataNode disk usage):
hdfs dfsadmin -report | grep -E "Name:|DFS Used%:"

Basic nohup usage:
nohup hive -f ~/import_1215.hql >~/import_1215.log 2>&1 &
ps -aux|grep import_1215.hql
jobs -l
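To follow the progress of a job started with nohup, tail its log file:
tail -f ~/import_1215.log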