Scheduled import of HDFS data into a Hive partitioned table
Process:
The code below is written as a shell script and scheduled to run with crontab.
1. First, load each day's data into a temporary table, mal.wall_log_url_tmp; this can be a managed (internal) table.
2. Then insert the data from the temporary table into the target table, mal.wall_log_url.
#!/bin/sh
# upload yesterday's logs from HDFS into the Hive tables
yesterday=$(date -d "yesterday" +%Y%m%d)   # yesterday's date, e.g. 20250101
month=$(date -d "yesterday" +%Y%m)         # month directory derived from yesterday, so the path stays correct on the 1st of a month
/opt/soft/hive/bin/hive -e "
use mal;
load data inpath '/flume/data/logs/${month}/${yesterday}/' overwrite into table mal.wall_log_url_tmp;
insert into table mal.wall_log_url PARTITION (dt='${yesterday}') select * from mal.wall_log_url_tmp;
"
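For the crontab scheduling mentioned above, an entry along the following lines would run the script once a day; the script path, log path, and run time here are assumptions for illustration, not taken from the original setup:

# run at 01:30 every day; adjust the path to wherever the script is saved
30 1 * * * /bin/sh /opt/scripts/load_wall_log_url.sh >> /var/log/load_wall_log_url.log 2>&1

Note that "load data inpath" moves (rather than copies) the files from the source HDFS directory into the table's storage location, so the Flume directory for that day is emptied once the load succeeds.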
The create-table statements for the two tables are attached below.
Temporary table:
create table mal.wall_log_url_tmp (
    log_time       string,
    log_key        string,
    url_detail     string,
    url_briefly    string,
    url_action     string,
    time_situation string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '#';
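Since the temporary table parses the raw Flume files with '#' as the field delimiter, each line under /flume/data/logs/... is expected to carry the six columns in order, roughly like this (the field values below are invented purely for illustration):

2025-01-01 08:15:32#user_9527#http://example.com/a/b?x=1#/a/b#click#morning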
Target table:
create external table mal.wall_log_url (
    log_time       string,
    log_key        string,
    url_detail     string,
    url_briefly    string,
    url_action     string,
    time_situation string
)
PARTITIONED BY (`dt` string)         -- partition column
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','         -- field delimiter used when data lands in this table
    NULL DEFINED AS ''
STORED AS TEXTFILE
LOCATION '/hive/warehouse';
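After a scheduled run, the load can be sanity-checked from the Hive CLI; for example (the dt value below is just a sample date):

use mal;
show partitions wall_log_url;                            -- the new dt partition should appear
select count(*) from wall_log_url where dt = '20250101'; -- row count for that day's partition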