hive外部表时location修改问题
一、有一次建外部表的时候,忘记设置location
create external table test.ads_education_course_feature_dm
(
course_id string comment '课程id',
course_name string comment '课程name',
detail_browser_times string comment '详情页浏览次数',
exposure_convert_borwser_rate string comment '曝光转化率',
pay_flow_7day string comment '七日流水',
complete_course_rate string comment '完课率',
learn_times string comment '学习次数',
learn_users string comment '学习用户数'
) comment '课程特征表'
partitioned by (pt_d string comment '天分区')
row format delimited fields terminated by '\001'
stored as orc --注意未设置location
外部表的表结构
有一个临时表 tmp_educenter_course_feature_dm(内部表,测试需要)
create table test.tmp_educenter_course_feature_dm
(
course_id string comment '课程id',
course_name string comment '课程name',
detail_browser_times string comment '详情页浏览次数',
exposure_convert_borwser_rate string comment '曝光转化率',
pay_flow_7day string comment '七日流水',
complete_course_rate string comment '完课率',
learn_times string comment '学习次数',
learn_users string comment '学习用户数'
) comment '课程特征表'
partitioned by (pt_d string comment '天分区')
row format delimited fields terminated by '\001'
stored as orc
插入测试数据
load data local inpath '/root/test.txt' into table test.tmp_educenter_course_feature_dm partition (pt_d='20201031');
二、直接关联其他表的数据再插入到ads_education_course_feature_dm中
insert overwrite table test.ads_education_course_feature_dm partition(pt_d='20201031')
select
course_id, --course_id
course_name, --'课程name',
detail_browser_times, --'详情页浏览次数',
exposure_convert_borwser_rate, --'曝光转化率',
pay_flow_7day, --'七日流水',
complete_course_rate, --'完课率',
learn_times, --'学习次数',
learn_users --'学习用户数'
from test.tmp_educenter_course_feature_dm
where pt_d='20201031'
查看外部表的分区
show partitions test.ads_education_course_feature_dm;
查看外部表路径下的分区文件
hdfs dfs -ls hdfs://node01:8020/user/hive/warehouse/test.db/ads_education_course_feature_dm
三、直接修改外部表的location
alter table test.ads_education_course_feature_dm set location '/myhive/ads/ads_education_course_feature_dm'
四、再次插入执行数据插入
insert overwrite table test.ads_education_course_feature_dm partition(pt_d='20201031')
select
course_id, --course_id
course_name, --'课程name',
detail_browser_times, --'详情页浏览次数',
exposure_convert_borwser_rate, --'曝光转化率',
pay_flow_7day, --'七日流水',
complete_course_rate, --'完课率',
learn_times, --'学习次数',
learn_users --'学习用户数'
from test.tmp_educenter_course_feature_dm
where pt_d='20201031'
五、查看修改后的location下的分区文件,发现并没有分区20201031的文件,覆盖的文件还是写到了原来的location地址
六、对于已经存在分区文件的外部表,如果要修改location,记得对已经存在的分区单独设置location,再进行数据的insert overwrite 的时候就会在新location路径下创建文件
alter table test.ads_education_course_feature_dm set location '/myhive/ads/ads_education_course_feature_dm';
alter table test.ads_education_course_feature_dm partition(pt_d='20201031') set location '/myhive/ads/ads_education_course_feature_dm';