Export to a local file
insert overwrite local directory '/home/hadoop'
select * from test1;
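By default the exported files use Hive's \001 field separator; on Hive 0.11+ a readable delimiter can be requested explicitly. A minimal sketch, assuming a hypothetical target directory:
insert overwrite local directory '/home/hadoop/test1_export'
row format delimited fields terminated by ','
select * from test1;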
Export to HDFS
insert overwrite directory '/data/input'
select * from test1;
Export to a local file via the hive CLI
hive -e "select * from test1" >> /home/hadoop/d.txt
hive -f a.sql >> local/e.txt
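When redirecting hive -e output to a file, the column headers are dropped by default; they can be kept with hive.cli.print.header. A sketch, output path is only an example:
hive -e "set hive.cli.print.header=true; select * from test1" > /home/hadoop/d_with_header.txt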
Insert into another table
insert overwrite replaces whatever is already in the table: if the row being inserted is identical to N rows that already exist, only the newly inserted row remains after the operation;
insert overwrite table t2 select * from t1;
insert into is a plain append with no duplicate check: if N rows identical to the data being inserted already exist, there will be N+1 such rows afterwards;
insert into table t2 select * from t1;
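Walking through the two statements above with concrete counts (assuming, purely for illustration, that t1 holds a single row which already appears three times in t2):
-- t2 starts with 3 copies of the row; t1 holds 1 copy of the same row
insert into table t2 select * from t1;       -- plain append, no dedup: t2 now holds 4 copies
insert overwrite table t2 select * from t1;  -- t2 is emptied first:   t2 now holds 1 copy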
Create a table:
create table tmp2 as
select t2.*, 1 as d2
from t2
where exists (select 1 from t1
              where t1.account_id = t2.account_id);
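Correlated EXISTS subqueries in WHERE require Hive 0.13+; on older versions the same filter is usually written as a LEFT SEMI JOIN. A sketch using the same tables:
create table tmp2 as
select t2.*, 1 as d2
from t2
left semi join t1 on t1.account_id = t2.account_id;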
Update data: Hive (without ACID tables) has no row-level UPDATE, so an update is emulated by rewriting the data with INSERT OVERWRITE:
INSERT OVERWRITE TABLE tmp_jxsj_questionnaire_only
select t2.dt, t2.record_date,
       case when t2.q3 = '1' and t2.q10 = '7' then 0
            else 1
       end as validity_flag
from t2;
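If the target table were partitioned (a hypothetical dt partition column is assumed here just to illustrate the pattern), the rewrite can be limited to the affected partition instead of the whole table:
INSERT OVERWRITE TABLE tmp_jxsj_questionnaire_only PARTITION (dt = '2017-04-20')
select t2.record_date,
       case when t2.q3 = '1' and t2.q10 = '7' then 0
            else 1
       end as validity_flag
from t2
where t2.dt = '2017-04-20';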
Add a column:
ALTER TABLE tmp_jxsj_questionnaire_5 ADD COLUMNS ( d2 string );
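Newly added columns are appended to the end of the schema and read as NULL for existing rows; a column can later be renamed or retyped with CHANGE. A sketch, the new name is arbitrary:
ALTER TABLE tmp_jxsj_questionnaire_5 CHANGE COLUMN d2 d2_renamed string;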
Drop a table
DROP TABLE [IF EXISTS] table_name;
Insert data (dynamic partition insert)
set hive.exec.dynamic.partition.mode=nonstrict;
set mapreduce.job.reduces=15;
set hive.exec.compress.output=true;
set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec;
INSERT INTO role_info PARTITION (dt)
SELECT t.*, t.dt  -- feeds PARTITION (dt); several partition columns are possible, matched in order
from bigtable_log t
where t.dt >= '2017-04-21' and t.dt <= '2017-04-25';
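Depending on the Hive version and its defaults, dynamic partitioning itself may also need to be switched on before the nonstrict mode above takes effect:
set hive.exec.dynamic.partition=true;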
A multi-table insert scans the source table once and writes to several targets in the same statement:
FROM bigtable_log a
INSERT OVERWRITE TABLE test1
  SELECT a.account_id where a.actiontype = 25 and a.dt >= '2017-04-20' limit 1
INSERT OVERWRITE TABLE test2
  SELECT a.account_id where a.actiontype = 11 and a.dt >= '2017-04-20' limit 1;