impala的入门使用
1 使用
1.1
-
准备数据
user.csv
392456197008193000,张三,20,0
267456198006210000,李四,25,1
892456199007203000,王五,24,1
492456198712198000,赵六,26,2
392456197008193000,张三,20,0
392456197008193000,张三,20,0
-
上传数据
hadoop fs -mkdir -p /data/impala/t1
#上传本地user.csv到hdfs /data/impala/t1
hadoop fs -put user.csv /data/impala/t1
-
创建表
#进入impala-shell
impala-shell
-- 表如果存在则删除
drop table if exists t1;
-- 执行创建
create external table t1(id string,name string,age int,gender int) row format delimited fields terminated by ',' location '/data/impala/t1';
-
查询数据
[lew3:21000] > select * from t1;
Query: select * from t1
+--------------------+------+-----+--------+
| id                 | name | age | gender |
+--------------------+------+-----+--------+
| 392456197008193000 | 张三 | 20  | 0      |
| 267456198006210000 | 李四 | 25  | 1      |
| 892456199007203000 | 王五 | 24  | 1      |
| 492456198712198000 | 赵六 | 26  | 2      |
| 392456197008193000 | 张三 | 20  | 0      |
| 392456197008193000 | 张三 | 20  | 0      |
+--------------------+------+-----+--------+
-
创建t2表
-- 创建一个内部表
create table t2(id string,name string,age int,gender int) row format delimited fields terminated by ',';
-- 查看表结构
desc t1;
desc formatted t2;
-
插入数据到t2
insert overwrite table t2 select * from t1 where gender = 0;
-- 验证数据
[lew3:21000] > select * from t2;
Query: select * from t2
+--------------------+------+-----+--------+
| id                 | name | age | gender |
+--------------------+------+-----+--------+
| 392456197008193000 | 张三 | 20  | 0      |
| 392456197008193000 | 张三 | 20  | 0      |
| 392456197008193000 | 张三 | 20  | 0      |
+--------------------+------+-----+--------+