A brief walkthrough of table creation in Apache Hive
Connect a client to Hive:

[root@bigdata-02 bin]# ./beeline
Beeline version 1.2.1 by Apache Hive
beeline> ! connect jdbc:hive2://bigdata-01:10000
Connecting to jdbc:hive2://bigdata-01:10000
Enter username for jdbc:hive2://bigdata-01:10000: root
Enter password for jdbc:hive2://bigdata-01:10000: ******
Connected to: Apache Hive (version 1.2.1)
Driver: Hive JDBC (version 1.2.1)
Transaction isolation: TRANSACTION_REPEATABLE_READ

Create a database and switch to it:

0: jdbc:hive2://bigdata-01:10000> create database hive_test;
show databases;
use hive_test;

Create a table:

create table t_a1(id int, name string)
row format delimited fields terminated by ',';

Load the data. If the source file is on the local filesystem, add LOCAL; if it is already in HDFS, leave LOCAL off. For a managed (internal) table you can either load through Hive or put the file directly into the table's warehouse directory; the external table below is instead populated by placing files at its LOCATION.

load data local inpath '/root/1.txt' into table t_a1;
hadoop fs -put 1.txt /user/hive/warehouse/hive_test.db/t_a1

Contents of 1.txt:

1,张学友
2,刘德华
3,黎明
4,郭富城

0: jdbc:hive2://bigdata-01:10000> select * from t_a1;
+----------+------------+--+
| t_a1.id  | t_a1.name  |
+----------+------------+--+
| 1        | 张学友        |
| 2        | 刘德华        |
| 3        | 黎明         |
| 4        | 郭富城        |
+----------+------------+--+
4 rows selected (1.358 seconds)

// Create an external table
create external table t_a2(id int, name string)
row format delimited fields terminated by ','
location '/test/';

hadoop fs -mkdir /test
hadoop fs -put 1.txt /test

0: jdbc:hive2://bigdata-01:10000> select * from t_a2;
+----------+------------+--+
| t_a2.id  | t_a2.name  |
+----------+------------+--+
| 1        | 张学友        |
| 2        | 刘德华        |
| 3        | 黎明         |
| 4        | 郭富城        |
+----------+------------+--+
4 rows selected (0.638 seconds)

Differences between the two:
A managed (internal) table's data files must sit in the table's fixed warehouse directory, while an external table's data files can live at whatever location you specify.
After drop table t_a2, an external table's data files remain on HDFS; dropping a managed table removes the table and its data files together (see the verification sketch at the end).

// Partitioned table
create table t_user(id int, name string, area string)
partitioned by (region string)
row format delimited fields terminated by ',';

// Load data, one file per partition
load data local inpath '/root/beijing.txt' into table t_user partition(region='beijing');
load data local inpath '/root/shanghai.txt' into table t_user partition(region='shanghai');

0: jdbc:hive2://bigdata-01:10000> select * from t_user;
+------------+--------------+--------------+----------------+--+
| t_user.id  | t_user.name  | t_user.area  | t_user.region  |
+------------+--------------+--------------+----------------+--+
| 1          | 张学友          | 北京           | beijing        |
| 2          | 刘德华          | 北京           | beijing        |
| 3          | 黎明           | 北京           | beijing        |
| 4          | 郭富城          | 北京           | beijing        |
| 5          | 诸葛亮          | 上海           | shanghai       |
| 6          | 司马懿          | 上海           | shanghai       |
| 7          | 周瑜           | 上海           | shanghai       |
+------------+--------------+--------------+----------------+--+
7 rows selected (0.445 seconds)

// Multiple partition columns
create table day_hour_table (id int, content string)
partitioned by (dt string, hour string);

load data local inpath '/root/900101_08.txt' into table day_hour_table partition(dt='1990-01-01', hour='08');

// Bucketed table
Enable bucketing enforcement: set hive.enforce.bucketing = true;
Set the number of reducers equal to the number of buckets: set mapreduce.job.reduces=4;

Create the table (a verification sketch follows at the end):
create table stu_buck(Sno int, Sname string, Sex string, Sage int, Sdept string)
clustered by (Sno)
into 4 buckets
row format delimited fields terminated by ',';

Loading a bucketed table takes two steps:
1. Create an ordinary staging table and map the source file onto it:
create table student(Sno int, Sname string, Sex string, Sage int, Sdept string)
row format delimited fields terminated by ',';
hadoop fs -put students.txt /user/hive/warehouse/hive_test.db/student
2. Populate the bucketed table for real (insert + select):
insert overwrite table stu_buck
select * from student cluster by (Sno);

For testing, you can enable local mode so small jobs run locally instead of on the cluster:
set hive.exec.mode.local.auto=true;
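To make the managed-vs-external difference concrete, here is a minimal verification sketch. It assumes the t_a1 and t_a2 tables created above; the drops are destructive, so only run this against a throwaway test database.

// Verifying the drop behavior (sketch)
drop table t_a1;   -- managed: metadata and warehouse data files are both removed
drop table t_a2;   -- external: only the metadata is removed; the data files survive
hadoop fs -ls /user/hive/warehouse/hive_test.db/   # the t_a1 directory is gone
hadoop fs -ls /test/                               # 1.txt is still there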
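Partitioning pays off when a query filters on the partition column: Hive then reads only that partition's subdirectory instead of scanning the whole table. A sketch against the t_user and day_hour_table tables from above:

// Querying by partition (sketch)
show partitions t_user;                        -- lists region=beijing, region=shanghai
select * from t_user where region='beijing';   -- reads only .../t_user/region=beijing
select * from day_hour_table where dt='1990-01-01' and hour='08';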
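After the insert ... cluster by step, the bucketed table's directory should contain exactly four files, one per bucket, and bucketing also enables bucket sampling. A sketch, assuming the stu_buck table above and the default warehouse path:

// Verifying the buckets (sketch)
hadoop fs -ls /user/hive/warehouse/hive_test.db/stu_buck        # expect 4 files, e.g. 000000_0 ... 000003_0
select * from stu_buck tablesample(bucket 1 out of 4 on Sno);   -- reads just the first of the 4 buckets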