-- Table whose data lives at an explicit HDFS path.
-- LOCATION is typically specified when the data has already been uploaded to
-- HDFS and should be queried in place. It is usually combined with external
-- tables, while managed (internal) tables normally keep the default location.
create table IF NOT EXISTS students2
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/bigdata30/students2';

3.指定存储格式

-- Template: substitute the placeholder table name before running.
-- STORED AS selects the on-disk file format of the table (ORC here).
create table if not exists 表名
(
 id bigint,
 name string
)
row format delimited fields terminated by ','
stored as orc
location '/bigdata30/out6';

如果不指定，默认为textfile。注意：除textfile以外，其他存储格式（如orc）的表不能直接用load加载文本文件，需要先将数据加载到一张textfile表中，再通过 insert into ... select 从该表写入。

4.将查询结果创建为一个新表

创建一张新表，用来保存某条查询语句的查询结果（新表的结构由查询结果的列决定）。

CREATE TABLE 新表名 AS SELECT 查询语句;

-- CTAS: store every row of students with age 18 or above in a new table.
create table adult_students as
select *
from students
where age >= 18;

这个语句的含义是，从students表格中选择所有年龄大于等于18岁的学生信息，并将结果存储到名为adult_students的新表格中。

5.模仿已存在表的格式创建不带数据的相同格式的表

这个语法是在数据库中创建一个新表，但是不会复制数据，只会复制表结构，模仿另一张表的形式创建新表格。它的形式是：

CREATE TABLE 新表名 LIKE 要模仿的表名;

举个例子，假设我们有一个名为employees的表格，其中包含员工的姓名、工号、部门等信息。我们想要创建一个结构与employees相同但不含数据的新表格employees_backup，可以执行以下语句：

-- Clone only the table structure of employees. No rows are copied.
create table employees_backup
like employees;

简单用户信息表创建：

-- Simple user table backed by delimited text:
-- columns are separated by commas and each record ends with a newline.
create table t_user(
id int,
uname string,
pwd string,
gender string,
age int
)
row format delimited fields terminated by ','
lines terminated by '\n';

1,admin,123456,男,18
2,zhangsan,abc123,男,23
3,lisi,654321,女,16

复杂人员信息表创建：

-- Person table demonstrating the complex column types array, map and struct.
CREATE TABLE IF NOT EXISTS t_person (
    name     STRING,
    friends  ARRAY<STRING>,
    children MAP<STRING, INT>,
    address  STRUCT<street:STRING, city:STRING>
)
ROW FORMAT DELIMITED
    -- separator between columns
    FIELDS TERMINATED BY ','
    -- separator between the elements of a collection
    COLLECTION ITEMS TERMINATED BY '_'
    -- separator between a map key and its value
    MAP KEYS TERMINATED BY ':'
    -- separator between rows
    LINES TERMINATED BY '\n';

songsong,bingbing_lili,xiao song:18_xiaoxiao song:19,beng bu_anhui
yangyang,caicai_susu,xiao yang:18_xiaoxiao yang:19,he fei_anhui

6.表详细信息

-- List the tables of the current database.
show tables;
-- List only the tables whose names match the pattern 'u*'.
show tables like 'u*';
-- Show the columns of a table.
desc t_person;
-- FORMATTED prints more detailed table information.
desc formatted students;

7.修改列

查询表结构

-- Inspect the current table structure of students2.
describe students2;

添加列

-- Append a new string column named education to students2.
ALTER TABLE students2
    ADD COLUMNS (education STRING);
ALTER TABLE 表名 ADD COLUMNS (列名 数据类型);

查询表结构

-- Inspect the table structure of students2 again.
describe students2;

更新列

-- Rename column education to educationnew, keeping the string type.
alter table students2 change education educationnew string;
ALTER TABLE 表名 CHANGE 旧列名 新列名 新数据类型;

8.删除表

-- Remove the students2 table.
DROP TABLE students2;

6.Hive内外部表

面试题：内部表和外部表的区别？如何创建外部表？工作中使用外部表

1.hive内部表

当创建好表的时候，HDFS会在当前表所属的库中创建一个文件夹

当设置表路径的时候，如果直接指向一个已有的路径,可以直接去使用文件夹中的数据

当load数据的时候，就会将数据文件存放到表对应的文件夹中

而且数据一旦被load，就不能被修改

我们查询数据也是查询表对应文件夹中的文件，这些数据最终都会存放到HDFS

当我们删除表的时候，表对应的文件夹会被删除，同时数据也会被删除

默认建表的类型就是内部表

-- Managed (internal) table: created without the EXTERNAL keyword.
-- Dropping it also removes the data directory /input2
-- (it is moved to the HDFS trash, as the drop output in this tutorial shows).
create table students_internal
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/input2';

hive> dfs -put /usr/local/soft/data/students.txt /input2/;

2.Hive外部表

（多了个 external 关键字）

外部表说明

外部表因为是指定其他的hdfs路径的数据加载到表中来，所以hive会认为自己不完全独占这份数据

删除hive表的时候，数据仍然保存在hdfs中，不会删除。

-- External table: declared with the EXTERNAL keyword.
-- Dropping it removes only the table definition. The files under /input3
-- stay in HDFS.
create external table students_external
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/input3';

hive> dfs -put /usr/local/soft/data/students.txt /input3/;

删除表测试一下：

hive> drop table students_internal;
Moved: 'hdfs://master:9000/input2' to trash at: hdfs://master:9000/user/root/.Trash/Current
OK
Time taken: 0.474 seconds
hive> drop table students_external;
OK
Time taken: 0.09 seconds
hive>

一般在公司中，使用外部表多一点，因为数据可能需要被多个程序使用，使用外部表可以避免误删，通常外部表会结合location一起使用

外部表还可以将其他数据源中的数据映射到 hive中，比如说：hbase，ElasticSearch......

设计外部表的初衷就是让表的元数据与数据解耦

posted on 2024-06-12 20:52 By远方阅读(521) 评论(0) 收藏举报

刷新页面返回顶部

公告

导航

1.开启hive

2.上传数据到hdfs

1.在hive中，将linux中创建本地文件上传至hdfs

2.在hive中，将hdfs上面文件移动到hive表中

3.在hive中，将已存在的表中插入自己写的sql语句

4.在linux中，将本地数据传入到hdfs

3.hive导出数据

1.将查询结果存放到本地

4.库操作

1.创建数据库

2.避免已经存在的错误

3.指定位置创建数据库

4.修改数据库

5.数据库详细信息

6.切换数据库

7.删除数据库

5.表操作

1.默认建表

2.指定location建表