Hive Notes
Create a database in Hive
create database hive;
Because the database hive already exists, this throws an error. Adding IF NOT EXISTS makes the statement succeed quietly; note that nothing is overwritten, the clause simply turns the statement into a no-op when the database is already there.
create database if not exists hive;
Switch into the database
use hive;
Create the usr table
create table if not exists usr (id int,name string,age int);
Specify a storage location:
create table if not exists hive.usr (id int, name string, age int)
location '/usr/local/hive/warehouse/hive/usr';
Create a view that exposes only the id and age columns of usr
create view little_usr as select id,age from usr;
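The view is then queried like an ordinary table:
select * from little_usr;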
List all databases in Hive
show databases;
List all databases whose names start with h
show databases like 'h.*';
List tables and views; first switch into the database
use hive;
show tables;
List all tables and views in database hive whose names start with u
show tables in hive like 'u.*';
Load the local Linux file /usr/local/data (LOCAL) into the usr table, overwriting (OVERWRITE) the existing data
load data local inpath '/usr/local/data' overwrite into table usr;
PS: omit OVERWRITE to append instead of overwriting.
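Because usr was created without a ROW FORMAT clause, its data files must use Hive's default field delimiter Ctrl-A (\001). A hypothetical way to produce such a file (row values illustrative):
printf '1\001zhangsan\00120\n2\001lisi\00122\n' > /usr/local/data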
Load data from HDFS into the usr table, overwriting the existing data (no LOCAL keyword, so the path refers to HDFS)
load data inpath '/usr/local/data' overwrite into table usr;
Insert rows selected from usr into usr1, overwriting its existing data
insert overwrite table usr1 select * from usr where age=10;
Insert rows selected from usr into usr1, appending to its existing data
insert into table usr1 select * from usr where age=10;
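Hive can also read the source once and feed several targets in a single multi-insert statement; a sketch, where usr2 is a hypothetical second table with the same schema as usr1:
from usr
insert overwrite table usr1 select * where age = 10
insert into table usr2 select * where age = 20;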
WordCount in the Hive CLI: HiveQL statements that compute word frequencies
create table docs(line string) row format delimited fields terminated by ','; -- a single-column table whose field delimiter is a comma
load data local inpath '/opt/wordcount' overwrite into table docs; -- load the word data into docs
create table word_count as -- create the result table
select word, count(1) as count from
(select explode(split(line, ' ')) as word from docs) w
group by word -- count each word's occurrences
order by word; -- sort alphabetically
select * from word_count; -- view the final result
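For illustration: assuming /opt/wordcount contained the two lines "hello world" and "hello hive", word_count would hold:
hello 2
hive 1
world 1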
Export data from Hive to the local Linux filesystem
insert overwrite local directory '/opt/data' select * from table8;
Export data from Hive to HDFS
insert overwrite directory '/output/' select * from word_count;
Copy the data into another Hive table
insert into table h1 select * from word_count;
Create a table with comma as the field delimiter
create table taxi_1 (scwd string,scjd string,xcwd string,xcjd string) row format delimited fields terminated by ',' ;
Show a table's storage location (among other metadata)
desc extended <table_name>;
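desc formatted prints the same metadata, including Location, in a more readable layout:
desc formatted taxi_1;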
row format serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' with serdeproperties('serialization.null.format' = '') -- store NULL as the empty string and read empty fields back as NULL
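A sketch of that SerDe property inside a full table definition (the table name t_null is hypothetical):
create table t_null (id int, name string)
row format serde 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
with serdeproperties ('serialization.null.format' = '');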
Workflow: create a database ---- create tables in it (define delimiters, partitions, etc. up front) ---- load data (local or HDFS) ---- analyze the data ---- aggregate the results ---- export to MySQL with Sqoop
create database hive;
use hive;
create table t_1(name string,id int,age int)
partitioned by(dt string)
row format delimited fields terminated by ','
location '/tmp/data';
load data local inpath '/opt/data' overwrite into table t_1 partition (dt='2021-01-01'); -- a partitioned table needs a PARTITION spec on LOAD (dt value illustrative)
select * from t_1;
select count(*) from t_1;
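The final workflow step, exporting the results to MySQL with Sqoop, might look like this sketch; the MySQL table t_1_result and the export directory are assumptions, and the target table must already exist in MySQL:
sqoop export --connect jdbc:mysql://192.168.56.110:3306/my --username root --password Newland@123 --table t_1_result --export-dir /tmp/data --input-fields-terminated-by ','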
Useful sed commands
Delete the first line
sed -i '1d' filename
Delete the nth line (replace n with the line number)
sed -i 'nd' filename
Delete the last line
sed -i '$d' filename
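Handy, for example, for stripping a CSV header row before a file is loaded into Hive (filename illustrative):
sed -i '1d' /opt/data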
5. Dynamic partition settings
Settings that must be configured to use dynamic partitions:
set hive.exec.dynamic.partition=true;
(default false) enables dynamic partitioning
set hive.exec.dynamic.partition.mode=nonstrict;
(default strict) allows every partition column to be dynamic; under strict mode at least one partition column must be static
Tuning settings for dynamic partitions:
set hive.exec.max.dynamic.partitions.pernode=100;
(default 100; usually worth raising, e.g. to 1000) the maximum number of dynamic partitions each mapper or reducer may create; exceeding it raises an error
set hive.exec.max.dynamic.partitions=1000;
(default) the maximum number of dynamic partitions a single statement may create; exceeding it raises an error
set hive.exec.max.created.files=10000;
(default) the maximum number of files that may be created globally; exceeding it raises an error
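Putting these settings together, a dynamic-partition insert into t_1 might look like this sketch (the staging table t_1_stage is an assumption; the partition column dt must come last in the SELECT):
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table t_1 partition (dt)
select name, id, age, dt from t_1_stage;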
》》》》》》》》》》》Hive CLI display settings (hive-site.xml): print column headers and the current database
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
1. Start hiveserver2
2. Start beeline
3. Connect: !connect jdbc:hive2://master:10000
》》》》》》》》》》》hiveserver2 must be running before beeline can connect
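A minimal session sketch (the connection URL is the one from step 3; the user name and password prompts depend on your setup):
hive --service hiveserver2 &
beeline
!connect jdbc:hive2://master:10000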
Rows to columns
concat
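A common rows-to-columns sketch, collapsing the rows for each id into one comma-joined string (this assumes a variant of tb_1 in which name is a plain string column):
select id, concat_ws(',', collect_list(name)) as names
from tb_1
group by id;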
Columns to rows
select id,cname
from tb_1
LATERAL VIEW explode(name) table_1 as cname;
create table if not exists test(name string, id int, job array<string>, sex map<string,int>)
partitioned by (dt string, day string)
row format delimited fields terminated by ',';
create table test(id int,name string,age string)
partitioned by (year int,month string)
row format delimited fields terminated by ',';
select get_json_object(t17.json, '$.name'), get_json_object(t17.json, '$.age'), get_json_object(json, '$.sex') from t17 limit 10;
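This assumes every row of t17.json holds one JSON document, i.e. lines shaped like (values illustrative):
{"name":"jj","age":1,"sex":"m"}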
Sample rows for the test table above (one array element for job, map entries written as key:value):
jj,1,job_user,ss:11
kk,2,job_admin,ll:22
select n,C,B,o,q,u,f from count5; -- the full data set
create table count4(A1 int,B1 string,C1 string,d1 string,e1 string,f1 string,g1 string,h1 string,i1 string)row format delimited fields terminated by ',';
select student.studentId,student.name,course.courseId,course.courseName,studentscore.dt,studentscore.score,studentscore.make_upScore,studentscore.rebuildScore
from course inner join studentscore on course.courseId = studentscore.courseId
inner join student on studentscore.studentId = student.studentId;
select s.studentId,s.name,c.courseId,c.courseName,ss.dt,ss.score,ss.make_upScore,ss.rebuildScore
from course c inner join studentscore ss on c.courseId = ss.courseId
inner join student s on ss.studentId = s.studentId where concat(c.courseName,s.name) like '%蒋%';
sqoop import --connect jdbc:mysql://hadoop:3306/my --username root --password Newland@123 --table test --hive-import --target-dir /user/hive/warehouse/hive.db/test -m 1
sqoop import --connect jdbc:mysql://192.168.56.110:3306/my --username root --password Newland@123 --table test --fields-terminated-by "," --lines-terminated-by "\n" --hive-import --hive-overwrite --create-hive-table --delete-target-dir --hive-database hive --hive-table test
sqoop import \
--connect jdbc:mysql://192.168.56.110:3306/my \
--username root \
--password Newland@123 \
--table test \
--fields-terminated-by ',' \
--delete-target-dir \
--num-mappers 1 \
--hive-import \
--hive-database hive \
--hive-table test
sqoop import --connect jdbc:mysql://192.168.56.110:3306/my --username root --password Newland@123 --table test --delete-target-dir --num-mappers 1 --hive-import --hive-database hive --hive-table test --fields-terminated-by ','