【Hive学习之四】Hive 案例

环境
  虚拟机:VMware 10
  Linux版本:CentOS-6.5-x86_64
  客户端:Xshell4
  FTP:Xftp4
  jdk8
  hadoop-3.1.1
  apache-hive-3.1.1

 

一、需求:统计出掉线率最高的前10基站
  数据:
    record_time:通话时间
    imei:基站编号
    cell:手机编号
    drop_num:掉话的秒数
    duration:通话持续总秒数

1、建表

--数据表
create table cell_monitor(
record_time string,
imei string,
cell string,
ph_num string,
call_num string,
drop_num int,
duration int,
drop_rate DOUBLE,
net_type string,
erl string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;

--结果表
create table cell_drop_monitor(
imei string,
total_call_num int,
total_drop_num int,
d_rate DOUBLE
) 
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;

2、导入数据

LOAD DATA LOCAL INPATH '/root/cdr_summ_imei_cell_info.csv' OVERWRITE INTO TABLE cell_monitor;
#展示前10条
hive> select * from cell_monitor limit 10; OK record_time imei cell ph_num call_num NULL NULL NULL net_type erl 2011-07-13 00:00:00+08 356966 29448-37062 0 0 0 0 0.0 G 0 2011-07-13 00:00:00+08 352024 29448-51331 0 0 0 0 0.0 G 0 2011-07-13 00:00:00+08 353736 29448-51331 0 0 0 0 0.0 G 0 2011-07-13 00:00:00+08 353736 29448-51333 0 0 0 0 0.0 G 0 2011-07-13 00:00:00+08 351545 29448-51333 0 0 0 0 0.0 G 0 2011-07-13 00:00:00+08 353736 29448-51343 1 0 0 8 0.0 G 0 2011-07-13 00:00:00+08 359681 29448-51462 0 0 0 0 0.0 G 0 2011-07-13 00:00:00+08 354707 29448-51462 0 0 0 0 0.0 G 0 2011-07-13 00:00:00+08 356137 29448-51470 0 0 0 0 0.0 G 0 Time taken: 0.132 seconds, Fetched: 10 row(s) hive>

出现NULL 是因为字段类型是非字符串类型,匹配不上 所以显示NULL

 

3、查询掉线率 倒序排列

from cell_monitor cm 
insert overwrite table cell_drop_monitor  
select cm.imei,sum(cm.drop_num),sum(cm.duration),sum(cm.drop_num)/sum(cm.duration) d_rate 
group by cm.imei 
sort by d_rate desc;

 

 

二、使用hive实现wordcount

1、建表

--数据表
create table docs(line string);
--结果表
create table wc(word string, totalword int);
hive> create table docs(line string);
OK
Time taken: 0.722 seconds
hive> create table wc(word string, totalword int);
OK
Time taken: 0.045 seconds

 

2、导入数据

/root/wc:

hadoop hello world
hello hadoop
hbase zookeeper
name name name

导入数据:

hive> load data local inpath '/root/wc' into table docs;
Loading data to table default.docs
OK
Time taken: 0.392 seconds
hive> select * from docs;
OK
hadoop hello world
hello hadoop
hbase zookeeper
name name name
Time taken: 1.728 seconds, Fetched: 4 row(s)

 

3、统计

hive> select explode(split(line, ' ')) as word from docs;
OK
hadoop
hello
world
hello
hadoop
hbase
zookeeper
name
name
name
Time taken: 0.377 seconds, Fetched: 10 row(s)
hive> 

下面统计语句会产生MR任务:

from (select explode(split(line, ' ')) as word from docs) w 
insert into table wc 
select word, count(1) as totalword 
group by word 
order by word;

4、查询结果

hive> select * from wc;
OK
hadoop 2
hbase 1
hello 2
name 3
world 1
zookeeper 1
Time taken: 0.121 seconds, Fetched: 6 row(s)
hive>

 

posted @ 2019-02-14 16:33  cac2020  阅读(795)  评论(0编辑  收藏  举报