explode与lateral view详解

1. explode炸裂一行的数据

求一个界面的广告数量
page   adid
page1  1
page1  2
page1  3
page1  4


[root@node1 data]# cat ad.txt 
page1,1_3_5_9_10_56
page2,30_123_34_7_9_10
page3,40_50_60_88_23_10_9


hive (test)> create table page_ad(
           > page string,
           > aids string)
           > row format delimited fields terminated by ','
           > lines terminated by '\n';


hive (test)> load data local inpath "/opt/data/ad.txt" into table page_ad;


select 
    adid,
    count(*) 
from page_ad 
lateral view explode(split(aids, '_')) tmp as adid group by adid;

2. explode炸裂得到了多行

# 有一个文件,内容如下:userid, order_info
1,water:5_friut:10_c:40_d:5
2,a:10_b:20_d:50
3,a:1
4,a:30_d:40
# 案例:统计每个用户的消费金额
order(userid int, product map<string, int>)
# 提醒
1 water 5
1 fruit 10


# 创建表格
create table tt(
  userid int, 
  product map<string,int>
)
row format delimited 
fields terminated by ',' 
collection items terminated by '_' 
map keys terminated by ':' 
lines terminated by '\n';


# 查表格
select 
  id,
  tmp.fruit,
  tmp.num 
from tt 
lateral view explode(product) tmp as fruit, num;


# 得出每个用户的消费金额
select 
  id,
  sum(tmp.num) 
from tt 
lateral view explode(product) tmp as fruit, num 
group by id;

3. explode炸裂多行

# 有一个文件,记录学生三次模拟考试的成绩,每一次模拟考试有三门课
[root@node1 data]# cat score.txt
s001,60_90_20,90_80_30,70_60_39
s002,61_80_50,60_35_42,80_70_40
s003,62_70_60,70_75_56,90_80_41
s004,63_60_40,90_65_77,100_90_42


# 求学生三次模拟考试的平均成绩
s001  60  90  70
s001  90  80  60
s001  20  30  39


create table sc(
           > sid int,
           > first_score string,
           > second_score string,
           > third_score string)
           > row format delimited by ','
           > collection items terminated by '_'
           > lines terminated by '\n';


hive (test)> select sid,fs from sc lateral view explode(split(first_score, "_")) tmp1 as fs
           > union
           > select sid,ss from sc lateral view explode(split(second_score, "_")) tmp1 as ss
           > union
           > select sid,ts from sc lateral view explode(split(third_score, "_")) tmp1 as ts;
posted @ 2022-08-03 18:59  jsqup  阅读(137)  评论(0编辑  收藏  举报