hive_面试题_行转列&列转行

 


1.列转行

1.说明

-- 说明 : 将 一列数据 转换成一行数据
-- 使用函数 : collect_set : 返回分组内元素 的迭代器(对元素去重)
            collect_list : 返回分组内元素 的迭代器(对元素不去重)
            concat_ws('指定分隔符',iter) : 返回 将所有元素用指定分隔符拼接的字符串 类似 iter.mkString("分隔符")

2.示例

复制代码
-- 数据准备
-- DDL
create table logintab (
`user_id` string comment '用户id',
`login_date` string comment '登入日期')
 comment '用户登入记录表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;

-- DML
insert overwrite table logintab
select '1001' as id,'2021-12-12' as occur_date union all
select '1001' as id,'2021-12-20' as occur_date union all
select '1001' as id,'2022-02-10' as occur_date union all
select '1001' as id,'2021-12-20' as occur_date union all
select '1002' as id,'2021-12-12' as occur_date union all
select '1001' as id,'2021-12-13' as occur_date union all
select '1001' as id,'2021-12-13' as occur_date union all
select '1001' as id,'2021-12-13' as occur_date union all
select '1002' as id,'2021-12-14' as occur_date union all
select '1001' as id,'2021-12-14' as occur_date union all
select '1002' as id,'2021-12-15' as occur_date union all
select '1001' as id,'2021-12-15' as occur_date union all
select '1003' as id,'2021-12-15' as occur_date union all
select '1003' as id,'2021-12-16' as occur_date union all
select '1003' as id,'2021-12-17' as occur_date union all
select '1003' as id,'2021-12-18' as occur_date union all
select '1003' as id,'2021-12-29' as occur_date union all
select '1001' as id,'2022-01-01' as occur_date union all
select '1001' as id,'2022-01-01' as occur_date union all
select '1001' as id,'2022-01-01' as occur_date union all
select '1001' as id,'2022-01-03' as occur_date union all
select '1001' as id,'2022-01-05' as occur_date union all
select '1001' as id,'2022-01-06' as occur_date union all
select '1003' as id,'2021-12-19' as occur_date ;

-- 操作sql
select
user_id
,collect_list(login_date) as list1
,collect_set(login_date) as set1
,concat_ws('*',collect_set(login_date)) as ws_set
from logintab
group by user_id;

-- 查询结果
user_id list1   set1    ws_set
1001    ["2021-12-12","2021-12-20","2022-02-10","2021-12-20","2021-12-13","2021-12-13","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-01","2022-01-01","2022-01-03","2022-01-05","2022-01-06"]        ["2021-12-12","2021-12-20","2022-02-10","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-03","2022-01-05","2022-01-06"]     2021-12-12*2021-12-20*2022-02-10*2021-12-13*2021-12-14*2021-12-15*2022-01-01*2022-01-03*2022-01-05*2022-01-06
1002    ["2021-12-12","2021-12-14","2021-12-15"]        ["2021-12-12","2021-12-14","2021-12-15"]        2021-12-12*2021-12-14*2021-12-15
1003    ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"] ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"] 2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19
Time taken: 9.349 seconds, Fetched: 3 row(s)
复制代码

2.行转列

1.说明

-- 说明 : 将一行数据 转换成 一列数据
-- 使用函数 : explode、lateral view

2.示例

复制代码
-- DDL
create table logintab1 (
`user_id` string comment '用户id',
`login_dates` string comment '登入日期')
 comment '用户登入记录表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;

-- DML
insert overwrite table logintab1
select '1001' as id,'2021-12-12*2021-12-20*2022-02-10' as occur_date union all
select '1002' as id,'2021-12-12*2021-12-14*2021-12-15' as occur_date union all
select '1003' as id,'2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19' as occur_date
;

-- 执行sql
select
user_id
,t1.login_date
from logintab1
lateral view explode(split(login_dates,'\\*')) t1 as login_date;

-- 查询结果
user_id t1.login_date
1001    2021-12-12
1001    2021-12-20
1001    2022-02-10
1002    2021-12-12
1002    2021-12-14
1002    2021-12-15
1003    2021-12-15
1003    2021-12-16
1003    2021-12-17
1003    2021-12-18
1003    2021-12-29
1003    2021-12-19
Time taken: 0.032 seconds, Fetched: 12 row(s)
复制代码

3.需求 : 求出uid,name, 琴_成绩,棋_成绩,书_成绩,画_成绩, 如果没有参数某一门考试,结果成绩为0

1.数据准备

复制代码
-- 数据准备
create table userinfo(
uid string comment '学号',
name string comment '姓名',
city string comment '所在城市')
comment '学生基本信息表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;

-- DDL
create table scores (
   uid  string comment '学号',
   courseid string comment '课程id',
   score string comment '得分分数')
comment '考试分数记录表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;

-- DML
insert overwrite table userinfo
select '1' as uid,'刘备' as name,'保定' as city union all
select '2' as uid,'关羽' as name,'山西' as city union all
select '3' as uid,'赵云' as name,'常山' as city union all
select '4' as uid,'张飞' as name,'涿州' as city ;

insert overwrite table scores
select '1' as uid,'' as courseid,'100' as score union all
select '1' as uid,'' as courseid,'99' as score union all
select '1' as uid,'' as courseid,'88' as score union all
select '1' as uid,'' as courseid,'77' as score union all
select '2' as uid,'' as courseid,'60' as score union all
select '2' as uid,'' as courseid,'50' as score union all
select '3' as uid,'' as courseid,'70' as score union all
select '4' as uid,'' as courseid,'99' as score union all
select '4' as uid,'' as courseid,'77' as score union all
select '4' as uid,'' as courseid,'100' as score union all
select '4' as uid,'' as courseid,'80' as score
;

-- 数据说明
-- userinfo 记录了学生的基本信息
-- scores 记录了考试的分数信息
--        共考试四门功课 琴、棋、书、画,如果有人没有参加某一门的考试,则scores里不会有记录
复制代码

2.执行sql 

复制代码
--需求1: 求出uid,name, 琴_成绩,棋_成绩,书_成绩,画_成绩, 如果没有参数某一门考试,结果成绩为0
--输出结果样例
--+------+---------+------+------+------+------+--+
--| uid  |  name   | 琴_成绩  |棋_成绩  | 书_成绩  | 画_成绩  |
--+------+---------+------+------+------+------+--+
--| 1    | 刘备     | 95   | 60   | 95   | 70   |
--| 2    | 关羽     | 70   | 85   | 80   | 80   |

-- 执行sql
select
t1.uid
,name
,max(if(courseid='',score,0)) as `琴_成绩`
,max(if(courseid='',score,0)) as `棋_成绩`
,max(if(courseid='',score,0)) as `书_成绩`
,max(if(courseid='',score,0)) as `画_成绩`

from scores t1
left outer join userinfo t2
on t1.uid = t2.uid
group by t1.uid
,name
;

--查询结果
t1.uid  name    琴_成绩 棋_成绩 书_成绩 画_成绩
1       刘备    100     99      88      77
2       关羽    60      50      0       0
3       赵云    70      0       0       0
4       张飞    99      77      100     80
复制代码

 

posted @   学而不思则罔!  阅读(141)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· 字符编码:从基础到乱码解决
· SpringCloud带你走进微服务的世界
点击右上角即可分享
微信分享提示