hive_面试题_行转列&列转行
1.列转行
1.说明
-- 说明 : 将 一列数据 转换成一行数据 -- 使用函数 : collect_set : 返回分组内元素 的迭代器(对元素去重) collect_list : 返回分组内元素 的迭代器(对元素不去重) concat_ws('指定分隔符',iter) : 返回 将所有元素用指定分隔符拼接的字符串 类似 iter.mkString("分隔符")
2.示例
-- 数据准备 -- DDL create table logintab ( `user_id` string comment '用户id', `login_date` string comment '登入日期') comment '用户登入记录表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table logintab select '1001' as id,'2021-12-12' as occur_date union all select '1001' as id,'2021-12-20' as occur_date union all select '1001' as id,'2022-02-10' as occur_date union all select '1001' as id,'2021-12-20' as occur_date union all select '1002' as id,'2021-12-12' as occur_date union all select '1001' as id,'2021-12-13' as occur_date union all select '1001' as id,'2021-12-13' as occur_date union all select '1001' as id,'2021-12-13' as occur_date union all select '1002' as id,'2021-12-14' as occur_date union all select '1001' as id,'2021-12-14' as occur_date union all select '1002' as id,'2021-12-15' as occur_date union all select '1001' as id,'2021-12-15' as occur_date union all select '1003' as id,'2021-12-15' as occur_date union all select '1003' as id,'2021-12-16' as occur_date union all select '1003' as id,'2021-12-17' as occur_date union all select '1003' as id,'2021-12-18' as occur_date union all select '1003' as id,'2021-12-29' as occur_date union all select '1001' as id,'2022-01-01' as occur_date union all select '1001' as id,'2022-01-01' as occur_date union all select '1001' as id,'2022-01-01' as occur_date union all select '1001' as id,'2022-01-03' as occur_date union all select '1001' as id,'2022-01-05' as occur_date union all select '1001' as id,'2022-01-06' as occur_date union all select '1003' as id,'2021-12-19' as occur_date ; -- 操作sql select user_id ,collect_list(login_date) as list1 ,collect_set(login_date) as set1 ,concat_ws('*',collect_set(login_date)) as ws_set from logintab group by user_id; -- 查询结果 user_id list1 set1 ws_set 1001 ["2021-12-12","2021-12-20","2022-02-10","2021-12-20","2021-12-13","2021-12-13","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-01","2022-01-01","2022-01-03","2022-01-05","2022-01-06"] ["2021-12-12","2021-12-20","2022-02-10","2021-12-13","2021-12-14","2021-12-15","2022-01-01","2022-01-03","2022-01-05","2022-01-06"] 2021-12-12*2021-12-20*2022-02-10*2021-12-13*2021-12-14*2021-12-15*2022-01-01*2022-01-03*2022-01-05*2022-01-06 1002 ["2021-12-12","2021-12-14","2021-12-15"] ["2021-12-12","2021-12-14","2021-12-15"] 2021-12-12*2021-12-14*2021-12-15 1003 ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"] ["2021-12-15","2021-12-16","2021-12-17","2021-12-18","2021-12-29","2021-12-19"] 2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19 Time taken: 9.349 seconds, Fetched: 3 row(s)
2.行转列
1.说明
-- 说明 : 将一行数据 转换成 一列数据 -- 使用函数 : explode、lateral view
2.示例
-- DDL create table logintab1 ( `user_id` string comment '用户id', `login_dates` string comment '登入日期') comment '用户登入记录表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table logintab1 select '1001' as id,'2021-12-12*2021-12-20*2022-02-10' as occur_date union all select '1002' as id,'2021-12-12*2021-12-14*2021-12-15' as occur_date union all select '1003' as id,'2021-12-15*2021-12-16*2021-12-17*2021-12-18*2021-12-29*2021-12-19' as occur_date ; -- 执行sql select user_id ,t1.login_date from logintab1 lateral view explode(split(login_dates,'\\*')) t1 as login_date; -- 查询结果 user_id t1.login_date 1001 2021-12-12 1001 2021-12-20 1001 2022-02-10 1002 2021-12-12 1002 2021-12-14 1002 2021-12-15 1003 2021-12-15 1003 2021-12-16 1003 2021-12-17 1003 2021-12-18 1003 2021-12-29 1003 2021-12-19 Time taken: 0.032 seconds, Fetched: 12 row(s)
3.需求 : 求出uid,name, 琴_成绩,棋_成绩,书_成绩,画_成绩, 如果没有参数某一门考试,结果成绩为0
1.数据准备
-- 数据准备 create table userinfo( uid string comment '学号', name string comment '姓名', city string comment '所在城市') comment '学生基本信息表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DDL create table scores ( uid string comment '学号', courseid string comment '课程id', score string comment '得分分数') comment '考试分数记录表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table userinfo select '1' as uid,'刘备' as name,'保定' as city union all select '2' as uid,'关羽' as name,'山西' as city union all select '3' as uid,'赵云' as name,'常山' as city union all select '4' as uid,'张飞' as name,'涿州' as city ; insert overwrite table scores select '1' as uid,'琴' as courseid,'100' as score union all select '1' as uid,'棋' as courseid,'99' as score union all select '1' as uid,'书' as courseid,'88' as score union all select '1' as uid,'画' as courseid,'77' as score union all select '2' as uid,'琴' as courseid,'60' as score union all select '2' as uid,'棋' as courseid,'50' as score union all select '3' as uid,'琴' as courseid,'70' as score union all select '4' as uid,'琴' as courseid,'99' as score union all select '4' as uid,'棋' as courseid,'77' as score union all select '4' as uid,'书' as courseid,'100' as score union all select '4' as uid,'画' as courseid,'80' as score ; -- 数据说明 -- userinfo 记录了学生的基本信息 -- scores 记录了考试的分数信息 -- 共考试四门功课 琴、棋、书、画,如果有人没有参加某一门的考试,则scores里不会有记录
2.执行sql
--需求1: 求出uid,name, 琴_成绩,棋_成绩,书_成绩,画_成绩, 如果没有参数某一门考试,结果成绩为0 --输出结果样例 --+------+---------+------+------+------+------+--+ --| uid | name | 琴_成绩 |棋_成绩 | 书_成绩 | 画_成绩 | --+------+---------+------+------+------+------+--+ --| 1 | 刘备 | 95 | 60 | 95 | 70 | --| 2 | 关羽 | 70 | 85 | 80 | 80 | -- 执行sql select t1.uid ,name ,max(if(courseid='琴',score,0)) as `琴_成绩` ,max(if(courseid='棋',score,0)) as `棋_成绩` ,max(if(courseid='书',score,0)) as `书_成绩` ,max(if(courseid='画',score,0)) as `画_成绩` from scores t1 left outer join userinfo t2 on t1.uid = t2.uid group by t1.uid ,name ; --查询结果 t1.uid name 琴_成绩 棋_成绩 书_成绩 画_成绩 1 刘备 100 99 88 77 2 关羽 60 50 0 0 3 赵云 70 0 0 0 4 张飞 99 77 100 80
分类:
Hive
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· 字符编码:从基础到乱码解决
· SpringCloud带你走进微服务的世界