|NO.Z.00016|——————————|BigDataEnd|——|Hadoop&Hive.V16|——|Hive.v16|Hive函数用法.v04|

一、SQL面试题
### --- 连续7天登录的用户

~~~     # 数据。uid dt status(1 正常登录,0 异常)
[root@linux123 ~]# vim /home/hadoop/data/ulogin.dat
1   2019-07-11  1
1   2019-07-12  1
1   2019-07-13  1
1   2019-07-14  1
1   2019-07-15  1
1   2019-07-16  1
1   2019-07-17  1
1   2019-07-18  1
2   2019-07-11  1
2   2019-07-12  1
2   2019-07-13  0
2   2019-07-14  1
2   2019-07-15  1
2   2019-07-16  0
2   2019-07-17  1
2   2019-07-18  0
3   2019-07-11  1
3   2019-07-12  1
3   2019-07-13  1
3   2019-07-14  0
3   2019-07-15  1
3   2019-07-16  1
3   2019-07-17  1
3   2019-07-18  1
~~~     # 建表语句

-- The sample data file above is tab-separated; a single-space delimiter
-- (' ') would fail to split the columns and load NULLs into every field,
-- so declare '\t' as the field terminator.
hive (mydb)> create table ulogin(
uid int,     -- user id
dt date,     -- login date
status int   -- 1 = normal login, 0 = abnormal
)
row format delimited fields terminated by '\t';
-- load the sample data into the table
hive (mydb)> load data local inpath '/home/hadoop/data/ulogin.dat' into
table ulogin;
### --- 连续值的求解,面试中常见的问题。这也是同一类,基本都可按照以下思路进行
~~~     使用 row_number 在组内给数据编号(rownum)
~~~     某个值 - rownum = gid,得到结果可以作为后面分组计算的依据
~~~     根据求得的gid,作为分组条件,求最终结果

-- Step 1 of the consecutive-login solution: for rows with consecutive
-- dates, (dt - row_number) is a constant, so gid serves as a group key
-- identifying each unbroken login streak per user.
hive (mydb)> select uid, dt,
date_sub(dt, row_number() over (partition by uid order
by dt)) gid
from ulogin
where status=1;
--输出参数
uid dt  gid
Time taken: 45.615 seconds
-- Step 2: group by (uid, gid) — each group is one unbroken streak —
-- and keep streaks of at least 7 days.
-- NOTE(review): assumes at most one status=1 row per (uid, dt);
-- duplicate dates would inflate the streak count — confirm upstream.
hive (mydb)> select uid, count(*) logincount
from (select uid, dt,
date_sub(dt, row_number() over (partition by
uid order by dt)) gid
from ulogin
where status=1) t1
group by uid, gid
having logincount>=7;
--参数输出
Time taken: 82.121 seconds
二、编写sql语句实现每班前三名,分数一样并列,同时求出前三名按名次排序的分差
### --- 数据。sid class score

[root@linux123 ~]# vim /home/hadoop/data/stu.dat
1   1901    90
2   1901    90
3   1901    83
4   1901    60
5   1902    66
6   1902    23
7   1902    99
8   1902    67
9   1902    87
-- 待求结果数据如下:
class score rank lagscore
1901 90 1 0
1901 90 1 0
1901 83 2 -7
1901 60 3 -23
1902 99 1 0
1902 87 2 -12
1902 67 3 -20
~~~     # 建表语句
-- The stu.dat sample file is tab-separated; using ' ' as the delimiter
-- fails to parse the three columns (the all-NULL query output shown
-- later in this post is the symptom), so use '\t' instead.
hive (mydb)> create table stu(
sno int,        -- student number
class string,   -- class id, e.g. 1901
    score int   -- exam score
)row format delimited fields terminated by '\t';

~~~     # load the sample data
hive (mydb)> load data local inpath '/home/hadoop/data/stu.dat' into table stu;
### --- 求解思路:
~~~     用排名函数,分数一样并列,所以用dense_rank
~~~     将上一行数据下移,相减即得到分数差
~~~     处理 NULL

-- Rank students within each class; dense_rank keeps tied scores on the
-- same rank with no gaps, so equal scores are listed side by side.
-- NOTE(review): "rank" is also a built-in function name and may be
-- reserved in newer Hive releases (needing backticks) — confirm the
-- target Hive version.
hive (mydb)> with tmp as (
select sno, class, score,
dense_rank() over (partition by class order by score
desc) as rank
from stu
)
-- lag() pulls the previous (higher) score within the class; subtracting
-- gives the gap to the rank above. nvl() maps the NULL produced for the
-- top row of each class to 0.
select class, score, rank,
nvl(score - lag(score) over (partition by class order
by score desc), 0) lagscore
from tmp
where rank<=3;
--参数输出
class   score   rank    lagscore
NULL    NULL    1   0
NULL    NULL    1   0
NULL    NULL    1   0
NULL    NULL    1   0
三、行 <=> 列
### --- 数据:id course

[root@linux123 ~]# vim /home/hadoop/data/data1.dat
1   java
1   hadoop
1   hive
1   hbase
2   java
2   hive
2   spark
2   flink
3   java
3   hadoop
3   hive
3   kafka
### --- 建表加载数据

-- data1.dat is tab-separated; '\t' (not ' ') is required for the two
-- columns to parse correctly, otherwise course stays NULL.
hive (mydb)> create table rowline1(
id string,       -- student id
course string    -- one elective course name per row
    )row format delimited fields terminated by '\t';
-- load the sample data
hive (mydb)> load data local inpath '/home/hadoop/data/data1.dat' into table rowline1;
### --- 编写sql,得到结果如下(1表示选修,0表示未选修)

id java hadoop hive hbase spark flink kafka
1 1 1 1 1 0 0 0
2 1 0 1 0 1 1 0
3 1 1 1 0 0 0 1
### --- 使用 case when + group by + sum

-- Row-to-column pivot: one output row per id, one column per course.
-- sum(case when ...) emits 1 when the student took that course, else 0,
-- and the group by collapses the per-course rows into a single row.
hive (mydb)> select id,
sum(case when course="java" then 1 else 0 end) as java,
sum(case when course="hadoop" then 1 else 0 end) as hadoop,
sum(case when course="hive" then 1 else 0 end) as hive,
sum(case when course="hbase" then 1 else 0 end) as hbase,
sum(case when course="spark" then 1 else 0 end) as spark,
sum(case when course="flink" then 1 else 0 end) as flink,
sum(case when course="kafka" then 1 else 0 end) as kafka
from rowline1
group by id;
--输出参数
id  java    hadoop  hive    hbase   spark   flink   kafka
1   hive    0   0   0   0   0   0   0
2   java    0   0   0   0   0   0   0
2   spark   0   0   0   0   0   0   0

### --- 数据。id1 id2 flag

[root@linux123 ~]# vim /home/hadoop/data/data2.dat
a   b   2
a   b   1
a   b   3
c   d   6
c   d   8
c   d   8
-- 编写sql实现如下结果
id1 id2 flag
a b 2|1|3
c d 6|8
### --- 创建表 & 加载数据

-- data2.dat is tab-separated; declare '\t' so the three columns parse
-- correctly (' ' would leave flag NULL).
hive (mydb)> create table rowline2(
id1 string,
id2 string,
flag int
) row format delimited fields terminated by '\t';
-- load the sample data
hive (mydb)> load data local inpath '/home/hadoop/data/data2.dat' into table rowline2;
### --- 第一步 将元素聚拢

-- collect_set gathers each group's flag values into an array, removing
-- duplicates (the repeated 8 for c/d appears once); element order is
-- not guaranteed.
hive (mydb)> select id1, id2, collect_set(flag) flag from rowline2 group by id1, id2;
--参数输出
id1 id2 flag
a   b   2   NULL    []
a   b   3   NULL    []
c   d   8   NULL    []
a   b   1   NULL    []
c   d   6   NULL    []

-- collect_list is the same aggregation but keeps duplicates
-- (both 8s for c/d remain in the array).
hive (mydb)> select id1, id2, collect_list(flag) flag from rowline2 group by id1, id2;
--参数输出
id1 id2 flag
a   b   2   NULL    []
a   b   3   NULL    []
c   d   8   NULL    []
a   b   1   NULL    []
c   d   6   NULL    []

-- sort_array orders the deduplicated values ascending, giving a
-- deterministic element order.
hive (mydb)> select id1, id2, sort_array(collect_set(flag)) flag from rowline2 group by id1, id2;
--参数输出
id1 id2 flag
a   b   2   NULL    []
a   b   3   NULL    []
c   d   8   NULL    []
a   b   1   NULL    []
c   d   6   NULL    []
### --- 第二步 将元素连接在一起
-- NOTE: this statement fails on purpose — collect_set(flag) yields
-- array<int>, but concat_ws only accepts string or array<string>
-- (the error is quoted just below, together with the cast fix).
hive (mydb)> select id1, id2, concat_ws("|", collect_set(flag)) flag
from rowline2
group by id1, id2;

~~~     这里报错,CONCAT_WS must be "string or array<string>"。加一个类型转换即可
-- Casting flag to string first makes collect_set return array<string>,
-- which concat_ws accepts; elements are then joined with "|".
hive (mydb)> select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;
--参数输出
id1 id2 flag
a   b   2   NULL    
a   b   3   NULL    
c   d   8   NULL    
a   b   1   NULL    
c   d   6   NULL

### --- 创建表 rowline3

-- Materialize the collapsed result — one row per (id1, id2) with a
-- "|"-joined flag string — as input for the reverse (column-to-row) demo.
hive (mydb)> create table rowline3 as
select id1, id2, concat_ws("|", collect_set(cast (flag as
string))) flag
from rowline2
group by id1, id2;
--参数输出
id1 id2 flag
Time taken: 43.839 seconds
~~~     # 第一步:将复杂的数据展开

-- split() takes a regex, so the "|" metacharacter must be escaped as
-- "\\|"; explode then turns the resulting array into one row per element.
hive (mydb)> select explode(split(flag, "\\|")) flat from rowline3;
--输出参数
flat
~~~     # 第二步:lateral view 后与其他字段关联

-- lateral view joins each exploded flag value back to its own id1/id2
-- row — something a bare explode() in the select list cannot do.
hive (mydb)> select id1, id2, newflag
from rowline3 lateral view explode(split(flag, "\\|")) t1 as newflag;
--输出参数
id1 id2 newflag
a   b   2   NULL    
a   b   3   NULL    
c   d   8   NULL    
a   b   1   NULL    
c   d   6   NULL

~~~     # lateral view 语法:
lateralView: LATERAL VIEW udtf(expression) tableAlias AS
columnAlias (',' columnAlias)*
fromClause: FROM baseTable (lateralView)*

 
 
 
 
 
 
 
 
 

Walter Savage Landor:strove with none,for none was worth my strife.Nature I loved and, next to Nature, Art:I warm'd both hands before the fire of life.It sinks, and I am ready to depart
                                                                                                                                                   ——W.S.Landor

 

 

posted on   yanqi_vip  阅读(20)  评论(0编辑  收藏  举报

相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

导航

统计

点击右上角即可分享
微信分享提示