hive_面试题_AB球队得分流水表,得到连续三次得分的队员名字和每次赶超对手的球员名字

 


1.数据准备

复制代码
-- DDL
create table bktab (
   team  string comment '球队名称',
   number int comment '球员号码',
   score_time string comment '得分时间',
   score int comment '得分分数',
   name string comment '球员姓名')
comment 'AB球队得分流水表'
row format delimited fields terminated by '\t'
lines terminated by '\n' stored as orc;

-- DML
insert overwrite table bktab
select 'A',1,'2020/8/28 9:01:14',1,'A1' union all
select 'A',5,'2020/8/28 9:02:28',1,'A5' union all
select 'B',4,'2020/8/28 9:03:42',3,'B4' union all
select 'A',4,'2020/8/28 9:04:55',3,'A4' union all
select 'B',1,'2020/8/28 9:06:09',3,'B1' union all
select 'A',3,'2020/8/28 9:07:23',3,'A3' union all
select 'A',4,'2020/8/28 9:08:37',3,'A4' union all
select 'B',1,'2020/8/28 9:09:51',2,'B1' union all
select 'B',2,'2020/8/28 9:11:05',2,'B2' union all
select 'B',4,'2020/8/28 9:12:18',1,'B4' union all
select 'A',1,'2020/8/28 9:13:32',2,'A1' union all
select 'A',1,'2020/8/28 9:13:52',2,'A1' union all
select 'A',1,'2020/8/28 9:14:46',1,'A1' union all
select 'A',4,'2020/8/28 9:16:00',1,'A4' union all
select 'B',3,'2020/8/28 9:17:14',3,'B3' union all
select 'B',2,'2020/8/28 9:18:28',3,'B2' union all
select 'A',2,'2020/8/28 9:19:42',3,'A2' union all
select 'A',1,'2020/8/28 9:20:55',1,'A1' union all
select 'B',3,'2020/8/28 9:22:09',2,'B3' union all
select 'B',3,'2020/8/28 9:23:23',3,'B3' union all
select 'A',5,'2020/8/28 9:24:37',2,'A5' union all
select 'B',1,'2020/8/28 9:25:51',3,'B1' union all
select 'B',2,'2020/8/28 9:27:05',1,'B2' union all
select 'A',3,'2020/8/28 9:28:18',1,'A3' union all
select 'B',4,'2020/8/28 9:29:32',1,'B4' union all
select 'A',1,'2020/8/28 9:30:46',3,'A1' union all
select 'B',1,'2020/8/28 9:31:00',1,'B1' union all
select 'B',1,'2020/8/28 9:32:00',1,'B1' union all
select 'A',4,'2020/8/28 9:33:14',2,'A4' union all
select 'B',1,'2020/8/28 9:34:28',1,'B1' union all
select 'B',5,'2020/8/28 9:35:42',2,'B5' union all
select 'A',1,'2020/8/28 9:36:55',1,'A1' union all
select 'A',1,'2020/8/28 9:37:50',1,'A1' union all
select 'B',1,'2020/8/28 9:38:09',3,'B1' union all
select 'A',1,'2020/8/28 9:39:23',3,'A1' union all
select 'B',2,'2020/8/28 9:40:37',3,'B2' union all
select 'A',3,'2020/8/28 9:41:51',3,'A3' union all
select 'A',1,'2020/8/28 9:43:05',2,'A1' union all
select 'B',3,'2020/8/28 9:44:18',3,'B3' union all
select 'A',5,'2020/8/28 9:45:32',2,'A5' union all
select 'B',5,'2020/8/28 9:46:46',3,'B5' ;

-- 数据说明
-- bktab记录了 AB球队得分流水表
-- 每次球员进球后,就会插入一条记录,记录信息为(球队名称team,球员号码number,球员姓名name,,得分分数score 以及得分时间scoretime)
复制代码
需求1 : 连续三次(及以上)为球队得分的球员名单

分析思路1(获取前置元素 累计求和分组法)
复制代码
--分析思路1(获取前置元素 累计求和分组法)
-- 1.按team分组,按score_time 正向排序
-- 2.获取当前行的前一行 name
-- 3.判断当前行的name 是否与前一行的name是否相同,添加标记 不同为1 相同为0
-- 4.对标记累计求和,作为连续得分分组
-- 5.对分区计数,得出 连续得分次数


-- 执行sql
select
name
,cont_group
,count(name) as contin_cnt

from (
    select
        team
        ,number
        ,score_time
        ,score
        ,name
        ,pre_name
        ,if_contin
        -- 累计求和,获取连续分组
        ,sum(if_contin) over (partition by team order by score_time asc) as cont_group

    from (
        select
        team
        ,number
        ,score_time
        ,score
        ,name
        -- 获取当前行的前一行的 name
        ,lag(name) over (partition by team order by score_time asc) as pre_name
        -- 判断 pre_name 和name 是否相同
        ,if(lag(name) over (partition by team order by score_time asc) = name
            ,0,1
            ) as if_contin
        from bktab
    ) t1
) t2
group by name
,cont_group
-- 通过这里限制 连续得分次数
having count(name) >= 3
;

-- 查询结果
name    cont_group      contin_cnt
A1      6       3
A1      14      3
B1      11      3
Time taken: 38.164 seconds, Fetched: 3 row(s)
复制代码
思路2(观察连续特征法)
复制代码
-- 1.判断当前行连续特征
当前 前1   前2   后1  后2
A1  null  null A1   A1
A1  A1    null A1   null
A1  A1    A1   null null

当前 = 后1 and 当前 = 后2 | 当前 = 前1 and 当前 = 后1 | 当前 = 前1 and 当前 = 前1


-- 执行sql
select distinct a.name ,a.team from
(
select *,lead(name,1) over(partition by team order by score_time) as ld1
,lead(name,2) over(partition by team order by score_time) as ld2
,lag(name,1) over(partition by team order by score_time) as lg1
,lag(name,2) over(partition by team order by score_time) as lg2
from bktab
) a
where (a.name =a.ld1 and a.name =a.ld2)
or (a.name =a.ld1 and a.name =a.lg1)
or (a.name=a.lg1 and a.name=a.lg2);

-- 查询结果
a.name  a.team
A1      A
B1      B
Time taken: 23.196 seconds, Fetched: 2 row(s)
复制代码
需求2 : 比赛中帮助各自球队反超比分的球员姓名以及对应时间
复制代码
--分析思路
    1.按score_time 对全局排序
    2.获取当前行 A队累计得分 B队累计得分
    3.获取 当前 A队累计得分 与 B队累计得分的差值
    4.当前行差值 与上一行差值,发生符合变化时,表示 分数发生了反超

-- 查询sql
select
    team
    ,number
    ,score_time
    ,score
    ,name
    ,ateam_score
    ,bteam_score
from (

    select
        team
        ,number
        ,score_time
        ,score
        ,name
        ,ateam_score
        ,bteam_score
        ,diff_score
        ,lag(diff_score) over (order by score_time asc) as pre_diff_score
        ,case when diff_score > 0 and lag(diff_score) over (order by score_time asc) < 0 then 1
              when diff_score < 0 and lag(diff_score) over (order by score_time asc) > 0 then 1
              when diff_score is not null and lag(diff_score) over (order by score_time asc) is null then 1
              else 0
         end as if_surpass

    from (
        select
        team
        ,number
        ,score_time
        ,score
        ,name
        ,sum(if(team = 'A',score,0)) over (order by score_time asc) as ateam_score
        ,sum(if(team = 'B',score,0)) over (order by score_time asc) as bteam_score
        ,sum(if(team = 'A',score,0)) over (order by score_time asc) - sum(if(team = 'B',score,0)) over (order by score_time asc) as diff_score
        from bktab
    ) t1
) t2
where if_surpass = 1
;

-- 执行结果
team    number  score_time      score   name    ateam_score     bteam_score
A       1       2020/8/28 9:01:14       1       A1      1       0
B       4       2020/8/28 9:03:42       3       B4      2       3
A       4       2020/8/28 9:04:55       3       A4      5       3
B       1       2020/8/28 9:06:09       3       B1      5       6
A       3       2020/8/28 9:07:23       3       A3      8       6
B       3       2020/8/28 9:23:23       3       B3      21      22
A       5       2020/8/28 9:24:37       2       A5      23      22
B       1       2020/8/28 9:25:51       3       B1      23      25
A       1       2020/8/28 9:43:05       2       A1      39      38
B       3       2020/8/28 9:44:18       3       B3      39      41
复制代码

 







posted @   学而不思则罔!  阅读(248)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 10年+ .NET Coder 心语 ── 封装的思维:从隐藏、稳定开始理解其本质意义
· 地球OL攻略 —— 某应届生求职总结
· 提示词工程——AI应用必不可少的技术
· 字符编码:从基础到乱码解决
· SpringCloud带你走进微服务的世界
点击右上角即可分享
微信分享提示