hive求连续段的最后一个数及每段的个数

需求

有一个表存放在数字ID,要求输出连续段的最后一个id及每段的个数;

数据准备

复制代码
create table temp_id_list_0209
(
    id int
) row format delimited
    fields terminated by '\t'
    stored as orc
    tblproperties ("orc.compress" = "snappy");

insert into temp_id_list_0209
values (1),
       (2),
       (4),
       (5),
       (6),
       (9),
       (10),
       (11),
       (12)
;
select * from temp_id_list_0209;
复制代码

实现思路

第一步

select id,
       lag(id, 1, id) over (order by id)      df,
       id - lag(id, 1, id) over (order by id) diff
from temp_id_list_0209;
id
df
diff
1
1
0
2
1
1
4
2
2
5
4
1
6
5
1
9
6
3
10
9
1
11
10
1
12
11
1

第二步

select id, sum(case when diff > 1 then 1 else 0 end) over (order by id) as grep
from (select id,
             id - lag(id, 1, id) over (order by id) as diff
      from temp_id_list_0209) i;
id
grep
1
0
2
0
4
1
5
1
6
1
9
2
10
2
11
2
12
2

第三步

select grp, max(id) last_id, count(id) cnt
from (select id, sum(if(diff > 1, 1, 0)) over (order by id) as grp
      from (select id, id - lag(id, 1, id) over (order by id) as diff
            from temp_id_list_0209) m) n
group by grp;
gre
last_id
cnt
1
6
3
2
12
4
0
2
2

posted @   晓枫的春天  阅读(228)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· 一文读懂知识蒸馏
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
点击右上角即可分享
微信分享提示