hive求连续段的最后一个数及每段的个数

需求

有一个表存放在数字ID,要求输出连续段的最后一个id及每段的个数;

数据准备

create table temp_id_list_0209
(
    id int
) row format delimited
    fields terminated by '\t'
    stored as orc
    tblproperties ("orc.compress" = "snappy");

insert into temp_id_list_0209
values (1),
       (2),
       (4),
       (5),
       (6),
       (9),
       (10),
       (11),
       (12)
;
select * from temp_id_list_0209;

实现思路

第一步

select id,
       lag(id, 1, id) over (order by id)      df,
       id - lag(id, 1, id) over (order by id) diff
from temp_id_list_0209;
id
df
diff
1
1
0
2
1
1
4
2
2
5
4
1
6
5
1
9
6
3
10
9
1
11
10
1
12
11
1

第二步

select id, sum(case when diff > 1 then 1 else 0 end) over (order by id) as grep
from (select id,
             id - lag(id, 1, id) over (order by id) as diff
      from temp_id_list_0209) i;
id
grep
1
0
2
0
4
1
5
1
6
1
9
2
10
2
11
2
12
2

第三步

select grp, max(id) last_id, count(id) cnt
from (select id, sum(if(diff > 1, 1, 0)) over (order by id) as grp
      from (select id, id - lag(id, 1, id) over (order by id) as diff
            from temp_id_list_0209) m) n
group by grp;
gre
last_id
cnt
1
6
3
2
12
4
0
2
2

posted @ 2022-02-10 12:09  晓枫的春天  阅读(221)  评论(0编辑  收藏  举报