hive求连续段的最后一个数及每段的个数
需求
有一个表存放在数字ID,要求输出连续段的最后一个id及每段的个数;
数据准备
create table temp_id_list_0209 ( id int ) row format delimited fields terminated by '\t' stored as orc tblproperties ("orc.compress" = "snappy"); insert into temp_id_list_0209 values (1), (2), (4), (5), (6), (9), (10), (11), (12) ; select * from temp_id_list_0209;
实现思路
第一步
select id, lag(id, 1, id) over (order by id) df, id - lag(id, 1, id) over (order by id) diff from temp_id_list_0209;
id
|
df
|
diff
|
1
|
1
|
0
|
2
|
1
|
1
|
4
|
2
|
2
|
5
|
4
|
1
|
6
|
5
|
1
|
9
|
6
|
3
|
10
|
9
|
1
|
11
|
10
|
1
|
12
|
11
|
1
|
第二步
select id, sum(case when diff > 1 then 1 else 0 end) over (order by id) as grep from (select id, id - lag(id, 1, id) over (order by id) as diff from temp_id_list_0209) i;
id
|
grep
|
1
|
0
|
2
|
0
|
4
|
1
|
5
|
1
|
6
|
1
|
9
|
2
|
10
|
2
|
11
|
2
|
12
|
2
|
第三步
select grp, max(id) last_id, count(id) cnt from (select id, sum(if(diff > 1, 1, 0)) over (order by id) as grp from (select id, id - lag(id, 1, id) over (order by id) as diff from temp_id_list_0209) m) n group by grp;
gre
|
last_id
|
cnt
|
1
|
6
|
3
|
2
|
12
|
4
|
0
|
2
|
2
|