hive Group操作

GROUPING SETS使用

grouping sets是一种将多个group by 逻辑写在一个sql语句中的便利写法。

--GROUP BY a, b 设置分组字段
--GROUPING SETS ((a,b)) 分组方式
SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a,b))
等于
SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b

--GROUP BY a, b GROUPING SETS ((a,b), a)
SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a,b), a)

等于

SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b 
UNION ALL
SELECT a, null, SUM(c) FROM tab1 GROUP BY a


--GROUP BY a, b GROUPING SETS (a,b)
SELECT a,b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS (a,b)
等于

SELECT a, null, SUM(c) FROM tab1 GROUP BY a 
UNION ALL
SELECT null, b, SUM(c) FROM tab1 GROUP BY b

--GROUP BY a, b GROUPING SETS ((a, b), a, b, ())
SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b GROUPING SETS ((a, b), a, b, ())

等于
SELECT a, b, SUM(c) FROM tab1 GROUP BY a, b 
UNION ALL
SELECT a, null, SUM(c) FROM tab1 GROUP BY a
UNION ALL
SELECT null, b, SUM(c) FROM tab1 GROUP BY b 
UNION ALL
SELECT null, null, SUM(c) FROM tab1

常用于计算各种组合的报表数据。

select * from (
SELECT a, b, SUM( c ) FROM tab1 GROUP BY a, b 
UNION ALL
SELECT a, null, SUM( c ) FROM tab1 GROUP BY a
UNION ALL
SELECT null, b, SUM( c ) FROM tab1 GROUP BY b 
UNION ALL
SELECT null, null, SUM( c ) FROM tab1
    ) d 
        情况1 : 
        where d.a is null and d.b is null;
        情况2 : 
        where d.a is null and d.b is not null;
        情况2 : 
        where d.a is not null and d.b is null;
        情况4 : 
        where d.a is not null and d.b is not null;

示例
创建student_grouping表

CREATE TABLE student_grouping(
id int,
name string,
age int,
sex string 
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

-- 分组
group by age group by sex group by age,sex 查询全部

给表导入数据

1   name1   12  boy
2   name2   12  boy
3   name3   13  girl
4   name4   13  boy
5   name5   14  boy
6   name6   14  boy
7   name7   15  girl
8   name8   15  girl

查询数据
查询分组数据

SELECT age, sex, count(id) FROM student_grouping GROUP BY age, sex GROUPING SETS ((age,sex),age,sex,());
GROUP BY age, sex 设置分组字段
GROUPING SETS ((age,sex),age,sex,()) 分组方式

Total MapReduce CPU Time Spent: 4 seconds 750 msec
OK
NULL    NULL    8
NULL    boy     5
NULL    girl    3
12      NULL    2
12      boy     2
13      NULL    2
13      boy     1
13      girl    1
14      NULL    2
14      boy     2
15      NULL    2
15      girl    2

等于
select age, sex, count(id) from student_grouping group by age,sex; = (age,sex)
以age,sex为组
Total MapReduce CPU Time Spent: 9 seconds 550 msec
OK
12      boy     2
13      boy     1
13      girl    1
14      boy     2
15      girl    2

select age, null, count(id) from student_grouping group by age; = age
以age为组
Total MapReduce CPU Time Spent: 8 seconds 920 msec
OK
12      NULL    2
13      NULL    2
14      NULL    2
15      NULL    2

select null, sex, count(id) from student_grouping group by sex; = sex
以sex为组
Total MapReduce CPU Time Spent: 8 seconds 920 msec
OK
NULL    boy     5
NULL    girl    3

select null,null,count(id) from student_grouping; = ()
求总数
Total MapReduce CPU Time Spent: 9 seconds 190 msec
OK
NULL    NULL    8

grouping sets加上if,case用法

select * from 
(SELECT age, sex, count(id) c FROM student_grouping GROUP BY age, sex GROUPING SETS ((age,sex),age,sex,())) a  
where a.age is null and a.sex is not null;

select 
if(age is not null,age,'ALL'),sex,c from 
(SELECT age, sex, count(id) c FROM student_grouping GROUP BY age, sex GROUPING SETS ((age,sex),age,sex,())) a  
where a.age is null and a.sex is not null;

select 
age,if(sex is not null,sex,'ALL'),c from 
(SELECT age, sex, count(id) c FROM student_grouping GROUP BY age, sex GROUPING SETS ((age,sex),age,sex,())) a  
where a.age is not null and a.sex is null;

-- 除了if 还可以使用 case when
SELECT 
if(age is not null, age, 'ALL'), 
case 
	when sex is not null then sex else 'ALL' end 
as age,count(id) 
FROM student_grouping 
GROUP BY age, sex GROUPING SETS ( (age,sex),age,sex,() );
age sex count
ALL	ALL	8 所有性别和所有年龄 = 8
ALL	boy	5 所有年龄段的男孩 = 5
ALL	girl	3 所有年龄段的女孩 = 3
12	ALL	2 12岁的所有孩子 = 2
12	boy	2 12岁的男孩 = 2
13	ALL	2
13	boy	1
13	girl	1
14	ALL	2
14	boy	2
15	ALL	2
15	girl	2

with cube 是group by中所有key的组合非常多

根据提供的group by的分组字段的所有组合进行分组数据的查询
举例说明 :
假如 group by a,b,c
with cube =GROUPING SETS ((a,b,c),(a,b),(b,c),(a,c),a,b,c,())
所以上面的写法可简写成

SELECT if(age is not null, age, 'ALL'), 
case when sex is not null then sex else 'ALL' end as age,
count(id) 
FROM student_grouping 
GROUP BY age, sex with cube;

-- GROUP BY age, sex

-- with cube (age,sex),age,sex,()

原来的写法

SELECT if(age is not null, age, 'ALL'), 
case when sex is not null then sex
else 'ALL'
end as age,
count(id) 
FROM student_grouping 
GROUP BY age, sex GROUPING SETS ( (age,sex),age,sex,() );

with rollup是按右侧递减的顺序组合

例如：
group by a,b,c
with rollup
GROUPING SETS ((a,b,c),(a,b),a,())

实际案例 : 年龄性别国家 --> 1. 年龄性别国家 2. 年龄性别 3. 年龄 4 () with rollup
grouping sets()

右侧递减的意思就是从左到右依次减去最右面的;

-- GROUP BY age, sex with rollup 等效于  GROUP BY age, sex GROUPING SETS ( (age,sex),age,() )
-- 相当于按右侧递减的顺序group by
SELECT if(age is not null, age, 'ALL'), 
case when sex is not null then sex else 'ALL' end as age,
count(id) FROM student_grouping GROUP BY age, sex with rollup;
--等于
SELECT if(age is not null, age, 'ALL'), 
case 
	when sex is not null then sex
	else 'ALL'
end as age,
count(id) FROM student_grouping GROUP BY age, sex
GROUPING SETS ( (age,sex),age,() );

posted @ 2020-05-31 23:49 我不是忘尘阅读(350) 评论(0) 编辑收藏举报

刷新页面返回顶部

我不是忘尘

hive Group操作

目录

GROUPING SETS使用

grouping sets加上if,case用法

with cube 是group by中所有key的组合非常多

with rollup是按右侧递减的顺序组合

公告

我不是忘尘

hive Group操作

目录

GROUPING SETS使用

grouping sets加上if,case用法

with cube 是group by中所有key的组合 非常多

with rollup是 按右侧递减的顺序组合

公告

with cube 是group by中所有key的组合非常多

with rollup是按右侧递减的顺序组合