第四模块MySQL50题作业,以及由作业引申出来的一些高端玩法
一、表关系
先参照如下表结构创建7张表格,并创建相关约束
班级表:class | 学生表:student | ||||||
cid | caption | grade_id | sid | sname | gender | class_id | |
1 | 一年一班 | 1 | 1 | 乔丹 | 女 | 1 | |
2 | 二年一班 | 2 | 2 | 艾弗森 | 女 | 1 | |
3 | 三年二班 | 3 | 3 | 科比 | 男 | 2 | |
老师表:teacher | 课程表:course | ||||||
tid | tname | cid | cname | teacher_id | |||
1 | 张三 | 1 | 生物 | 1 | |||
2 | 李四 | 2 | 体育 | 1 | |||
3 | 王五 | 3 | 物理 | 2 | |||
成绩表:score |
年级表: class_grade |
||||||
sid | student_id | course_id | score | gid | gname | ||
1 | 1 | 1 | 60 | 1 | 一年级 | ||
2 | 1 | 2 | 59 | 2 | 二年级 | ||
3 | 2 | 2 | 99 | 3 | 三年级 | ||
班级任职表:teach2cls | |||||||
tcid | tid | cid | |||||
1 | 1 | 1 | |||||
2 | 1 | 2 | |||||
3 | 2 | 1 | |||||
4 | 3 | 2 |
1. 班级表class
create table class ( cid int primary key auto_increment, caption char(10), grade_id int );
insert into class values (1,'少一一班',1), (2,'少二一班',2), (3,'少三二班',3), (4,'少四一班',4), (5,'少五三班',5);
2. 学生表student
create table student ( sid int primary key auto_increment, sname char(10), gender enum('男','女') not null, class_id int );
insert into student values (1,'乔丹','女',1), (2,'艾弗森','女',1), (3,'科比','男',2), (4,'葫芦娃','男',3), (5,'张三丰','男',5), (6,'洞房不败','男',4), (7,'樱木花道','男',2), (8,'松岛菜菜子','女',3), (9,'洞房不败','女',5);
3. 老师表teacher
create table teacher ( tid int primary key auto_increment, tname char(10) );
insert into teacher values (1,'张三'), (2,'李四'), (3,'王五'), (4,'萧峰'), (5,'一休哥'), (6,'诸葛'), (7,'李四');
4. 课程表course
create table course ( cid int primary key auto_increment, cname char(10), teacher_id int );
insert into course values (1,'生物',1), (2,'体育',1), (3,'物理',2), (4,'数学',3), (5,'语文',4), (6,'英语',2), (7,'土遁?沙地送葬',5), (8,'夏日喂蚊子大法',3), (9,'麻将牌九扑克千术',6);
5. 成绩表score
create table score ( sid int primary key auto_increment, student_id int, course_id int, score int );
insert score values (1,1,1,60), (2,1,2,21), (3,2,2,99), (4,3,3,56), (5,4,1,56), (6,5,3,94), (7,5,4,40), (8,6,4,80), (9,7,3,37), (10,8,5,100), (11,8,6,89), (12,8,7,0), (13,3,8,45), (14,7,1,89), (15,2,7,89), (16,2,1,61);
6. 年级表class_grade
create table class_grade ( gid int primary key auto_increment, gname char(10) );
insert class_grade values (1,'少一年级'), (2,'少二年级'), (3,'少三年级'), (4,'少四年级'), (5,'少五年级');
7. 班级任职表teach2cls
create table teach2cls ( tcid int primary key auto_increment, tid int, cid int );
insert into teach2cls values (1,1,1), (2,1,2), (3,2,1), (4,3,2), (5,4,5), (6,5,3), (7,5,5), (8,6,2), (9,6,4), (10,6,3), (11,4,1), (12,1,4);
二、操作表
★注:由于样本数量有限,为了能够得到足够的查询结果,所有题目中涉及到“超过”或“以上”字样的,均默认为包含该值
(例如:查询教授课程超过2门的老师的id和姓名,视作教授课程数>=2)
1、自行创建测试数据;
(创建语句见"一、表关系")
2、查询学生总人数
select count(*) as 学生总人数 from student;
3、查询“生物”课程和“物理”课程成绩都及格的学生id和姓名
【查法1——子查询】
select sid, sname from student where sid in ( select student_id from score where student_id in (select student_id from score where course_id = (select cid from course where cname = '生物') and score >= 60) and course_id = (select cid from course where cname = '物理') and score >= 60 );
【查法2——联表】
select sid, sname from student where sid in ( select t1.student_id from ( select student_id from score where course_id = (select cid from course where cname = '生物') and score >= 60 ) as t1 inner join ( select student_id from score where course_id = (select cid from course where cname = '物理') and score >= 60 ) as t2 on t1.student_id=t2.student_id );
4、查询每个年级的班级数,取出班级数最多的前三个年级
select class.grade_id, class_grade.gname, count(class.cid) as 班级数 from class inner join class_grade on class.grade_id=class_grade.gid group by class.grade_id order by count(class.cid) desc limit 3;
5、查询平均成绩最高和最低的学生的id和姓名以及平均成绩
select stu.sid, stu.sname, avg(score) as 平均成绩 from student as stu inner join score as sco on stu.sid = sco.student_id group by stu.sid having avg(score) = ( select avg(score) from score group by student_id order by avg(score) desc limit 1 ) or avg(score) = ( select avg(score) from score group by student_id order by avg(score) asc limit 1 );
6、查询每个年级的学生人数
select t1.gname, count(s.sid) as 学生人数 from ( select * from class as c inner join class_grade as g on c.grade_id = g.gid ) as t1 inner join student as s on t1.cid = s.class_id group by t1.gid;
7、查询每位学生的学号,姓名,选课数,平均成绩
select stu.sid as 学号, stu.sname as 姓名, count(sco.course_id) as 选课数, avg(sco.score) as 平均成绩 from student as stu left join score as sco on stu.sid = sco.student_id group by sco.student_id;
8、查询学生编号为“2”的学生的姓名、该学生成绩最高的课程名、成绩最低的课程名及分数
select t1.sname as 姓名, t2.cname as 课程名, t1.score as 分数 from (select stu.sid, stu.sname, sco.course_id, sco.score from student as stu inner join score as sco on stu.sid = sco.student_id where stu.sid=2) as t1 inner join course as t2 on t1.course_id = t2.cid group by t2.cid having score in (max(score),min(score));
9、查询姓“李”的老师的个数和所带班级数;
select count(te.tid) as 姓李老师个数, count(tc.cid) as 所带班级数 from teacher as te inner join teach2cls as tc on te.tid = tc.tid where te.tname regexp "^李.*" group by te.tid;
10、查询班级数小于5的年级id和年级名;
select c.grade_id as 年级id, g.gname as 年级名 from class as c inner join class_grade as g on c.grade_id = g.gid group by c.grade_id having count(c.cid)<5;
11、查询班级信息,包括班级id、班级名称、年级、年级级别(12为低年级,34为中年级,56为高年级),示例结果如下;
select cid as 班级id, caption as 班级名称, gname as 年级, case when g.gid in (1,2) then '低年级' when g.gid in (3,4) then '中年级' when g.gid in (5,6) then '高年级' else '其他' end as 年级级别 from class as c inner join class_grade as g on c.grade_id = g.gid;
12、查询学过“张三”老师2门课以上的同学的学号、姓名;
select stu.sid as 学号, stu.sname as 姓名 from student as stu inner join score as sco on stu.sid = sco.student_id where sco.course_id in ( select c.cid from teacher as t inner join course as c on t.tid = c.teacher_id where t.tname = '张三' ) group by stu.sid having count(sco.course_id) >= 2;
13、查询教授课程超过2门的老师的id和姓名;
select tid as id, tname as 姓名 from teacher as t inner join course as c on t.tid = c.teacher_id group by c.teacher_id having count(c.cid) >= 2;
14、查询学过编号“1”课程和编号“2”课程的同学的学号、姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select student_id from score where student_id in ( select student_id from score where course_id = 1 ) and course_id = 2 );
15、查询没有带过高年级的老师id和姓名;
select tid as 老师id, tname as 姓名 from teacher where tid not in ( select tc.tid from class as c inner join teach2cls as tc on c.cid = tc.cid where c.grade_id in (5,6) );
16、查询学过“张三”老师所教的所有课的同学的学号、姓名;
select distinct stu.sid as 学号, stu.sname as 姓名 from student as stu inner join score as sco on stu.sid = sco.student_id where sco.course_id in ( select c.cid from teacher as t inner join course as c on t.tid = c.teacher_id where t.tname = "张三" );
17、查询带过超过2个班级的老师的id和姓名;
select tid as id, tname as 姓名 from teacher where tid in ( select tid from teach2cls group by tid having count(cid) >= 2 );
18、查询课程编号“2”的成绩比课程编号“1”课程低的所有同学的学号、姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select t1.student_id from ( select * from score where course_id = 1 ) as t1 inner join ( select * from score where course_id = 2 ) as t2 on t1.student_id = t2.student_id where t1.score > t2.score );
19、查询所带班级数最多的老师id和姓名;
select tid as id, tname as 姓名 from teacher where tid in ( select tid from teach2cls group by tid having count(cid) = ( select count(cid) from teach2cls group by tid order by count(cid) desc limit 1 ) );
20、查询有课程成绩小于60分的同学的学号、姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select student_id from score where score < 60 );
21、查询没有学全所有课的同学的学号、姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select student_id from score group by student_id having count(course_id) != (select count(cid) from course) );
22、查询至少有一门课与学号为“1”的同学所学相同的同学的学号和姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select student_id from score where course_id in ( select course_id from score where student_id = 1 ) );
23、查询至少学过学号为“1”同学所选课程中任意一门课的其他同学学号和姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select student_id from score where course_id in ( select course_id from score where student_id = 1 ) and student_id != 1 );
24、查询和“2”号同学学习的课程完全相同的其他同学的学号和姓名;
select sid as 学号, sname as 姓名 from student where sid in (select student_id from score where student_id != 2 group by student_id having group_concat(course_id order by course_id asc) = (select group_concat(course_id order by course_id asc) from score where student_id = 2 group by student_id) );
25、删除学习“张三”老师课的score表记录;
delete from score where course_id in ( select c.cid from teacher as t inner join course as c on t.tid = c.teacher_id where t.tname = '张三' );
26、向score表中插入一些记录,这些记录要求符合以下条件:
①没有上过编号“2”课程的同学学号;②插入“2”号课程的平均成绩;
【插入第一条】
insert into score(student_id, course_id, score) values ( ( select sid from student where sid not in ( select s.student_id from score as s where s.course_id = 2 ) order by sid desc limit 0,1 ), 2, ( select avg(s.score) from score as s where s.course_id = 2 ) );
【插入第二条】
insert into score(student_id, course_id, score) values ( ( select sid from student where sid not in ( select s.student_id from score as s where s.course_id = 2 ) order by sid desc limit 1,1 ), 2, ( select avg(s.score) from score as s where s.course_id = 2 ) );
【改limit后的第一个参数值,可继续插入第三、四、...条】
27、按平均成绩从低到高显示所有学生的“语文”、“数学”、“英语”三门的课程成绩,按如下形式显示: 学生ID,语文,数学,英语,有效课程数,有效平均分;
【这里题目有歧义:有效课程数和有效平均分是仅以这3门课来统计,还是以学生所有科目来统计】
【解一:仅以这3门课来统计】
select t2.sid as 学生ID, sum(case when t1.cname = '语文' then t1.score else null end) as 语文, sum(case when t1.cname = '数学' then t1.score else null end) as 数学, sum(case when t1.cname = '英语' then t1.score else null end) as 英语, count(case when t1.cname in ('语文','数学','英语') then 1 else null end) as 有效课程数, avg(case when t1.cname in ('语文','数学','英语') then t1.score else null end) as 有效平均分 from (select * from score as s inner join course as c on s.course_id = c.cid) as t1 right join student as t2 on t1.student_id = t2.sid group by t2.sid order by avg(case when t1.cname in ('语文','数学','英语') then t1.score else null end) asc;
【解二:以该学生所有科目来统计】
select t2.sid as 学生ID, sum(case when t1.cname = '语文' then t1.score else null end) as 语文, sum(case when t1.cname = '数学' then t1.score else null end) as 数学, sum(case when t1.cname = '英语' then t1.score else null end) as 英语, count(t1.score) as 有效课程数, avg(t1.score) as 有效平均分 from (select * from score as s inner join course as c on s.course_id = c.cid) as t1 right join student as t2 on t1.student_id = t2.sid group by t2.sid order by avg(t1.score) asc;
28、查询各科成绩最高和最低的分:以如下形式显示:课程ID,最高分,最低分;
select course_id as 课程ID, max(score) as 最高分, min(score) as 最低分 from score group by course_id;
29、按各科平均成绩从低到高和及格率的百分数从高到低顺序;
【这里优先按平均成绩从低到高排序,若遇到平均成绩相同的则按及格率百分数从高到低排序】
select course_id as 课程ID, avg(score) as 平均成绩, concat(100*count(case when score>=60 then 1 else null end)/count(score),"%") as 及格率 from score group by course_id order by avg(score) asc, count(case when score>=60 then 1 else null end)/count(score) desc;
30、课程平均分从高到低显示(显示任课老师);
select t1.cname as 课程名称, avg(t2.score) as 平均分, t1.tname as 任课老师 from (select * from teacher as t inner join course as c on t.tid = c.teacher_id) as t1 inner join score as t2 on t1.cid = t2.course_id group by t2.course_id order by avg(t2.score) desc;
31、查询各科成绩前三名的记录(不考虑成绩并列情况)
【本题与44题类似,不会做,于是百度了下"如何在mysql中查询每个分组的前几名",参照其中的一个比较优雅的方法,写出了答案】
【注:这里仍然是按照score表默认的排序,即sid的排序】
select * from score where ( select count(*) from score as s where s.course_id = score.course_id and s.score <= score.score ) <= 3;
32、查询每门课程被选修的学生数;
select cname as 课程名, count(s.student_id) as 选修学生数 from course as c left join score as s on c.cid = s.course_id group by c.cid;
33、查询选修了2门以上课程的全部学生的学号和姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select student_id from score group by student_id having count(course_id) >= 2 );
34、查询男生、女生的人数,按倒序排列;
select gender, count(sid) from student group by gender order by count(sid) desc;
35、查询姓“张”的学生名单;
【查法1——正则】
select sname from student where sname regexp "^张.*";
【查法2——like】
select sname from student where sname like "张%";
36、查询同名同姓学生名单,并统计同名人数;
select sname as 姓名, count(sid) as 同名人数 from student group by sname having count(sid) > 1;
37、查询每门课程的平均成绩,结果按平均成绩升序排列,平均成绩相同时,按课程号降序排列;
select avg(score),course_id from score group by course_id order by avg(score) asc, course_id desc;
38、查询课程名称为“数学”,且分数低于60的学生姓名和分数;
select stu.sname as 学生姓名, sco.score as 分数 from student as stu inner join score as sco on stu.sid = sco.student_id where sco.course_id = ( select cid from course where cname = '数学' ) and sco.score < 60;
39、查询课程编号为“3”且课程成绩在80分以上的学生的学号和姓名;
select sid as 学号, sname as 姓名 from student where sid in ( select student_id from score where course_id = 3 and score >= 80 );
40、求选修了课程的学生人数
select count(1) as 学生人数 from ( select distinct student_id from score ) as t1;
41、查询选修“王五”老师所授课程的学生中,成绩最高和最低的学生姓名及其成绩;
select stu.sname as 学生姓名, sco.score as 成绩 from student as stu inner join score as sco on stu.sid = sco.student_id where score in ( ( select max(score) from score where course_id in ( select c.cid from teacher as t inner join course as c on t.tid = c.teacher_id where t.tname = '王五' ) ), ( select min(score) from score where course_id in ( select c.cid from teacher as t inner join course as c on t.tid = c.teacher_id where t.tname = '王五' ) ) );
42、查询各个课程及相应的选修人数;
select cname as 课程名, count(s.student_id) as 选修学生数 from course as c left join score as s on c.cid = s.course_id group by c.cid;
43、查询不同课程但成绩相同的学生的学号、课程号、学生成绩;
select student_id as 学号, course_id as 课程号, score as 学生成绩 from score group by score having count(student_id) > 1;
44、查询每门课程成绩最好的前两名学生id和姓名;
【注:这里指定了前两名,所以若出现多名同分的学生也只取倒序排的默认前2名】
【与31题类似…不会写,于是百度了下"如何在mysql中查询每个分组的前几名",参照其中一种比较高端且高效的自定义变量的方法,写出了答案】
(但下面这个答案因为是一个从分组中查询非分组字段的查询,它只能在 ONLY_FULL_GROUP_BY 模式关闭(链接)的情况下才能起作用。)
set @num := 0, @cname := ''; select t.cid as 课程ID, t.cname as 课程名, t.sid as 学生ID, t.sname as 学生名, t.score as 成绩, @num := if(@cname = t.cname, @num + 1, 1) as 排名, @cname := t.cname as 课程名确认 from ( SELECT * FROM ( select stu.sid, stu.sname, sco.course_id, sco.score from student as stu inner join score as sco on stu.sid = sco.student_id ORDER BY sco.score ) as t1 right join course as t2 on t1.course_id = t2.cid ) as t group by t.cid, t.score, t.sname having 排名 <= 2 ;
但是导师不推荐这种写法,于是附上导师的参考答案吧
SELECT c.sid, a.course_id, c.sname, d.cname, a.score FROM score a INNER JOIN ( SELECT course_id, score, rank FROM ( SELECT a.course_id, a.score, count(*) AS rank FROM ( SELECT course_id, score FROM score GROUP BY course_id, score ORDER BY course_id, score DESC ) a INNER JOIN ( SELECT course_id, score FROM score GROUP BY course_id, score ORDER BY course_id, score DESC ) b ON a.course_id = b.course_id AND a.score <= b.score GROUP BY course_id, score ) t1 WHERE rank IN (1, 2) ORDER BY course_id, rank ) b ON a.course_id = b.course_id AND a.score = b.score INNER JOIN student c ON a.student_id = c.sid INNER JOIN course d ON a.course_id = d.cid ORDER BY course_id, score DESC, sid ASC;
45、检索至少选修两门课程的学生学号;
select sid as 学号 from student where sid in ( select student_id from score group by student_id having count(course_id) >= 2 );
46、查询没有学生选修的课程的课程号和课程名;
select cid as 课程号, cname as 课程名 from course where cid not in ( select distinct course_id from score );
47、查询没带过任何班级的老师id和姓名;
select tid as 老师id, tname as 姓名 from teacher where tid not in ( select distinct tid from teach2cls);
48、查询有两门以上课程超过80分的学生id及其平均成绩;
select student_id as 学生id, avg(score) as 平均成绩 from score where student_id in ( select student_id from score where score >= 80 group by student_id having count(course_id) >= 2 ) group by student_id;
49、检索“3”课程分数小于60,按分数降序排列的同学学号;
select distinct student_id as 学号 from score where course_id = 3 and score < 60 order by score desc;
50、删除编号为“2”的同学的“1”课程的成绩;
delete from score where student_id = 2 and course_id = 1;
51、查询同时选修了物理课和生物课的学生id和姓名;
select sid as 学生id, sname as 姓名 from student where sid in ( select student_id from score where course_id = (select cid from course where cname = '生物') ) and sid in ( select student_id from score where course_id = ( select cid from course where cname = '物理' ) );
三、作业引申
★count(*)、count(1)与count(COL)的抉择?
【参考文章】《Select count(*)和Count(1)的区别和执行方式》
往常我经常会看到一些所谓的优化建议不使用Count(* )而是使用Count(1),从而可以提升性能,给出的理由是Count( *)会带来全表扫描。而实际上如何写Count并没有区别。
实际上 count 的意思是,评估count()中的表达式是否为NULL,如果括号中表达式为NULL则结果不计数(计为0),而括号中表达式非NULL则会计数。
1、比如我们看下面的所示,在 count 中指定NULL(优化器不允许显式指定NULL,因此需要赋值给自定义变量才能指定)。
SET @xx=NULL SELECT COUNT(@xx) FROM class;
由于这里相当于对所有行都计为NULL,所以结果全部计数为0,结果如下图所示。
+------------+ | count(@xx) | +------------+ | 0 | +------------+ 1 row in set (0.00 sec)
2、因此当你指定Count(*)或者Count(1)或者无论Count(‘anything’)时结果都会一样,因为括号里这些值都不为NULL,语句如下图所示。
select count(*) from class; select count(1) from class; select count('anything') from class;
运行结果都一样:
+----------+ | count(*) | +----------+ | 5 | +----------+ 1 row in set (0.00 sec) +----------+ | count(1) | +----------+ | 5 | +----------+ 1 row in set (0.00 sec) +-------------------+ | count('anything') | +-------------------+ | 5 | +-------------------+ 1 row in set (0.00 sec)
3、那么count(COL)对某一列(字段)进行计数呢?
对于Count(列)来说,同样适用于上面规则,评估括号内的列中每一行的值是否为NULL,如果某行为NULL则该行不计数,某行不为NULL则该行计数。因此Count(列)会计算列或这列的组合不为空的计数。
例如下面这张test表:
+------+------+------+ | a | b | c | +------+------+------+ | 1 | 1 | 1 | | 2 | 2 | 2 | | NULL | NULL | NULL | +------+------+------+ 3 rows in set (0.00 sec)
我们使用count(a) 对a列进行计数,语句和结果如下:
select count(a) from test; +----------+ | count(a) | +----------+ | 2 | +----------+ 1 row in set (0.00 sec)
而我们用count(*)(或count(1)、count('anything')等)则计数为3:
select count(*) from test; +----------+ | count(*) | +----------+ | 3 | +----------+ 1 row in set (0.00 sec)
【总结】
如果我们要统计有多少条记录(连全为null的记录也算),则直接用count(*)或count(1)都可以;
如果我们要统计某一列有多少条有效记录(为null的记录/行不算),则用count(列)的方式。
★group_concat的具体玩法?
【参考文章】《mysql之group_concat函数详解》
group_concat函数的具体语法如下:
group_concat( [DISTINCT] 要连接的字段 [Order BY 排序字段 ASC/DESC] [Separator '分隔符'] )
下面举例说明
select * from goods; +------+-------+ | id | price | +------+-------+ | 1 | 10 | | 1 | 20 | | 1 | 20 | | 2 | 20 | | 3 | 200 | | 3 | 500 | +------+-------+ 6 rows in set (0.00 sec)
group_concat的基本功能是:以id分组,把price字段的值在同一行打印出来,逗号分隔(默认)
select id, group_concat(price) from goods group by id; +------+---------------------+ | id | group_concat(price) | +------+---------------------+ | 1 | 10,20,20 | | 2 | 20 | | 3 | 200,500 | +------+---------------------+ 3 rows in set (0.00 sec)
对于这样的查询结果,我们可以使用group_concat的参数对结果进行一些处理:
1、以id分组,把price字段的值在一行打印出来,改为"/"号分隔
select id, group_concat(price separator "/") from goods group by id; +------+-----------------------------------+ | id | group_concat(price separator "/") | +------+-----------------------------------+ | 1 | 10/20/20 | | 2 | 20 | | 3 | 200/500 | +------+-----------------------------------+ 3 rows in set (0.00 sec)
2、以id分组,把去除重复冗余的price字段的值打印在一行,逗号分隔
select id,group_concat(distinct price) from goods group by id; +------+------------------------------+ | id | group_concat(distinct price) | +------+------------------------------+ | 1 | 10,20 | | 2 | 20 | | 3 | 200,500 | +------+------------------------------+ 3 rows in set (0.00 sec)
3、以id分组,把price字段的值打印在一行,逗号分隔,按照price倒序排列
select id,group_concat(price order by price desc) from goods group by id; +------+-----------------------------------------+ | id | group_concat(price order by price desc) | +------+-----------------------------------------+ | 1 | 20,20,10 | | 2 | 20 | | 3 | 500,200 | +------+-----------------------------------------+ 3 rows in set (0.00 sec)
★SELECT CASE WHEN的具体玩法?
【参考文章】《CASE WHEN 及 SELECT CASE WHEN的用法》
case when能为我们提供什么样的玩法呢?
1、已知数据按照另外一种方式进行分组,分析。
例如:根据如下的国家人口数据,统计亚洲和北美洲的人口数量。
先创建表格,并插入数据
create table population( country char(20) primary key, population int);
insert into population values ('中国', 600), ('美国', 100), ('加拿大',100), ('英国', 200), ('法国', 300), ('日本', 250), ('德国', 200), ('墨西哥', 50), ('印度', 250);
得到表格:
+-----------+------------+ | country | population | +-----------+------------+ | 中国 | 600 | | 加拿大 | 100 | | 印度 | 250 | | 墨西哥 | 50 | | 德国 | 200 | | 日本 | 250 | | 法国 | 300 | | 美国 | 100 | | 英国 | 200 | +-----------+------------+ 9 rows in set (0.00 sec)
用CASE WHEN ELSE END对字段进行分类处理:
SELECT CASE country WHEN '中国' THEN '亚洲' WHEN '印度' THEN '亚洲' WHEN '日本' THEN '亚洲' WHEN '美国' THEN '北美洲' WHEN '加拿大' THEN '北美洲' WHEN '墨西哥' THEN '北美洲' ELSE '其他' END as '洲', SUM(population) as '人口' FROM population GROUP BY CASE country WHEN '中国' THEN '亚洲' WHEN '印度' THEN '亚洲' WHEN '日本' THEN '亚洲' WHEN '美国' THEN '北美洲' WHEN '加拿大' THEN '北美洲' WHEN '墨西哥' THEN '北美洲' ELSE '其他' END;
结果如下:
+-----------+--------+ | 洲 | 人口 | +-----------+--------+ | 亚洲 | 1100 | | 其他 | 700 | | 北美洲 | 250 | +-----------+--------+ 3 rows in set (0.00 sec)
【注】题目11——查询班级信息,包括班级id、班级名称、年级、年级级别(12为低年级,34为中年级,56为高年级)
解法就是用到这种方法
select cid as 班级id, caption as 班级名称, gname as 年级, case when g.gid in (1,2) then '低年级' when g.gid in (3,4) then '中年级' when g.gid in (5,6) then '高年级' else '其他' end as 年级级别 from class as c inner join class_grade as g on c.grade_id = g.gid;
+----------+--------------+-----------+--------------+ | 班级id | 班级名称 | 年级 | 年级级别 | +----------+--------------+-----------+--------------+ | 1 | 一年一班 | 一年级 | 低年级 | | 2 | 二年一班 | 二年级 | 低年级 | | 3 | 三年二班 | 三年级 | 中年级 | | 5 | 少四一班 | 少五 | 中年级 | | 4 | 少五三班 | 少四 | 高年级 | +----------+--------------+-----------+--------------+ 5 rows in set (0.00 sec)
2、用一个SQL语句完成不同条件的分组计数
例如:对下述数据,按照国家和性别进行分组统计。
先创建表格,并插入数据
create table population2( country char(20), sex int, population int);
insert into population2 values ('中国', 1, 340), ('中国', 2, 260), ('美国', 1, 45), ('美国', 2, 55), ('加拿大',1, 51), ('加拿大',2, 49), ('英国', 1, 40), ('英国', 2, 60);
用CASE WHEN ELSE END对数据进行国家和性别的分组计数:
SELECT country as '国家', SUM( CASE WHEN sex = '1' THEN population ELSE 0 END) as '男', SUM( CASE WHEN sex = '2' THEN population ELSE 0 END) as '女' FROM population2 GROUP BY country;
得到分组计数结果:
+-----------+------+------+ | 国家 | 男 | 女 | +-----------+------+------+ | 中国 | 340 | 260 | | 加拿大 | 51 | 49 | | 美国 | 45 | 55 | | 英国 | 40 | 60 | +-----------+------+------+ 4 rows in set (0.00 sec)
【注】题目27——按平均成绩从低到高显示所有学生的“语文”、“数学”、“英语”三门的课程成绩,按如下形式显示:学生ID,语文,数学,英语,有效课程数,有效平均分;
解法就是用到这种方法
【这里题目有歧义:有效课程数和有效平均分是仅以这3门课来统计,还是以学生所有科目来统计】 【解一:仅以这3门课来统计】 select t2.sid as 学生ID, sum(case when t1.cname = '语文' then t1.score else null end) as 语文, sum(case when t1.cname = '数学' then t1.score else null end) as 数学, sum(case when t1.cname = '英语' then t1.score else null end) as 英语, count(case when t1.cname in ('语文','数学','英语') then 1 else null end) as 有效课程数, avg(case when t1.cname in ('语文','数学','英语') then t1.score else null end) as 有效平均分 from (select * from score as s inner join course as c on s.course_id = c.cid) as t1 right join student as t2 on t1.student_id = t2.sid group by t2.sid order by avg(case when t1.cname in ('语文','数学','英语') then t1.score else null end) asc; 【解二:以该学生所有科目来统计】 select t2.sid as 学生ID, sum(case when t1.cname = '语文' then t1.score else null end) as 语文, sum(case when t1.cname = '数学' then t1.score else null end) as 数学, sum(case when t1.cname = '英语' then t1.score else null end) as 英语, count(t1.score) as 有效课程数, avg(t1.score) as 有效平均分 from (select * from score as s inner join course as c on s.course_id = c.cid) as t1 right join student as t2 on t1.student_id = t2.sid group by t2.sid order by avg(t1.score) asc;
【解一结果】 +----------+--------+--------+--------+-----------------+-----------------+ | 学生ID | 语文 | 数学 | 英语 | 有效课程数 | 有效平均分 | +----------+--------+--------+--------+-----------------+-----------------+ | 2 | 0 | 0 | 0 | 0 | NULL | | 9 | 0 | 0 | 0 | 0 | NULL | | 3 | 0 | 0 | 0 | 0 | NULL | | 7 | 0 | 0 | 0 | 0 | NULL | | 4 | 0 | 0 | 0 | 0 | NULL | | 1 | 0 | 0 | 0 | 0 | NULL | | 5 | 0 | 40 | 0 | 1 | 40.0000 | | 6 | 0 | 80 | 0 | 1 | 80.0000 | | 8 | 100 | 0 | 89 | 2 | 94.5000 | +----------+--------+--------+--------+-----------------+-----------------+ 9 rows in set (0.00 sec) 【解二结果】 +----------+--------+--------+--------+-----------------+-----------------+ | 学生ID | 语文 | 数学 | 英语 | 有效课程数 | 有效平均分 | +----------+--------+--------+--------+-----------------+-----------------+ | 9 | NULL | NULL | NULL | 0 | NULL | | 4 | NULL | NULL | NULL | 0 | NULL | | 1 | NULL | NULL | NULL | 1 | 13.0000 | | 7 | NULL | NULL | NULL | 1 | 37.0000 | | 3 | NULL | NULL | NULL | 2 | 50.5000 | | 8 | 100 | NULL | 89 | 3 | 63.0000 | | 5 | NULL | 40 | NULL | 2 | 67.0000 | | 6 | NULL | 80 | NULL | 1 | 80.0000 | | 2 | NULL | NULL | NULL | 1 | 89.0000 | +----------+--------+--------+--------+-----------------+-----------------+ 9 rows in set (0.00 sec)
3、在count中直接用CASE WHEN ESLE END来针对符合特定条件的记录进行计数
这里直接拿 题目29 来举例——按各科平均成绩从低到高和及格率的百分数从高到低顺序:
【这里优先按平均成绩从低到高排序,若遇到平均成绩相同的则按及格率百分数从高到低排序】 select course_id as 课程ID, avg(score) as 平均成绩, concat(100*count(case when score>=60 then 1 else null end)/count(score),"%") as 及格率 from score group by course_id order by avg(score) asc, count(case when score>=60 then 1 else null end)/count(score) desc;
查询结果:
+----------+--------------+-----------+ | 课程ID | 平均成绩 | 及格率 | +----------+--------------+-----------+ | 2 | NULL | NULL | | 7 | 34.0000 | 33.3333% | | 8 | 45.0000 | 0.0000% | | 4 | 60.0000 | 50.0000% | | 3 | 62.3333 | 33.3333% | | 6 | 89.0000 | 100.0000% | | 5 | 100.0000 | 100.0000% | +----------+--------------+-----------+ 7 rows in set (0.00 sec)
★如何在mysql中同时查询显示每个分组的前几名
【参考文章】《如何在mysql中查询每个分组的前几名》
1、一种较优雅的方式
这里直接拿 题目31举例——查询各科成绩前三名的记录(不考虑成绩并列情况):
【注:这里仍然是按照score表默认的排序,即sid的排序】 select * from score where ( select count(*) from score as s where s.course_id = score.course_id and s.score <= score.score ) <= 3;
+-----+------------+-----------+-------+ | sid | student_id | course_id | score | +-----+------------+-----------+-------+ | 1 | 1 | 1 | 60 | | 2 | 1 | 2 | 21 | | 3 | 2 | 2 | 99 | | 4 | 3 | 3 | 56 | | 5 | 4 | 1 | 56 | | 6 | 5 | 3 | 94 | | 7 | 5 | 4 | 40 | | 8 | 6 | 4 | 80 | | 9 | 7 | 3 | 37 | | 10 | 8 | 5 | 100 | | 11 | 8 | 6 | 89 | | 12 | 8 | 7 | 0 | | 13 | 3 | 8 | 45 | | 15 | 2 | 7 | 89 | | 16 | 2 | 1 | 61 | +-----+------------+-----------+-------+ 15 rows in set (0.00 sec)
缺点——时间复杂度均为分组中条目数的二次方。很多优化器都不能优化这种查询,使得它的耗时最好为全表行数的二次方(尤其在没有设置正确的索引时),而且数据量大时,可能将服务器会停止响应。那么还有更好的方法吗?有没有办法可以仅仅扫描一次数据,而不是通过子查询进行多次扫描。
2、一种更高效的方式(使用自定义变量)
这里直接拿 题目44举例——查询每门课程成绩最好的前两名学生id和姓名
set @num := 0, @cname := ''; select t2.cid as 课程ID, t2.cname as 课程名, t1.sid as 学生ID, t1.sname as 学生名, t1.score as 成绩, @num := if(@cname = t2.cname, @num + 1, 1) as 排名, @cname := t2.cname as 课程名确认 from ( select stu.sid, stu.sname, sco.course_id, sco.score from student as stu inner join score as sco on stu.sid = sco.student_id ) as t1 right join course as t2 on t1.course_id = t2.cid group by t2.cid, t1.score, t1.sname having 排名 <= 2;
+----------+--------------------------+----------+-----------------+--------+--------+--------------------------+ | 课程ID | 课程名 | 学生ID | 学生名 | 成绩 | 排名 | 课程名确认 | +----------+--------------------------+----------+-----------------+--------+--------+--------------------------+ | 1 | 生物 | 4 | 葫芦娃 | 56 | 2 | 生物 | | 1 | 生物 | 7 | 樱木花道 | 89 | 1 | 生物 | | 2 | 体育 | 1 | 乔丹 | 21 | 2 | 体育 | | 2 | 体育 | 2 | 艾弗森 | 99 | 1 | 体育 | | 3 | 物理 | 7 | 樱木花道 | 37 | 1 | 物理 | | 3 | 物理 | 3 | 科比 | 56 | 2 | 物理 | | 4 | 数学 | 5 | 流河旱树 | 40 | 1 | 数学 | | 4 | 数学 | 6 | 美少女战士 | 80 | 2 | 数学 | | 5 | 语文 | 8 | 松岛菜菜子 | 100 | 1 | 语文 | | 6 | 英语 | 8 | 松岛菜菜子 | 89 | 1 | 英语 | | 7 | 土遁•沙地送葬 | 8 | 松岛菜菜子 | 0 | 2 | 土遁•沙地送葬 | | 7 | 土遁•沙地送葬 | 2 | 艾弗森 | 89 | 1 | 土遁•沙地送葬 | | 8 | 夏日喂蚊子大法 | 3 | 科比 | 45 | 1 | 夏日喂蚊子大法 | | 9 | 麻将牌九扑克千术 | NULL | NULL | NULL | 1 | 麻将牌九扑克千术 | +----------+--------------------------+----------+-----------------+--------+--------+--------------------------+ 14 rows in set (0.00 sec)
这种查询方法在MySQL中只进行一次扫描,而且没有文件排序(filesort)和临时表(但似乎只支持group by为其中一个连接表的主键?)。