牛客SQL-非技术快速入门
01 基础查询
SQL1 查询所有列
select * from user_profile
SQL2 查询多列
select device_id,gender,age,university from user_profile
SQL3 查询结果去重
select distinct(university) from user_profile
SQL4 查询结果限制返回行数
top不适用于所有的数据库语言。SQL SERVER里可以使用。
在MySQL中使用的是limit 来限制个数 。
LIMIT 接受一个或两个数字参数。参数必须是一个整数常量。
如果只给定一个参数,它表示返回最大的记录行数目。
如果给定两个参数,第一个参数指定第一个返回记录行的偏移量,第二个参数指定返回记录行的最大数目。
select device_id from user_profile limit 0,2 --- 运行效率更高
select device_id from user_profile limit 2 --- 运行效率低
SQL5 将查询后的列重新命名
select device_id as user_infos_example from user_profile limit 0,2
这里主要是用到了 起别名关键字 as 以及组合限制查询 limit 索引,个数
其中as可以省略,索引为0可以省略
select device_id user_infos_example from user_profile limit 2
02 条件查询
SQL6 查找学校是北大的学生信息
select device_id,university from user_profile where university = '北京大学'
SQL7 查找年龄大于24岁的用户信息
select device_id,gender,age,university from user_profile where age > 24
SQL8 查找某个年龄段的用户信息
select device_id,gender,age from user_profile where age >= 20 and age <= 23
SQL9 查找除复旦大学的用户信息
select device_id,gender,age,university from user_profile where university != '复旦大学'
SQL10 用where过滤空值练习
select device_id,gender,age,university from user_profile where age is not null
SQL11 高级操作符练习(1)
SELECT device_id,gender,age,university,gpa
FROM user_profile
where gender = 'male' and gpa > 3.5;
SQL12 高级操作符练习(2)
select device_id,gender,age,university,gpa
from user_profile
where university = "北京大学" or gpa >3.7
SQL13 Where in 和Not in
SELECT device_id,gender,age,university,gpa
FROM user_profile
WHERE university IN ('北京大学' , '复旦大学', '山东大学')
这主要是用标题的两个关键字中的一个 in(字段...) 包含条件的字段,not in(字段..) 除字段以外的
SQL14 操作符混合运用
select device_id,gender,age,university,gpa
from user_profile
where (gpa > 3.5 and university = '山东大学') or (gpa > 3.8 and university = '复旦大学')
SQL15 查看学校名称中含北京的用户
SELECT device_id,age,university
FROM user_profile
where university like "%北京%"
SQL36 查找后排序
- order by +属性+后面不写默认为升序
- order by xx asc 按xx升序排序
- order by xx desc 按xx降序排序
select device_id,age
from user_profile
order by age
SQL37 查找后多列排序
select device_id,gpa,age
from user_profile
order by gpa,age;
SQL38 查找后降序排列
select device_id,gpa,age
from user_profile
order by gpa desc,age desc;
03 高级查询
SQL16 查找GPA最高值
SELECT MAX(gpa) FROM user_profile WHERE university='复旦大学';
SQL17 计算男生人数以及平均GPA
- AVG() 函数返回数值列的平均值。
- ROUND() 函数用于把数值字段舍入为指定的小数位数。
select
count(gender) as male_num,
round(avg(gpa),1) as avg_gpa
from user_profile
where gender = 'male';
扩展:
1. count(1)、count(*)、count (字段)的区别?
count(1)和 count(*)都是统计所有行数,count(字段)统计该字段非null的行数,
执行效率简单来说,count(1)和count(*)相同,因为它们都要做全表扫描,count(字段)效率比前两者效率高。
2. avg()函数 会忽略null值,而不是将其当做“0”参与运算。
SQL18 分组计算练习题
每个学校每种性别,需要按性别和学校分组
SELECT gender,university,
COUNT(device_id) as user_num,
AVG(active_days_within_30) as avg_active_days,
AVG(question_cnt) as avg_quesition_cnt
FROM user_profile
GROUP BY gender,university
SQL19 分组过滤练习题
聚合函数结果作为筛选条件时,不能用where,而是用having语法,配合重命名(重命名后的聚合函数)即可;
它的功能有点像WHERE子句,但它用于组而不是单个记录。
SELECT university,
avg(question_cnt) as avg_question_cnt,
avg(answer_cnt) as avg_answer_cnt
FROM user_profile
GROUP BY university
having avg_question_cnt< 5 or avg_answer_cnt< 20
SQL20 分组排序练习题
select university,
avg(question_cnt) as avg_question_cnt
from user_profile
group by university
order by avg_question_cnt
04 多表查询
SQL21 浙江大学用户题目回答情况
使用子查询
select device_id, question_id, result
from question_practice_detail
where device_id in (
select device_id
from user_profile
where university='浙江大学'
)
两表连接
SELECT q.device_id, question_id, result
FROM question_practice_detail q, user_profile u
WHERE q.device_id = u.device_id AND u.university = '浙江大学';
左连接
SELECT
q.device_id,
q.question_id,
q.result
FROM
question_practice_detail q
LEFT JOIN user_profile u ON q.device_id = u.device_id
WHERE
u.university = '浙江大学'
SQL22 统计每个学校的答过题的用户的平均答题数
每个学校 group by university
此题最重要的就是理解平均回答数是回答数 除以 回答的人(去重)
即question_id/device_id
select university,
count(question_id) / count(distinct qpd.device_id) as avg_answer_cnt
from question_practice_detail as qpd
inner join user_profile as up
on qpd.device_id=up.device_id
group by university
order by university
SQL23 统计每个学校各难度的用户平均刷题数
三表连接
比上一题多连接一张题目明细表
select
university,
difficult_level,
# 保留4位小数
round(count(qpd.question_id) / count(distinct qpd.device_id), 4) as avg_answer_cnt
from question_practice_detail as qpd
left join user_profile as up
on up.device_id = qpd.device_id
left join question_detail as qd
on qd.question_id = qpd.question_id
# 分组
group by university, difficult_level
SQL24 统计每个用户的平均刷题数
select
university,
difficult_level,
COUNT(answer_cnt)/count(distinct qpd.device_id) as avg_answer_cnt
from question_practice_detail as qpd
#加入表user_profile 提前过滤
inner join user_profile as up
on qpd.device_id = up.device_id and up.university='山东大学'
#加入表question_detail
inner join question_detail as qd
on qpd.question_id=qd.question_id
group by difficult_level
SQL25 查找山东大学或者性别为男生的信息
使用以下语句,会去重
where university = '山东大学' or gender = 'male'
-
union 会去重
-
union all 不会去重
select
device_id, gender, age, gpa
from user_profile
where university='山东大学'
union all
select
device_id, gender, age, gpa
from user_profile
where gender='male'
05 必会的常用函数
SQL26 计算25岁以上和以下的用户数量
CASE函数
常被用来行转列
CASE 测试表达式
WHEN 简单表达式1 THEN 结果表达式1
WHEN 简单表达式2 THEN 结果表达式2 …
WHEN 简单表达式n THEN 结果表达式n
[ ELSE 结果表达式n+1 ]
END
SELECT (
CASE
WHEN age < 25 OR age IS NULL THEN '25岁以下'
WHEN age >= 25 THEN '25岁及以上'
END ) as age_cut,
COUNT(*) as number
FROM user_profile
GROUP BY age_cut
IF函数
SELECT
IF(age < 25 OR age IS NULL, "25岁以下", "25岁及以上") AS age_cut,
COUNT(id) AS number
FROM user_profile
GROUP BY age_cut
联合查询
SELECT '25岁以下' as age_cut,COUNT(device_id) as number
FROM user_profile
WHERE age < 25 OR age IS NULL
union
SELECT '25岁及以上' as age_cut,COUNT(device_id) as number
FROM user_profile
WHERE age >= 25
SQL27 查看不同年龄段的用户明细
select
device_id,
gender,
( case
when age>=25 then '25岁及以上'
when age>=20 then '20-24岁'
when age<20 then '20岁以下'
else '其他'
end ) as age_cut
from user_profile
SQL28 计算用户8月每天的练题数量
日期函数
- MONTH(date)
- YEAR(date)
select
day(date) as day,
count(question_id) as question_cnt
from question_practice_detail
where month(date) = 8 and year(date) = 2021
group by date
SQL29 计算用户的平均次日留存率
方法一:窗口函数
select avg(if(datediff(date2, date1)=1, 1, 0)) as avg_ret
from (
select
device_id,date as date1,
-- 统计分组内往下第n行值
lead(date,1) over (partition by device_id order by date) as date2
from (
-- 按device_id和date去重
select
distinct device_id, date
from question_practice_detail
) as uniq_id_date
) as id_last_next_date
方法二:两表join
原本日期
date_sub(``date``,interval 1 ``day``)
减去一天后
left join 后
select avg(if(b.device_id is not null,1,0)) as avg_ret
from (
select
distinct device_id,date
from question_practice_detail
)a
left join
(
select
distinct device_id,date_sub(date,interval 1 day) as date
from question_practice_detail
)b
on a.device_id = b.device_id and a.date = b.date
SQL30 统计每种性别的人数
字符串函数
1、LOCATE(substr , str ):返回子串 substr 在字符串 str 中第一次出现的位置,如果字符substr在字符串str中不存在,则返回0;
2、POSITION(substr IN str ):返回子串 substr 在字符串 str 中第一次出现的位置,如果字符substr在字符串str中不存在,与LOCATE函数作用相同;
3、LEFT(str, length):从左边开始截取str,length是截取的长度;
4、RIGHT(str, length):从右边开始截取str,length是截取的长度;
5、SUBSTRING_INDEX(str ,substr ,n):返回字符substr在str中第n次出现位置之前的字符串;
6、SUBSTRING(str ,n ,m):返回字符串str从第n个字符截取到第m个字符;
7、REPLACE(str, n, m):将字符串str中的n字符替换成m字符;
8、LENGTH(str):计算字符串str的长度。
SELECT SUBSTRING_INDEX(profile,",",-1) as gender,COUNT(*) as number
FROM user_submit
GROUP BY gender;
SQL31 提取博客URL中的用户名
select device_id,
substring_index(blog_url, '/', -1) as user_name
from user_submit
SQL32 截取出年龄
先截取至年龄
SUBSTRING_INDEX(profile,',',3)
返回的是
180cm,75kg,27
165cm,45kg,26
178cm,65kg,25
171cm,55kg,23
168cm,45kg,22
再把年龄拿出来
substring_index(SUBSTRING_INDEX(profile,',',3),',',-1)
SELECT
substring_index(substring_index(profile, ",", -2), ",", 1) as age,
COUNT(*) as number
FROM user_submit
GROUP BY age;
SQL33 找出每个学校GPA最低的同学
group by university 后,select只能使用 university 和 函数 字段,不能在用device_id字段了(聚合后没有了device_id特性)
所以,这种写法是错误的:
SELECT device_id,
university,
min(gpa) as gpa
FROM user_profile
GROUP BY university
但是却可以出现自连接
select device_id,
university,
gpa
from user_profile
where (university,gpa) in (
select university,
min(gpa)
from user_profile
group by university
)
order by university
另一种方法就是使用滑动窗口函数
窗口排序函数
- row_number:在每个分组中,为每行分配一个从1开始的唯一序列号,递增,不考虑重复;(1234567……)
- rank: 在每个分组中,为每行分配一个从1开始的序列号,考虑重复,挤占后续位置;(12225……)
- dense_rank: 在每个分组中,为每行分配一个从1开始的序列号,考虑重复,不挤占后续位置(12223……)
SELECT device_id, university,gpa
FROM(
SELECT device_id, university,gpa,
# 按学校分组,按gpa排序
RANK() over (PARTITION BY university ORDER BY gpa) rk
FROM user_profile
) up
WHERE up.rk=1;
06 综合练习
SQL34 统计复旦用户8月练题情况
select a.device_id,
university,
sum(if(result is not null,1,0)) as question_cnt,
sum(if(result = 'right',1,0)) as right_question_cnt
from user_profile as a
left join question_practice_detail as b on a.device_id = b.device_id
where university = '复旦大学' and (month(date)=8 or date is null)
group by a.device_id
SQL35 浙大不同难度题目的正确率
题目难度,正确率=正确题数/总题数
result字段是right和wrong两种字符串,没法使用聚合函数,需要用if转成01
SELECT qd.difficult_level,
-- 正确率计算
sum(if(qpd.result='right',1,0))/count(qpd.device_id) AS correct_rate
FROM
-- 三表连接
question_practice_detail AS qpd
inner JOIN user_profile AS u ON qpd.device_id = u.device_id and u.university = '浙江大学'
inner JOIN question_detail AS qd ON qpd.question_id = qd.question_id
GROUP BY qd.difficult_level
ORDER BY correct_rate ASC;
SQL39 21年8月份练题总数
select
count(distinct device_id) as did_cnt,
count(question_id) as question_cnt
from question_practice_detail
where month(date) = 8 and year(date) = 2021