牛客MySQL真题练习2(180-194)
- 统计每款的SPU(货号)数量,并按SPU数量降序排序
SELECT style_id, COUNT(item_id) AS SPU_num
FROM product_tb
GROUP BY style_id
ORDER BY SPU_num DESC
- 统计实际总销售额与客单价(人均付费,总收入/总用户数,结果保留两位小数)
SELECT SUM(sales_price) AS sales_total, ROUND(SUM(sales_price) / COUNT(DISTINCT user_id), 2) AS per_trans
FROM sales_tb
- 统计折扣率(GMV/吊牌金额,GMV指的是成交金额),以上例子的输出结果如下(折扣率保留两位小数)
SELECT ROUND(gmv / origin_price * 100, 2) AS 'discount_rate(%)'
FROM (
SELECT
SUM(sales_price) AS gmv,
SUM(tag_price * sales_num) AS origin_price
FROM product_tb t1 LEFT JOIN sales_tb t2 USING(item_id)
) a
- 统计每款的动销率(pin_rate,有销售的SKU数量/在售SKU数量)与售罄率(sell-through_rate,GMV/备货值,备货值=吊牌价*库存数),按style_id升序排序
SELECT
style_id,
ROUND(sku / (total_inventory - sku) * 100, 2) AS "pin_rate(%)", -- 在售SKU数量需要用总库存量减去已销售量
ROUND(gmv / total_price * 100, 2) AS "sell-through_rate(%)"
FROM (
( # 1. 建立表a:计算各类商品的销量和售出总额
SELECT
style_id,
SUM(sales_num) AS sku,
SUM(sales_price) AS gmv
FROM product_tb t1 LEFT JOIN sales_tb t2 USING(item_id)
GROUP BY style_id
) a
JOIN ( # 2. 建立表b:计算各类商品的总库存和备货值
SELECT
style_id,
SUM(inventory) AS total_inventory,
SUM(tag_price * inventory) AS total_price
FROM product_tb
GROUP BY style_id
) b USING(style_id)
) -- 命名就错了,为什么?
ORDER BY style_id
/* 重点:
1. 动销率的定义是“有销量的商品数量/所有商品数量”,而此题里指的是“有销量商品销售数量/剩余仍在销售的商品数量”
2. 直接连接两表求和存在风险:如果某个商品多次销售,则会连接后product表会出现多条行,导致求总存量 total_inventory 时增高
*/
/* # 简洁版本
SELECT style_id, ROUND(SUM(sku) / SUM(inventory - sku) * 100, 2) AS pin_rate,
ROUND(SUM(gmv) / SUM(inventory * tag_price) * 100,2) AS sell_through_rate
FROM
(SELECT item_id, SUM(sales_num) AS sku, SUM(sales_price) AS gmv -- 注意这里用的是 item_id 分组
FROM sales_tb
WHERE sales_date BETWEEN '2021-11-01' AND '2021-11-30'
GROUP BY item_id) t
LEFT JOIN product_tb pt USING(item_id)
GROUP by style_id
ORDER by style_id
*/
- 统计连续2天及以上在该店铺购物的用户及其对应的次数(若有多个用户,按user_id升序排序)
SELECT user_id, COUNT(1) AS days_count
FROM (
SELECT # 1. 对日期进行排序
DISTINCT sales_date, -- 可能有同一用户一天购物多次的情况
user_id,
DENSE_RANK() OVER (PARTITION BY user_id ORDER BY sales_date) AS ranking
FROM sales_tb
) a
GROUP BY user_id, DATE_SUB(sales_date, INTERVAL ranking DAY)
HAVING days_count > 1 # 2. 判断连续日期,并查询统计连续天数>=2的用户:使用date_sub()函数并聚合后判断日期连续 -> 如果 sales_date 减去ranking(天)是相同的日期,则说明他们这些天都是连续的日期(所以要使用dense_rank()并且对日期进行去重,不然重复的日期也会被记作连续的天数)
ORDER BY user_id
/* # 一个取巧的方法:连接自身
select s1.user_id AS user_id, (count(distinct s1.sales_date) + 1) AS days_count
from sales_tb s1 join sales_tb s2 on s1.user_id = s2.user_id
and s1.sales_date = date_add(s2.sales_date , interval 1 day)
group by user_id
order by user_id
*/
/* # 简洁版
SELECT user_id, (COUNT(user_id) + 1) AS days_count
FROM sales_tb
WHERE (user_id, sales_date) IN (SELECT user_id, DATE_ADD(sales_date, INTERVAL 1 DAY) FROM sales_tb)
GROUP BY user_id
ORDET BY user_id
*/
/* # 考虑周全的答案:同一个客户是可以有连续2天和连续2天以上的不同情况,这种需要在做一次聚合筛选
select user_id,max(days_count) days_count
from
(select distinct user_id,count(*) days_count
from(
select distinct user_id,sales_date,
dense_rank()over(partition by user_id order by sales_date) rk
from sales_tb
) t1
group by user_id,date_sub(sales_date,interval rk day)
having days_count >=2 ) t2
group by user_id
order by user_id
*/
- 统计每个科目的转换率(sign_rate(%),转化率=报名人数/浏览人数,结果保留两位小数)。按照course_id升序排序。
SELECT course_id, course_name, ROUND(SUM(if_sign) / SUM(if_vw) * 100, 2) AS 'sign_rate(%)'
FROM course_tb t1 LEFT JOIN behavior_tb t2 USING(course_id)
GROUP BY course_id, course_name
ORDER BY course_id
- 统计直播开始时(19:00),各科目的在线人数,以上例子的输出结果为(按照course_id升序排序)
SELECT
course_id,
course_name,
COUNT(user_id) AS online_num
FROM course_tb t1 LEFT JOIN attend_tb t2 USING(course_id)
WHERE DATE_FORMAT(in_datetime, '%H%i') <= 1900 AND DATE_FORMAT(out_datetime, '%H%i') > 1900
# WHERE TIME(in_datetime) <= '19:00:00' AND TIME(out_datetime) > '19:00:00'
# WHERE RIGHT(in_datetime, 8) <= '19:00:00' AND RIGHT(out_datetime, 8) > '19:00:00'
GROUP BY course_id, course_name
ORDER BY course_id
/*
关于本题对日期函数的使用考察最规范的描述:
WHERE TIME(in_datetime) <= DATE_FORMAT(course_datetime,'%H:%i:%s')
AND TIME(out_datetime) > DATE_FORMAT(course_datetime,'%H:%i:%s')
-- DATE_FORMAT(日期,'%Y-%m-%d %H-%i-%s')
*/
- 统计每个科目的平均观看时长(观看时长定义为离开直播间的时间与进入直播间的时间之差,单位是分钟),输出结果按平均观看时长降序排序,结果保留两位小数。
SELECT course_name, ROUND(AVG(TIMESTAMPDIFF(MINUTE, in_datetime, out_datetime)), 2) AS avg_len
FROM course_tb t1 LEFT JOIN attend_tb t2 USING(course_id)
GROUP BY course_name
ORDER BY avg_len DESC
- 统计每个科目的出勤率(attend_rate(%),结果保留两位小数),出勤率=出勤(在线时长10分钟及以上)人数 / 报名人数,输出结果按course_id升序排序
# 两个指标分开计算
SELECT
course_id,
course_name,
ROUND((SUM(len)/sign_cnt)*100, 2) 'attend_rate(%)'
FROM(
SELECT
course_name,
course_id,
SUM(if_sign) AS sign_cnt
FROM behavior_tb
JOIN course_tb USING(course_id)
GROUP BY course_id, course_name) TB1
JOIN(
SELECT
course_id,
user_id,
IF(SUM(TIMESTAMPDIFF(SECOND, in_datetime, out_datetime))>600, 1, 0) AS len -- 直接用MINUTE可能会少算。实际上还需要考虑进入时间是否在开播之前,最好也考虑离开时间
FROM attend_tb
GROUP BY course_id, user_id) TB2
USING(course_id)
GROUP BY course_id, course_name
ORDER BY course_id
/*
# 本题考察点是如何连接三张表。可以用不同的JOIN来直接连接三张表
select b.course_id, a.course_name,
round(count(distinct case when timestampdiff(minute, in_datetime, out_datetime) >= 10 then b.user_id else null end)/
count(distinct case when if_sign=1 then b.user_id else null end) * 100, 2)
from attend_tb c right join behavior_tb b using (user_id,course_id) join course_tb a using (course_id)
group by b.course_id, a.course_name
order by b.course_id
# 注意 b 表和 c 表连接了两个user_id, course_id,且因为 b 表包含 c 表,所以要用 right join。
进而,因为用的是right join,连接表之后,c 表中100客户一共两次登录,所以 b 表会重复100客户的情况,所以SUM(if_sign)也要去重
*/
/*
# 只考虑一个指标的详细计算
select
course_id,course_name,
round(count1/sum(if_sign)*100,2) as attend_rate
from (
select
course_id,count(distinct user_id) as count1
from attend_tb
where timestampdiff(second,in_datetime,out_datetime)>=10*60
group by course_id) a
join behavior_tb using (course_id)
join course_tb using (course_id)
group by course_id,course_name
order by course_id
*/
- 统计每个科目最大同时在线人数(按course_id排序)
和某度信息流T163的思路是一致的,虽然这道题中没有提到同一时间即有进来又有出去怎么处理,可以默认先计算进来的再计算出去的
- 统计11月份日人均回答量(回答问题数量/答题人数),按回答日期排序,结果保留两位小数
SELECT answer_date, ROUND(COUNT(*) / COUNT(DISTINCT author_id), 2) AS per_num
FROM answer_tb
WHERE MONTH(answer_date) = 11 -- DATE_FORMAT(answer_date, '%Y-%m') = '2021-11'
GROUP BY answer_date
ORDER BY answer_date
- 回答字数大于等于100字的认为是高质量回答,请你统计某乎问答高质量的回答中用户属于1-2级、3-4级、5-6级的数量分别是多少,按数量降序排列
/*
SELECT
(CASE WHEN author_level IN (1, 2) THEN "1-2级"
WHEN author_level IN (3, 4) THEN "3-4级"
ELSE "5-6级"
END ) AS level_cnt,
COUNT(author_level) AS num
FROM (
SELECT author_id, author_level
FROM author_tb t1 LEFT JOIN answer_tb t2 USING(author_id)
WHERE char_len >= 100
) a
GROUP BY level_cnt
ORDER BY num DESC
# 本来以为统计的是用户数,结果是回答数,所以没必要对用户去重,从而没必要想得这么复杂
*/
SELECT
(CASE WHEN author_level <= 2 THEN "1-2级"
WHEN author_level >= 5 THEN "5-6级"
ELSE "3-4级"
END ) AS level_cnt,
COUNT(*) AS num
FROM author_tb t1 LEFT JOIN answer_tb t2 USING(author_id)
WHERE char_len >= 100
GROUP BY 1
ORDER BY 2 DESC
- 统计11月份单日回答问题数大于等于3个的所有用户信息(author_date表示回答日期、author_id表示创作者id,answer_cnt表示回答问题个数)。若有多条数据符合条件,按answer_date、author_id升序排序
SELECT answer_date, author_id, COUNT(*) AS answer_cnt -- 计数不需去重
FROM answer_tb
WHERE MONTH(answer_date) = 11
GROUP BY answer_date, author_id
HAVING answer_cnt >= 3
ORDER BY answer_date, author_id
- 统计回答过教育类问题的用户里有多少用户回答过职场类问题
SELECT COUNT(DISTINCT author_id) AS num
FROM answer_tb t1 LEFT JOIN issue_tb t2 USING(issue_id)
WHERE author_id IN (
SELECT DISTINCT author_id
FROM answer_tb t1 LEFT JOIN issue_tb t2 USING(issue_id)
WHERE issue_type = 'Education'
)
AND issue_type = 'Career'
/*
# 取巧,计数法
SELECT COUNT(*) AS num
FROM (
SELECT author_id
FROM answer_tb t2 LEFT JOIN issue_tb t1 ON (t1.issue_id = t2.issue_id)
AND issue_type IN ('Education', 'Career')
GROUP BY author_id
HAVING COUNT(DISTINCT issue_type) = 2
) a
*/
/*
# 见题拆题法(可以直接从author_id的首字母推出这个问题分属什么类别)
SELECT COUNT(DISTINCT author_id) num
FROM answer_tb
WHERE issue_id LIKE 'E%'
AND author_id IN (SELECT author_id
FROM answer_tb
WHERE issue_id LIKE 'C%')
*/
- 统计最大连续回答问题的天数大于等于3天的用户及其等级(若有多条符合条件的数据,按author_id升序排序)
与之前的题目T184类似