牛客SQL-大厂面试真题
某音短视频
SQL156 各个视频的平均完播率【简单】
select
t1.video_id,
-- 结束观看时间与开始播放时间的差>=视频时长时,视为完成播放
round(
avg(if (end_time - start_time >= duration, 1, 0)),
3
) as avg_comp_play_rate
from
tb_user_video_log t1
join tb_video_info t2 on t1.video_id = t2.video_id
where
year (start_time) = 2021
group by
t1.video_id
order by
avg_comp_play_rate desc
使用timestampdiff会更好
avg( if timestampdiff(second,start_time,end_time) >= duration 1,0)
SQL157 平均播放进度大于60%的视频类别【简单】
select
tag,
concat (round(avg(play_progress), 2), "%") as avg_play_progress
from
(
select
U.video_id,
tag,
duration,
if (
timestampdiff (second, start_time, end_time) / duration > 1,
1,
timestampdiff (second, start_time, end_time) / duration
) * 100 as play_progress
from
tb_user_video_log U
left join tb_video_info V on U.video_id = V.video_id
) tb
group by
tag
having
avg_play_progress > 60
order by
avg_play_progress desc
时间差值直接用减号相减容易出错,最好时间时间戳函数
TIMESTAMPDIFF(unit,begin,end)
TIMESTAMPDIFF函数返回begin-end的结果,其中begin和end是DATE或DATETIME表达式。
TIMESTAMPDIFF函数允许其参数具有混合类型,例如,begin是DATE值,end可以是DATETIME值。 如果使用DATE值,则TIMESTAMPDIFF函数将其视为时间部分为“00:00:00”的DATETIME值。
unit参数是确定(end-begin)的结果的单位,表示为整数,取值可为:YEAR/MONTH/DAY/HOUR/MINUTE/SECOND
SQL158 每类视频近一个月的转发量/率【中等】
DATEDIFF :求两个日期间隔的函数
select
tag,
sum(if_retweet) as retweet_cnt,
round((sum(if_retweet) / count(U.uid)), 3) as retweet_rate
from
tb_user_video_log as U
left join tb_video_info as V on U.video_id = V.video_id
where
-- 最近30天
datediff (
(
select
max(date (start_time))
from
tb_user_video_log
),
date (start_time)
) < 30
group by
tag
order by
retweet_rate DESC
SQL159 每个创作者每月的涨粉率及截止当前的总粉丝量【中等】
难点 total_fans是当前月份的,不是某个月份分组的,需要累积,不能使用聚合函数,只能用窗口函数来统计。剩下的就是 case when 打标签
select
author,
month,
-- 涨分率 这里不能用聚合函数
round(follow_n_sum / play_cnt, 3) as fans_growth_rate,
-- 当前粉丝总量
sum(follow_n_sum) over (
partition by
author
order by
month
) total_fans
from
(
select
author,
date_format (start_time, '%Y-%m') as month,
sum(
case
when if_follow = 1 then 1 -- 涨粉
when if_follow = 2 then -1 -- 掉粉
else 0
end
) as follow_n_sum,
count(author) as play_cnt
from
tb_user_video_log as U
left join tb_video_info as V on U.video_id = V.video_id
where
year (start_time) = 2021
group by
author,
month
) T
order by
author,
total_fans
SQL160 国庆期间每类视频点赞量和转发量【较难】
开窗函数,滑动窗口
- rows n perceding:从当前行到前n行(一共n+1行)
- rang/rows between 边界规则1 and 边界规则2:rang表示按照值的范围进行定义框架,rows表示按照行的范围进行定义框架
rows between 2 perceding and 2 following # 当前行往前2行+当前行+当前行往后2行(一共5行)
rows between 1 following 3 following # 当前行的后1——>后3(共3行)
rows between unbounded preceding and current row # 从第一行到当前行
sql
select
*
-- 先用窗口函数 再去筛选
from
(
select
tag,
dt,
-- 窗口向前7天
sum(add_likes) over (
partition by
tag
order by
dt
rows 6 preceding
) sum_like_cnt_7d,
max(add_retweets) over (
partition by
tag
order by
dt
rows 6 preceding
) max_retweet_cnt_7d
from
(
-- 过滤 聚合
select
tag,
date_format (start_time, '%Y-%m-%d') dt,
sum(if_like) add_likes,
sum(if_retweet) add_retweets
from
tb_user_video_log t1
join tb_video_info t2 on t1.video_id = t2.video_id
group by
tag,
dt
) a
) b
where
dt between '2021-10-01' and '2021-10-03'
order by
tag desc,
dt
SQL161 近一个月发布的视频中热度最高的top3视频【困难】
新度 最近无播放的天数:所有数据的最新日期-某一视频的最新播放日期(聚合分组)
这是个聚合分组后,每个分组中的数据
注意这里要用date类型,否则过不了
select
video_id,
round((100 * complete_rate + 5 * like_cnt + 3 * comment_cnt + 2 * retweet_cnt) * fresh_index,0
) as hot_index
from
(
select
vl.video_id,
-- 平均完播率
avg( if ( timestampdiff (second, start_time, end_time) >= duration,1,0)) as complete_rate,
-- 点赞数
sum(if_like) as like_cnt,
-- 评论数
count(comment_id) as comment_cnt,
-- 转发数
sum(if_retweet) as retweet_cnt,
-- 新鲜度 最近无播放天数
-- 所有数据的最新日期-某一视频的最新播放日期(聚合分组)
1 / (
timestampdiff(
day,
max(date (end_time)),
(
select max(date (end_time))
from tb_user_video_log
)
) + 1
) as fresh_index
from
tb_user_video_log as vl
join tb_video_info as vi on vl.video_id = vi.video_id
where
-- 近一个月发布的视频
-- 所有数据的最新日期-某个视频的发布日期<=29
timestampdiff(day,date(release_time),
(
select max(date(end_time))
from tb_user_video_log)
) <= 29
group by
video_id
) as q
order by
hot_index desc
limit
3
用户增长场景(某度信息流)
SQL162 2021年11月每天的人均浏览文章时长【简单】
- 计算每次文章浏览的时长和日期:
- 过滤目标时间窗的有效浏览记录:WHERE artical_id != 0 AND DATE_FORMAT(in_time, "%Y%m") = "202111"
- 将进入时间转化为日期:DATE(in_time) as dt
- 计算浏览时长:TIMESTAMPDIFF(SECOND, in_time, out_time) as view_len_sec
- 按日期分组:GROUP BY dt
- 计算人均时长(=总时长÷人数):SUM(view_len_sec) / COUNT(DISTINCT uid) as avg_view_len_sec
- 保留1位小数:ROUND(x, 1)
timestampdiff()函数的作用是返回两个日期时间之间的整数差。
select
date(in_time) dt,
round(
sum(timestampdiff (second, in_time, out_time)) / count(distinct uid),
1
) avg_lensec
from
tb_user_log
where
date_format(in_time, "%Y%m") = "202111"
and artical_id != 0
group by
dt
order by
avg_lensec
SQL163 每篇文章同一时刻最大在看人数【中等】【同时在线】
同时在线人数问题
本题的坑点:
题目要求在瞬时统计时遵循【先进后出】:如果同一时刻有进入也有离开时,先记录用户数增加,再记录减少。
因此在ORDER BY层面,在遵循dt升序的同时,还要遵循先+1,再-1的原则,即diff DESC:
SUM(diff) OVER(PARTITION BY artical_id ORDER BY dt, diff DESC)
总体sql:
select
artical_id,
max(instant_viewer_cnt) as max_uv
from
(
select
artical_id,
event_time,
sum(flag) over (partition by artical_id order by event_time, flag desc) as instant_viewer_cnt
from
(
-- 登录标记1 登出标记-1
select
uid,
artical_id,
in_time as event_time,
1 flag
from
tb_user_log
where artical_id != 0
union all
select
uid,
artical_id,
out_time as event_time,
-1 flag
from
tb_user_log
where artical_id != 0
) t1
) t2
group by
artical_id
order by max_uv desc
SQL164 2021年11月每天新用户的次日留存率【中等】【留存问题】
留存率的计算是经典的老问题了 就是left join
select
t1.dt,
round(count(distinct t2.uid) / count(t1.uid), 2) as uv_rate
from
(
select
uid,
min(date (in_time)) dt
from
tb_user_log
group by
uid
) t1
left join (
select
uid,
date (in_time) dt
from
tb_user_log
union
select
uid,
date (out_time) dt
from
tb_user_log
) t2 -- 用户活跃表 进入时间和离开时间都算活跃
on t1.uid = t2.uid
and datediff(t2.dt,t1.dt)=1
where
t1.dt like '2021-11%'
group by
t1.dt
order by
t1.dt;
上述代码似乎有问题,如果登陆时间跨越了了三天,刚好错过了第二天,拆出来的数据没有第二天,那结果不是错了?
SQL165 统计活跃间隔对用户分级结果【较难】
打标签的题 case when
select
case
when max_day<7 and min_day>=7 then '忠实用户'
when max_day<7 and min_day<7 then '新晋用户'
when min_day between 7 and 29 then '沉睡用户'
else '流失用户' end user_grade
,round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) ratio
from(
select *
-- 每个用户最近一次登录差多少天
,datediff(max(out_time)over(),max(out_time)over(partition by uid)) max_day
-- 每个用户最远一次登录差多少天
,datediff(max(out_time)over(),min(out_time)over(partition by uid)) min_day
from tb_user_log
)a
group by user_grade
order by ratio desc
这里 引入了两个概念
- 每个用户最近一次登录差多少天
- 每个用户最远一次登录差多少天
使用开窗函数来实现,非常巧妙
SQL166 每天的日活数及新用户占比【较难】
select
t1.dt dt,
count(distinct t1.uid) dau,
round(
count(distinct t2.uid) / count(distinct t1.uid),
2
) ub_new_ratio
from
(
select
uid,
date (in_time) dt
from
tb_user_log
union
select
uid,
date (out_time) dt
from
tb_user_log
) t1 -- 用户活跃表 进入时间和离开时间都算活跃
left join (
select
uid,
min(date (in_time)) dt
from
tb_user_log
group by
uid
) t2 -- 每天的新用户表
on t1.uid = t2.uid
and t1.dt = t2.dt
group by
dt
order by
dt
与SQL164留存率问题相反
SQL167 连续签到领金币【困难】
两个难点:
- 如何确定签到日期是否连续?(窗口排序)
- 如何确定每一次签到获取金币的数量?(mod取余)
select
uid,
date_format(dt,'%Y%m') as month,
sum(grade) as coin
from(
select uid ,dt,
case
when mod(rank() over (partition by uid,rank_day order by dt),7) = 3 then 3
when mod(rank() over (partition by uid,rank_day order by dt),7) = 0 then 7
else 1
end grade
from
(
select uid, dt,
date_sub(dt,interval rank() over(partition by uid order by dt) day) as rank_day -- 连续日期
-- rank() over(partition by uid order by dt) as rk
from(
select uid, date(in_time) dt
from tb_user_log
where artical_id=0 and sign_in=1 and date(in_time) between '2021-07-07' and '2021-10-31'
group by uid,dt
) t1
) t2
)t3
group by uid,month;
电商场景(某东商城)
SQL168 计算商城中2021年每月的GMV【简单】
GMV(Gross Merchandise Volume,商品交易总额)为已付款订单和未付款订单两者之和。结果按GMV升序排序。
-- 有年有月
SELECT date_format(event_time,'%Y-%m') as month,
sum(total_amount) as GMV
from tb_order_overall
-- status=2是退款
where status != 2 and year(event_time) = 2021
group by month
having GMV > 100000
order by GMV
SQL169 统计2021年10月每个退货率不大于0.5的商品各项指标【中等】
select
product_id,
-- 商品点展比
round(sum(if_click) / count(1), 3) ctr,
-- 加购率
round(sum(if_cart) / sum(if_click), 3) cart_rate,
-- 成单率
round(sum(if_payment) / sum(if_cart), 3) payment_rate,
-- 退货率
round(sum(if_refund) / sum(if_payment), 3) refund_rate
from
tb_user_event
where
-- 2021年10月
date_format (event_time, '%Y-%m') = '2021-10'
group by
product_id
having
-- 退货率小于 0.5
refund_rate <= 0.5
order by
product_id
SQL170 某店铺的各商品毛利率及店铺整体毛利率【中等】
输出要涵盖 店铺和商品 需要用到Union
select
'店铺汇总' as product_id,
concat (
round((1 - sum(in_price * cnt) / sum(price * cnt) )* 100,1),'%'
) as profit_rate
from
tb_order_overall
join tb_order_detail using (order_id)
join tb_product_info using (product_id)
where
date_format (event_time, '%Y-%m') >= '2021-10'
and shop_id = 901
union all
(
select
product_id,
concat (
round((1 - avg(in_price / price))* 100, 1),'%'
) profit_rate
from
tb_order_overall
join tb_order_detail using (order_id)
join tb_product_info using (product_id)
where
date_format (event_time, '%Y-%m') >= '2021-10'
and shop_id = 901
group by
product_id
having
(1 - avg(in_price / price)) * 100 > 24.9
order by
product_id
)
SQL171 零食类商品中复购率top3高的商品【中等】
某商品复购率 = 近90天内购买它至少两次的人数 ÷ 购买它的总人数
近90天指包含最大日期(记为当天)在内的近90天。
select
product_id,
-- rk>=2 说明被复购
round(count(distinct if(rk >= 2,uid,null))/count(distinct uid),3) rate
from(
-- 统计每个货物用户的购买记录;利用窗口函数根据货号、用户分组按购买时间排序
select
product_id,
uid,
row_number() over(partition by product_id,uid order by event_time) rk
from tb_order_detail
left join tb_product_info using(product_id)
left join tb_order_overall using (order_id)
where tag='零食'
and status=1
and datediff(
date(
select
max(event_time)
from tb_order_overall
),date(event_time)) < 90 -- 90天内
) main
group by product_id
order by rate desc,product_id
limit 3
SQL172 10月的新户客单价和获客成本【较难】
一个uid在一个order_id中可能有多条记录,所以要用SUM窗口函数,单纯的聚合有可能出现一行对应多行的错误。同时窗口聚合的分组依据应该是order_id而不是uid
select
-- 首单平均交易金额
round(avg(total_amount), 1) as avg_amount,
-- 平均获客成本
round(avg(total - total_amount), 1) as avg_cost
from(
select
uid,
event_time,
row_number() over (partition by uid order by event_time) as rk,
total_amount,
-- 按order_id分组 订单明细的总和
sum(price*cnt) over (partition by order_id) as total
from tb_order_detail
join tb_order_overall USING(order_id)
where status = 1
) TB1
where rk = 1 -- 首单
and date_format(event_time, '%Y-%m') = '2021-10'
SQL173 店铺901国庆期间的7日动销率和滞销率【困难】
动销率定义为店铺中一段时间内有销量的商品占当前已上架总商品数的比例(有销量的商品/已上架总商品数)。
滞销率定义为店铺中一段时间内没有销量的商品占当前已上架总商品数的比例。(没有销量的商品/已上架总商品数)。
select dt
,round(count(distinct product_id)/avg(onsale_cnt),3) as sale_rate
,round(1- count(distinct product_id)/avg(onsale_cnt),3) as unsale_rate
from
(
-- 笛卡尔积+筛选获取901店铺在三天里上架商品数量
select
date(event_time) as dt,
count(
distinct
case when
event_time>=release_time then product_id
end) as onsale_cnt
from tb_product_info,tb_order_overall
where shop_id=901
and date(event_time) between '2021-10-01' and '2021-10-03'
group by dt
) tb_3dayreli
left join
(
-- 每个有下单的日期与下单商品id
select product_id,date(event_time) dtt
from tb_order_overall join tb_order_detail
using(order_id)
join tb_product_info using(product_id)
where status=1 and shop_id=901
) tb_sellcnt
on datediff(tb_3dayreli.dt,tb_sellcnt.dtt) between 0 and 6 #7天内
group by dt
出行场景(某滴打车)
SQL174 2021年国庆在北京接单3次及以上的司机统计信息【简单】
select
city,
ROUND(AVG(order_cnt), 3) as avg_order_num,
ROUND(AVG(income_amount), 3) as avg_income
from
(
-- 2021国庆期间在北京接单3次以上的司机
SELECT
city,
driver_id,
count(t1.order_id) as order_cnt,
sum(fare) as income_amount
from tb_get_car_order as t1
join tb_get_car_record as t2
on t1.order_id = t2.order_id
where
date(order_time) between '2021-10-01' and '2021-10-07'
and city = '北京'
group by city, driver_id -- 去重
having order_cnt >= 3
) as t3
group by city;
-- 需要注意id一样的时候用聚合函数(SUM,AVG)需要再用一次GROUP BY,否则会报错;
SQL175 有取消订单记录的司机平均评分【简单】
(
select driver_id as id,
round(avg(grade), 1) as avg_score
from tb_get_car_order
where driver_id in (
-- 有取消订单的司机id
select driver_id
from tb_get_car_order
where date_format(order_time, '%Y-%m') = '2021-10'
and start_time is null
group by driver_id
)
group by driver_id
order by driver_id
)
union all
(
select "总体" as id,
round(avg(grade), 1) as avg_score
from tb_get_car_order
where driver_id in (
select driver_id
from tb_get_car_order
where date_format(order_time, '%Y-%m') = '2021-10'
and start_time is null
group by driver_id
)
)
SQL176 每个城市中评分最高的司机信息【中等】
select
city,
driver_id,
avg_grade,
cnt_order,
cnt_mile
from
(
select
b.city,
a.driver_id,
round(avg(grade), 1) as avg_grade, -- 平均评分
round(
count(a.order_id) / count(distinct substring(order_time, 1, 10)),
1
) as cnt_order, -- 日均接单量
round(
sum(mileage) / count(distinct substring(order_time, 1, 10)),
3
) as cnt_mile, -- 日均行驶里程数
dense_rank() over (
partition by
b.city
order by
avg(grade) desc
) as rk -- 按城市分区,按平均评分排序
from
tb_get_car_order a
left join tb_get_car_record b on a.order_id = b.order_id -- 以订单表为左表
group by
-- 去重
b.city,
a.driver_id
) t
where
rk = 1
order by
cnt_order
SQL177 国庆期间近7日日均取消订单量【中等】
select
dt,
round(finish_num_7d,2) finish_num_7d,
round(cancel_num_7d,2) cancel_num_7d
from (
select
dt,
-- 从当前位置向前6天,每天的近7日
avg(order_done) over(order by dt rows 6 preceding) finish_num_7d,
avg(order_cancel) over(order by dt rows 6 preceding) cancel_num_7d
from (
select
date(order_time) as dt,
count(start_time) order_done, -- 订单完成量 不为null
sum(if(start_time is null,1,0)) order_cancel -- 订单取消量
from tb_get_car_order
group by date(order_time)
) tb
) tb1
where dt between '2021-10-01' and '2021-10-03' -- 国庆前三天
order by dt
SQL178 工作日各时段叫车量、等待接单时间和调度时间【较难】
select period,
count(1) as get_car_num,
round(avg(wait_time/60), 1) as avg_wait_time, -- 秒转分
round(avg(dispatch_time/60), 1) as avg_dispatch_time
from (
select event_time,
-- case when 打标签
-- event_time-开始打车时间为时段划分依据
case
when hour(event_time) in (7, 8) then '早高峰'
when hour(event_time) between 9 and 16 then '工作时间'
when hour(event_time) in (17, 18, 19) then '晚高峰'
else '休息时间'
end as period,
timestampdiff(second, event_time, end_time) as wait_time, -- 等待时间
timestampdiff(second, order_time, start_time) as dispatch_time -- 调度时间
from tb_get_car_record r
join tb_get_car_order o ON r.order_id = o.order_id
where dayofweek(event_time) between 2 and 6
) as t_wait_dispatch_time
group by period
order by get_car_num;
-- timestampdiff()函数的作用是返回两个日期时间之间的整数差
-- dayofweek 返回日期在本周的星期几索引,星期天 = 1,星期一 = 2,星期六 = 7
SQL179 各城市最大同时等车人数 【较难】
select city, max(wait_uv) as max_wait_uv
from (
select city,
dt,
sum(uv) over (partition by city order by dt asc,uv desc) as wait_uv
from ( -- 开始打车
select city,
event_time as dt,
1 as uv
from tb_get_car_record
union all
-- 订单取消 或结束
select city,
if(start_time is not null, start_time, end_time) as dt,
-1 as uv
from tb_get_car_record as cr
inner join tb_get_car_order as co
on cr.order_id=co.order_id
) as tb1
) as tb2
where date_format (dt, '%Y-%m')='2021-10'
group by city
order by max_wait_uv, city;
同时在线问题 union all 登录登出 + 窗口排序
某宝店铺分析(电商模式)
SQL180 某宝店铺的SPU数量【简单】
select
style_id,
count(*) as SPU_num
from
product_tb
group by
style_id
order by
SPU_num desc
SPU与SKU区别?
SQL181 某宝店铺的实际销售额与客单价【简单】
select
sum(sales_price) as sales_total,
round(sum(sales_price) / count(distinct user_id), 2) as per_trans
from
sales_tb;
SQL182 某宝店铺折扣率【中等】
即Gross Merchandise Volume,主要是指网站的成交金额,而这里的成交金额包括:付款金额和未付款。类似题SQL168
1、GMV成交金额:累计结算金额,即 sum(sales_price)
2、吊牌总金额:产品数量*吊牌单价,即 sum(sales_num * tag_price)
select
round(100*sum(sales_price) / sum(sales_num*tag_price), 2) as 'discount_rate(%)'
from
sales_tb left join product_tb on sales_tb.item_id = product_tb.item_id
SQL183 某宝店铺动销率与售罄率【较难】
一、动销率
Stock Keeping Unit (库存量单位)。在很多业务场景下,是计算库存进出计量,物理上不可分割的最小存货单元,表示某款商品的具体配置(规格、颜色等)。
动销率 = 有销售的SKU数量 / 在售SKU数量
有销售的SKU数量:售出的SKU数量总和(商品期间销售数量)
在售的SKU数量 :剩余(总库存 - 售出)库存SKU的数量总和(商品期末库存数量)
二、售罄率
售罄率 = GMV / 备货值(备货值 = 吊牌价 * 库存数)
GMV :所有订单的金额总和(很多场景下,下单未付款的订单金额也计算在内)
吊牌价 :商品详情页显示的价格
问题:直接做连接做计算会导致有重复购买记录的商品的inventory多次计算,计算结果会变小。 要将两个表的计算分开进行。
select style_id,
round(100*sum(num)/ (sum(inventory)-sum(num)),2) as 'pin_rate(%)',
round(100*sum(item_GMV)/ sum(inventory*tag_price),2) as 'sell-through_rate(%)'
from product_tb a
join
(
select item_id,
sum(sales_num) as num,
sum(sales_price) as item_GMV
from sales_tb
group by item_id) as b
on a.item_id=b.item_id
group by style_id
order by style_id
SQL184 某宝店铺连续2天及以上购物的用户及其对应的天数【较难】
MySQL的date_sub用法:
语法:date_sub(date,interval expr type),函数从日期减去指定的时间间隔
date_sub('2019-07-27', interval 30 day)表示往前推30天
可以直接 用日期-数字,但最好别这么勇
select
user_id,
count(diff) days_count
from
(
select
user_id,
date_sub(sales_date , interval row_number() over (partition by user_id order by sales_date) day )as diff
from
( -- 同一天可能多个用户下单,进行去重
select
user_id,
sales_date
from
sales_tb
group by
user_id,
sales_date
) t1
) t2 -- 判断一串日期是否连续:若连续,用这个日期减去它的排名,会得到一个相同的结果
group by
user_id,
diff
having
count(diff) >= 2 -- 连续下单大于等于两天
order by
user_id
牛客直播课分析(在线教育行业)
SQL185 牛客直播转换率【简单】
select
btb.course_id,
course_name,
round(sum(if_sign) / sum(if_vw) * 100, 2) as sign_rate
from
course_tb ctb
join behavior_tb btb on ctb.course_id = btb.course_id
group by -- 去重
btb.course_id,
course_name
order by
btb.course_id
SQL186 牛客直播开始时各直播间在线人数【中等】
select
atb.course_id,
course_name,
count(*) as online_num
from
course_tb ctb
join attend_tb atb on ctb.course_id = atb.course_id
where '19:00' between date_format(in_datetime,'%H:%i') and date_format(out_datetime,'%H:%i')
group by
atb.course_id,
course_name
order by
atb.course_id
开播人数:用户的进入时间在19点前(包含19点),离开时间在19点后(包含19点)
即,开播时间在进入后离开前,即 开播时间 between ‘进入时间’ and ‘离开时间’
时间的格式,h- i -s
SELECT DATE_FORMAT(NOW(),'%Y-%m-%d %H:%i:%s'); -- 结果:2020-12-07 22:18:58
SELECT DATE_FORMAT(NOW(),'%Y-%m-%d %H:%i'); -- 结果:2020-12-07 22:18
SELECT DATE_FORMAT(NOW(),'%Y-%m-%d %H'); -- 结果:2020-12-07 22
SELECT DATE_FORMAT(NOW(),'%Y-%m-%d'); -- 结果:2020-12-07
SELECT DATE_FORMAT(NOW(),'%H:%i:%s'); -- 结果:22:18:58
SELECT DATE_FORMAT(NOW(),'%H'); -- 结果:22
SQL187 牛客直播各科目平均观看时长【中等】
select
course_name,
round(
avg(timestampdiff (minute, in_datetime, out_datetime)),
2
) as avg_Len
from
course_tb ctb
join attend_tb atb on ctb.course_id = atb.course_id
group by
course_name
order by
avg_Len desc
timestampdiff 求时间差 函数
语法: timestampdiff(unit(单位),begin,end)
begin和end可以为DATE或DATETIME类型,并且可允许参数为混合类型。
DATEDIFF函数返回date1 - date2的计算结果,date1和date2两个参数需是有效的日期或日期时间值;如果参数传递的是日期时间值,DATEDIFF函数仅将日期部分用于计算,并忽略时间部分(只有值的日期部分参与计算)
SELECT DATEDIFF('2022-04-30','2022-04-29');-- 1
SELECT DATEDIFF('2022-04-30','2022-04-30');-- 0
SELECT DATEDIFF('2022-04-29','2022-04-30');-- -1
SELECT DATEDIFF('2022-04-30','2022-04-30 14:00:00');-- 0
SELECT DATEDIFF('2022-04-30 13:00:00','2022-04-29 14:00:00');-- 1
SELECT DATEDIFF('2017-06-25 09:34:21', '2017-06-15');-- 10
TIMESTAMPDIFF函数用于计算两个日期的时间差
-- 相差:25秒
SELECT TIMESTAMPDIFF(SECOND,'2022-04-23 14:57:00','2022-04-23 14:57:25');
-- 相差:5分钟
SELECT TIMESTAMPDIFF(MINUTE,'2022-04-23 15:00:00','2022-04-23 15:05:00');
-- 相差:5小时
SELECT TIMESTAMPDIFF(HOUR,'2022-04-23 11:20:00', '2022-04-23 16:20:00');
-- 相差:2天
SELECT TIMESTAMPDIFF(DAY,'2022-04-23 11:20:00', '2022-04-25 11:20:00');
-- 相差:1星期
SELECT TIMESTAMPDIFF(WEEK,'2022-04-23 11:20:00', '2022-04-30 11:20:00');
-- 相差:1月
SELECT TIMESTAMPDIFF(MONTH,'2022-04-23 11:20:00', '2022-05-30 11:20:00');
-- 相差:1季度
SELECT TIMESTAMPDIFF(QUARTER,'2022-04-23 11:20:00', '2022-07-23 11:20:00');
-- 相差:1年
SELECT TIMESTAMPDIFF(YEAR,'2022-04-23 11:20:00', '2023-04-23 11:20:00');
SQL188 牛客直播各科目出勤率【较难】
一开始想着这么写,后来发现attend_tb 中会有重复
SELECT
t1.course_id,
course_name,
ROUND(100 * t2.a / t1.b, 2) attend_rate
from
( -- 每个科目报名的人数
SELECT
course_id,
count(user_id) b
from
behavior_tb
where
if_sign = 1
group by
course_id
) t1
join (
-- 每个科目出勤(在线时长10分钟及以上)人数
SELECT
course_id,
count(distinct user_id) a
from
attend_tb
where
TIMESTAMPDIFF (MINUTE, in_datetime, out_datetime) >= 10
group by
course_id
) t2 on t2.course_id = t1.course_id
join course_tb on t2.course_id = course_tb.course_id
ORDER BY
course_id
SQL189 牛客直播各科目同时在线人数【较难】
select
course_id,
course_name,
MAX(uv_cnt) max_num -- 最大同时在线人数
from
(
select
course_id,
course_name,
SUM(uv) OVER (
PARTITION BY
course_id
ORDER BY
dt,
uv DESC
) uv_cnt
from
( -- 登入时间为1,登出时间为-1
select
course_id,
user_id,
in_datetime dt,
1 as uv
from
attend_tb
union all
select
course_id,
user_id,
out_datetime dt,
-1 as uv
from
attend_tb
) uv_tb
JOIN course_tb USING (course_id)
) t1
GROUP BY
course_id,
course_name
ORDER BY
course_id;
某乎问答(内容行业)
SQL190 某乎问答11月份日人均回答量【简单】
select
answer_date,
round(count(issue_id) / count(DISTINCT author_id), 2) per_num
from
answer_tb
where
month (answer_date) = 11
group by
answer_date
注意用户id去重
SQL191 某乎问答高质量的回答中用户属于各级别的数量【中等】
select
case
when b.author_level in (1, 2) then '1-2级'
when b.author_level in (3, 4) then '3-4级'
when b.author_level in (5, 6) then '5-6级'
else ''
end as level_cut, -- 作为新增的字段
count(issue_id) num
from
answer_tb a
left join author_tb b on a.author_id = b.author_id
where
char_len >= 100 -- 高质量回答
group by
level_cut
order by
num desc
case when 打标签
SQL192 某乎问答单日回答问题数大于等于3个的所有用户【中等】
select
answer_date,author_id,count(issue_id) as answer_cnt
from answer_tb
where
month (answer_date) = 11
group by
answer_date,author_id
having count(issue_id)>=3 -- 分组过滤
order by answer_date
SQL193 某乎问答回答过教育类问题的用户里有多少用户回答过职场类问题【中等】
select
count(distinct author_id) num
from
issue_tb t1
join answer_tb t2 on t1.issue_id = t2.issue_id
where
issue_type = 'Education'
and author_id in (
-- 统计回答过教育类问题的用户
select
author_id
from
issue_tb a
join answer_tb b on a.issue_id = b.issue_id
where
issue_type = 'Career'
)
SQL194 某乎问答最大连续回答问题天数大于等于3天的用户及其对应等级【较难】【同时在线】
select
t3.author_id,
author_level,
days_cnt
from
(
select
author_id,
count(diff) days_cnt
from
(
select
author_id,
answer_date - row_number() over (
partition by
author_id
order by
answer_date
) diff
from
(
-- 按 用户 回答时间 过滤
select
author_id,
answer_date
from
answer_tb
-- group by 去重
group by
author_id,
answer_date
) t1
) t2 -- 判断一串日期是否连续:若连续,用这个日期减去它的排名,会得到一个相同的结果
group by
author_id,
diff
having
count(diff) >= 3 -- 连续下单大于等于两天
order by
author_id
) t3
join author_tb on t3.author_id = author_tb.author_id