牛客SQL-大厂面试真题

某音短视频

SQL156 各个视频的平均完播率【简单】

select
    t1.video_id,
    -- 结束观看时间与开始播放时间的差>=视频时长时,视为完成播放
    round(
        avg(if (end_time - start_time >= duration, 1, 0)),
        3
    ) as avg_comp_play_rate
from
    tb_user_video_log t1
    join tb_video_info t2 on t1.video_id = t2.video_id
where
    year (start_time) = 2021
group by
    t1.video_id
order by
    avg_comp_play_rate desc

使用timestampdiff会更好

avg( if timestampdiff(second,start_time,end_time) >= duration 1,0)

SQL157 平均播放进度大于60%的视频类别【简单】

select
    tag,
    concat (round(avg(play_progress), 2), "%") as avg_play_progress
from
    (
        select
            U.video_id,
            tag,
            duration,
            if (
                timestampdiff (second, start_time, end_time) / duration > 1,
                1,
                timestampdiff (second, start_time, end_time) / duration
            ) * 100 as play_progress
        from
            tb_user_video_log U
            left join tb_video_info V on U.video_id = V.video_id
    ) tb
group by
    tag
having
    avg_play_progress > 60
order by
    avg_play_progress desc

时间差值直接用减号相减容易出错,最好时间时间戳函数

TIMESTAMPDIFF(unit,begin,end)

TIMESTAMPDIFF函数返回begin-end的结果,其中begin和end是DATE或DATETIME表达式。

TIMESTAMPDIFF函数允许其参数具有混合类型,例如,begin是DATE值,end可以是DATETIME值。 如果使用DATE值,则TIMESTAMPDIFF函数将其视为时间部分为“00:00:00”的DATETIME值。

unit参数是确定(end-begin)的结果的单位,表示为整数,取值可为:YEAR/MONTH/DAY/HOUR/MINUTE/SECOND

SQL158 每类视频近一个月的转发量/率【中等】

DATEDIFF :求两个日期间隔的函数

select
    tag,
    sum(if_retweet) as retweet_cnt,
    round((sum(if_retweet) / count(U.uid)), 3) as retweet_rate
from
    tb_user_video_log as U
    left join tb_video_info as V on U.video_id = V.video_id
where
    -- 最近30天
    datediff (
        (
            select
                max(date (start_time))
            from
                tb_user_video_log
        ),
        date (start_time)
    ) < 30
group by
    tag
order by
    retweet_rate DESC

SQL159 每个创作者每月的涨粉率及截止当前的总粉丝量【中等】

难点 total_fans是当前月份的,不是某个月份分组的,需要累积,不能使用聚合函数,只能用窗口函数来统计。剩下的就是 case when 打标签

select
    author,
    month,
    -- 涨分率 这里不能用聚合函数
    round(follow_n_sum / play_cnt, 3) as fans_growth_rate,
    -- 当前粉丝总量
    sum(follow_n_sum) over (
        partition by
            author
        order by
            month
    ) total_fans
from
    (
        select
            author,
            date_format (start_time, '%Y-%m') as month,
            sum(
                case
                    when if_follow = 1 then 1 -- 涨粉
                    when if_follow = 2 then -1 -- 掉粉
                    else 0
                end
            ) as follow_n_sum,
            count(author) as play_cnt
        from
            tb_user_video_log as U
            left join tb_video_info as V on U.video_id = V.video_id
        where
            year (start_time) = 2021
        group by
            author,
            month
    ) T
order by
    author,
    total_fans

SQL160 国庆期间每类视频点赞量和转发量【较难】

开窗函数,滑动窗口

  • rows n perceding:从当前行到前n行(一共n+1行)
  • rang/rows between 边界规则1 and 边界规则2:rang表示按照值的范围进行定义框架,rows表示按照行的范围进行定义框架
rows between 2 perceding and 2 following # 当前行往前2行+当前行+当前行往后2行(一共5行)
rows between 1 following 3 following # 当前行的后1——>后3(共3行)
rows between unbounded preceding and current row # 从第一行到当前行

sql

select
    *
-- 先用窗口函数 再去筛选    
from
    (
        select
            tag,
            dt,
            -- 窗口向前7天
            sum(add_likes) over (
                partition by
                    tag
                order by
                    dt 
                    rows 6 preceding
            ) sum_like_cnt_7d,
            max(add_retweets) over (
                partition by
                    tag
                order by
                    dt 
                    rows 6 preceding
            ) max_retweet_cnt_7d
        from
            (
                -- 过滤 聚合
                select
                    tag,
                    date_format (start_time, '%Y-%m-%d') dt,
                    sum(if_like) add_likes,
                    sum(if_retweet) add_retweets
                from
                    tb_user_video_log t1
                    join tb_video_info t2 on t1.video_id = t2.video_id
                group by
                    tag,
                    dt
            ) a
    ) b
where
    dt between '2021-10-01' and '2021-10-03'
order by
    tag desc,
    dt

SQL161 近一个月发布的视频中热度最高的top3视频【困难】

新度 最近无播放的天数:所有数据的最新日期-某一视频的最新播放日期(聚合分组)

这是个聚合分组后,每个分组中的数据

注意这里要用date类型,否则过不了

select
    video_id,
    round((100 * complete_rate + 5 * like_cnt + 3 * comment_cnt + 2 * retweet_cnt) * fresh_index,0
 ) as hot_index
from
    (
        select
            vl.video_id,
            -- 平均完播率
            avg( if ( timestampdiff (second, start_time, end_time) >= duration,1,0)) as complete_rate,
            -- 点赞数
            sum(if_like) as like_cnt,
            -- 评论数
            count(comment_id) as comment_cnt,
            -- 转发数
            sum(if_retweet) as retweet_cnt,
            -- 新鲜度 最近无播放天数
            -- 所有数据的最新日期-某一视频的最新播放日期(聚合分组)
            1 / ( 
                timestampdiff(
                    day,
                    max(date (end_time)),
                    (
                        select max(date (end_time)) 
                        from tb_user_video_log
                    )
                ) + 1
            ) as fresh_index
        from
            tb_user_video_log as vl
            join tb_video_info as vi on vl.video_id = vi.video_id
        where
            -- 近一个月发布的视频
            -- 所有数据的最新日期-某个视频的发布日期<=29
            timestampdiff(day,date(release_time),
            (
                select max(date(end_time))
                from tb_user_video_log) 
            ) <= 29
        group by
            video_id
    ) as q
order by
    hot_index desc
limit
    3

用户增长场景(某度信息流)

SQL162 2021年11月每天的人均浏览文章时长【简单】

  • 计算每次文章浏览的时长和日期:
    • 过滤目标时间窗的有效浏览记录:WHERE artical_id != 0 AND DATE_FORMAT(in_time, "%Y%m") = "202111"
    • 将进入时间转化为日期:DATE(in_time) as dt
    • 计算浏览时长:TIMESTAMPDIFF(SECOND, in_time, out_time) as view_len_sec
  • 按日期分组:GROUP BY dt
  • 计算人均时长(=总时长÷人数):SUM(view_len_sec) / COUNT(DISTINCT uid) as avg_view_len_sec
  • 保留1位小数:ROUND(x, 1)

timestampdiff()函数的作用是返回两个日期时间之间的整数差。

select
    date(in_time) dt,
    round(
        sum(timestampdiff (second, in_time, out_time)) / count(distinct uid),
        1
    ) avg_lensec
from
    tb_user_log
where
    date_format(in_time, "%Y%m") = "202111"
    and artical_id != 0
group by
    dt
order by
    avg_lensec

SQL163 每篇文章同一时刻最大在看人数【中等】【同时在线】

同时在线人数问题

本题的坑点:

题目要求在瞬时统计时遵循【先进后出】:如果同一时刻有进入也有离开时,先记录用户数增加,再记录减少。

因此在ORDER BY层面,在遵循dt升序的同时,还要遵循先+1,再-1的原则,即diff DESC:

SUM(diff) OVER(PARTITION BY artical_id ORDER BY dt, diff DESC)

总体sql:

select
    artical_id,
    max(instant_viewer_cnt) as max_uv
from
    (
        select
            artical_id,
            event_time,
            sum(flag) over (partition by artical_id order by event_time, flag desc) as instant_viewer_cnt
        from
            (
                -- 登录标记1 登出标记-1
                select
                    uid,
                    artical_id,
                    in_time as event_time,
                    1 flag
                from
                    tb_user_log
                where artical_id != 0    
                union all
                select
                    uid,
                    artical_id,
                    out_time as event_time,
                    -1 flag
                from
                    tb_user_log
                where artical_id != 0
            ) t1
    ) t2
group by
    artical_id
order by max_uv desc

SQL164 2021年11月每天新用户的次日留存率【中等】【留存问题】

留存率的计算是经典的老问题了 就是left join

select
    t1.dt,
    round(count(distinct t2.uid) / count(t1.uid), 2) as uv_rate
from
    (
        select
            uid,
            min(date (in_time)) dt
        from
            tb_user_log
        group by
            uid
    ) t1 
    left join (
        select
            uid,
            date (in_time) dt
        from
            tb_user_log
        union
        select
            uid,
            date (out_time) dt
        from
            tb_user_log
    ) t2 -- 用户活跃表 进入时间和离开时间都算活跃
    on t1.uid = t2.uid
    and datediff(t2.dt,t1.dt)=1
where
    t1.dt like '2021-11%'
group by
    t1.dt
order by
    t1.dt;

上述代码似乎有问题,如果登陆时间跨越了了三天,刚好错过了第二天,拆出来的数据没有第二天,那结果不是错了?

SQL165 统计活跃间隔对用户分级结果【较难】

打标签的题 case when

select 
case 
	when max_day<7 and min_day>=7 then '忠实用户'
	when max_day<7 and min_day<7 then '新晋用户'
    when min_day between 7 and 29 then '沉睡用户'
    else '流失用户' end user_grade
,round(count(distinct uid)/(select count(distinct uid) from tb_user_log),2) ratio
    from(
		select *
        -- 每个用户最近一次登录差多少天
		,datediff(max(out_time)over(),max(out_time)over(partition by uid)) max_day 
        -- 每个用户最远一次登录差多少天
		,datediff(max(out_time)over(),min(out_time)over(partition by uid)) min_day 
		from tb_user_log
    )a
group by user_grade
order by ratio desc

这里 引入了两个概念

  • 每个用户最近一次登录差多少天
  • 每个用户最远一次登录差多少天

使用开窗函数来实现,非常巧妙

SQL166 每天的日活数及新用户占比【较难】

select
    t1.dt dt,
    count(distinct t1.uid) dau,
    round(
        count(distinct t2.uid) / count(distinct t1.uid),
        2
    ) ub_new_ratio
from
    (
        select
            uid,
            date (in_time) dt
        from
            tb_user_log
        union
        select
            uid,
            date (out_time) dt
        from
            tb_user_log
    ) t1 -- 用户活跃表 进入时间和离开时间都算活跃
    left join (
        select
            uid,
            min(date (in_time)) dt
        from
            tb_user_log
        group by
            uid
    ) t2 -- 每天的新用户表
    on t1.uid = t2.uid
    and t1.dt = t2.dt
group by
    dt
order by
    dt

与SQL164留存率问题相反

SQL167 连续签到领金币【困难】

两个难点:

  • 如何确定签到日期是否连续?(窗口排序)
  • 如何确定每一次签到获取金币的数量?(mod取余)
select 
  uid,
  date_format(dt,'%Y%m') as month,
  sum(grade) as coin
from(
    select uid ,dt,  
    case 
       when mod(rank() over (partition by uid,rank_day order by dt),7) = 3 then 3     
       when mod(rank() over (partition by uid,rank_day order by dt),7) = 0 then 7 
    else 1   
    end grade    
     from 
        (
        select uid, dt,
        date_sub(dt,interval rank() over(partition by uid order by dt) day) as rank_day -- 连续日期
        -- rank() over(partition by uid order by dt) as rk
        from(
            select uid, date(in_time) dt 
            from tb_user_log 
            where artical_id=0 and sign_in=1 and date(in_time) between '2021-07-07' and '2021-10-31' 
            group by uid,dt
            ) t1
        ) t2
     )t3
group by uid,month;

电商场景(某东商城)

SQL168 计算商城中2021年每月的GMV【简单】

GMV(Gross Merchandise Volume,商品交易总额)为已付款订单和未付款订单两者之和。结果按GMV升序排序。

-- 有年有月
SELECT date_format(event_time,'%Y-%m')  as month,
       sum(total_amount) as GMV
from tb_order_overall
-- status=2是退款
where status != 2 and year(event_time) = 2021
group by month
having GMV > 100000
order by GMV 

SQL169 统计2021年10月每个退货率不大于0.5的商品各项指标【中等】

select
    product_id,
    -- 商品点展比
    round(sum(if_click) / count(1), 3) ctr,
    -- 加购率
    round(sum(if_cart) / sum(if_click), 3) cart_rate,
    -- 成单率
    round(sum(if_payment) / sum(if_cart), 3) payment_rate,
    -- 退货率
    round(sum(if_refund) / sum(if_payment), 3) refund_rate
from
    tb_user_event
where
    -- 2021年10月
    date_format (event_time, '%Y-%m') = '2021-10'
group by
    product_id
having
    -- 退货率小于 0.5
    refund_rate <= 0.5
order by
    product_id

SQL170 某店铺的各商品毛利率及店铺整体毛利率【中等】

输出要涵盖 店铺和商品 需要用到Union

select
    '店铺汇总' as product_id,
    concat (
        round((1 - sum(in_price * cnt) / sum(price * cnt) )* 100,1),'%'
    ) as profit_rate
from
    tb_order_overall
    join tb_order_detail using (order_id)
    join tb_product_info using (product_id)
where
    date_format (event_time, '%Y-%m') >= '2021-10'
    and shop_id = 901    
union all
(
    select
        product_id,
        concat (
            round((1 - avg(in_price / price))* 100, 1),'%'
        ) profit_rate
    from
        tb_order_overall
        join tb_order_detail using (order_id)
        join tb_product_info using (product_id)
    where
        date_format (event_time, '%Y-%m') >= '2021-10'
        and shop_id = 901
    group by
        product_id
    having
        (1 - avg(in_price / price)) * 100 > 24.9
    order by
        product_id
)

SQL171 零食类商品中复购率top3高的商品【中等】

某商品复购率 = 近90天内购买它至少两次的人数 ÷ 购买它的总人数

近90天指包含最大日期(记为当天)在内的近90天。

select
    product_id,
    -- rk>=2 说明被复购
    round(count(distinct if(rk >= 2,uid,null))/count(distinct uid),3) rate
from(
    -- 统计每个货物用户的购买记录;利用窗口函数根据货号、用户分组按购买时间排序
    select
        product_id,
        uid,
        row_number() over(partition by product_id,uid order by event_time) rk
    from tb_order_detail
    left join tb_product_info using(product_id)
    left join tb_order_overall using (order_id)
    where tag='零食'
    and status=1
    and datediff(
        date(
            select
            max(event_time)
            from tb_order_overall
        ),date(event_time)) < 90 -- 90天内
) main
group by product_id
order by rate desc,product_id
limit 3

SQL172 10月的新户客单价和获客成本【较难】

一个uid在一个order_id中可能有多条记录,所以要用SUM窗口函数,单纯的聚合有可能出现一行对应多行的错误。同时窗口聚合的分组依据应该是order_id而不是uid

select 
    -- 首单平均交易金额
    round(avg(total_amount), 1) as avg_amount,
    -- 平均获客成本
    round(avg(total - total_amount), 1) as avg_cost
from(
    select 
        uid,
        event_time,
        row_number() over (partition by uid order by event_time) as rk,
        total_amount,
        -- 按order_id分组 订单明细的总和
        sum(price*cnt) over (partition by order_id) as total
    from tb_order_detail
    join tb_order_overall USING(order_id)
    where status = 1
    ) TB1
 where rk = 1 -- 首单 
 and date_format(event_time, '%Y-%m') = '2021-10'

SQL173 店铺901国庆期间的7日动销率和滞销率【困难】

​ 动销率定义为店铺中一段时间内有销量的商品占当前已上架总商品数的比例(有销量的商品/已上架总商品数)。

​ 滞销率定义为店铺中一段时间内没有销量的商品占当前已上架总商品数的比例。(没有销量的商品/已上架总商品数)。

select dt 
,round(count(distinct product_id)/avg(onsale_cnt),3)  as sale_rate
,round(1- count(distinct product_id)/avg(onsale_cnt),3) as unsale_rate
from 
    (
    -- 笛卡尔积+筛选获取901店铺在三天里上架商品数量
    select 
    date(event_time) as dt,
    count(
    distinct 
    case when 
         event_time>=release_time then product_id 
    end) as onsale_cnt 
    from tb_product_info,tb_order_overall
    where shop_id=901 
    and date(event_time) between '2021-10-01' and '2021-10-03'
    group by dt
    ) tb_3dayreli
left join 
    (
    -- 每个有下单的日期与下单商品id 
    select product_id,date(event_time) dtt
    from tb_order_overall join tb_order_detail
    using(order_id)
    join tb_product_info using(product_id) 
    where status=1 and shop_id=901
) tb_sellcnt 
on datediff(tb_3dayreli.dt,tb_sellcnt.dtt) between 0 and 6  #7天内
group by dt

出行场景(某滴打车)

SQL174 2021年国庆在北京接单3次及以上的司机统计信息【简单】

select
    city,
    ROUND(AVG(order_cnt), 3) as avg_order_num,
    ROUND(AVG(income_amount), 3) as avg_income
from
(
    -- 2021国庆期间在北京接单3次以上的司机
    SELECT
    city,
    driver_id,
    count(t1.order_id) as order_cnt,
    sum(fare) as income_amount
from tb_get_car_order as t1
join tb_get_car_record as t2
on t1.order_id = t2.order_id
where
    date(order_time) between '2021-10-01' and '2021-10-07'
    and city = '北京'
group by city, driver_id -- 去重
having order_cnt >= 3
) as t3
group by city;
-- 需要注意id一样的时候用聚合函数(SUM,AVG)需要再用一次GROUP BY,否则会报错;

SQL175 有取消订单记录的司机平均评分【简单】

(
    select driver_id            as id,
           round(avg(grade), 1) as avg_score
    from tb_get_car_order
    where driver_id in (
        -- 有取消订单的司机id
        select driver_id
        from tb_get_car_order
        where date_format(order_time, '%Y-%m') = '2021-10'
          and start_time is null
        group by driver_id
    )
    group by driver_id
    order by driver_id
)
union all
(
    select "总体"                 as id,
           round(avg(grade), 1) as avg_score
    from tb_get_car_order
    where driver_id in (
        select driver_id
        from tb_get_car_order
        where date_format(order_time, '%Y-%m') = '2021-10'
          and start_time is null
        group by driver_id
    )
)

SQL176 每个城市中评分最高的司机信息【中等】

select
    city,
    driver_id,
    avg_grade,
    cnt_order,
    cnt_mile
from
    (
        select
            b.city,
            a.driver_id,
            round(avg(grade), 1) as avg_grade, -- 平均评分 
            round(
                count(a.order_id) / count(distinct substring(order_time, 1, 10)),
                1
            ) as cnt_order, -- 日均接单量
            round(
                sum(mileage) / count(distinct substring(order_time, 1, 10)),
                3
            ) as cnt_mile, -- 日均行驶里程数
            dense_rank() over (
                partition by
                    b.city
                order by
                    avg(grade) desc
            ) as rk -- 按城市分区,按平均评分排序
        from
            tb_get_car_order a
        left join tb_get_car_record b on a.order_id = b.order_id -- 以订单表为左表
        group by
            -- 去重
            b.city,
            a.driver_id
    ) t
where
    rk = 1
order by
    cnt_order

SQL177 国庆期间近7日日均取消订单量【中等】

select 
	dt,
    round(finish_num_7d,2) finish_num_7d,
    round(cancel_num_7d,2) cancel_num_7d
 from (
	  select 
		dt,
        -- 从当前位置向前6天,每天的近7日
		avg(order_done) over(order by dt rows 6 preceding) finish_num_7d,
		avg(order_cancel) over(order by dt rows 6 preceding) cancel_num_7d
	 from (
		  select 
			date(order_time) as dt, 
			count(start_time) order_done,  -- 订单完成量 不为null
			sum(if(start_time is null,1,0)) order_cancel -- 订单取消量
		 from tb_get_car_order
		 group by date(order_time)
         ) tb
 ) tb1
where dt between '2021-10-01' and '2021-10-03' -- 国庆前三天
order by dt

SQL178 工作日各时段叫车量、等待接单时间和调度时间【较难】

select period, 
    count(1) as get_car_num,
    round(avg(wait_time/60), 1) as avg_wait_time, -- 秒转分
    round(avg(dispatch_time/60), 1) as avg_dispatch_time
from (
    select event_time,
	    -- case when 打标签
		-- event_time-开始打车时间为时段划分依据
        case
            when hour(event_time) in (7, 8) then '早高峰'
            when hour(event_time) between 9 and 16 then '工作时间'
            when hour(event_time) in (17, 18, 19) then '晚高峰'
            else '休息时间'
        end as period,
        timestampdiff(second, event_time, end_time) as wait_time, -- 等待时间
        timestampdiff(second, order_time, start_time) as dispatch_time -- 调度时间
    from tb_get_car_record r
    join tb_get_car_order o ON r.order_id = o.order_id
    where dayofweek(event_time) between 2 and 6 
) as t_wait_dispatch_time 
group by period
order by get_car_num;

-- timestampdiff()函数的作用是返回两个日期时间之间的整数差
-- dayofweek 返回日期在本周的星期几索引,星期天 = 1,星期一 = 2,星期六 = 7

SQL179 各城市最大同时等车人数 【较难】

select city, max(wait_uv) as max_wait_uv
from (
         select city,
                dt,
                sum(uv) over (partition by city order by dt asc,uv desc) as wait_uv
         from (   -- 开始打车
                  select city,
                         event_time as dt,
                         1          as uv
                  from tb_get_car_record
                  union all
                  -- 订单取消 或结束
                  select city,
                         if(start_time is not null, start_time, end_time) as dt,
                         -1                                               as uv
                  from tb_get_car_record as cr
                  inner join tb_get_car_order as co
                  on cr.order_id=co.order_id
              ) as tb1
     ) as tb2
where date_format (dt, '%Y-%m')='2021-10'
group by city
order by max_wait_uv, city;

同时在线问题 union all 登录登出 + 窗口排序

某宝店铺分析(电商模式)

SQL180 某宝店铺的SPU数量【简单】

select
    style_id,
    count(*) as SPU_num
from
    product_tb
group by
    style_id
order by
    SPU_num desc

SPU与SKU区别?

SQL181 某宝店铺的实际销售额与客单价【简单】

select
    sum(sales_price) as sales_total,
    round(sum(sales_price) / count(distinct user_id), 2) as per_trans
from
    sales_tb;

SQL182 某宝店铺折扣率【中等】

即Gross Merchandise Volume,主要是指网站的成交金额,而这里的成交金额包括:付款金额和未付款。类似题SQL168

1、GMV成交金额:累计结算金额,即 sum(sales_price)

2、吊牌总金额:产品数量*吊牌单价,即 sum(sales_num * tag_price)

select
    round(100*sum(sales_price) / sum(sales_num*tag_price), 2) as 'discount_rate(%)'
from
    sales_tb left join product_tb on sales_tb.item_id = product_tb.item_id

SQL183 某宝店铺动销率与售罄率【较难】

一、动销率

Stock Keeping Unit (库存量单位)。在很多业务场景下,是计算库存进出计量,物理上不可分割的最小存货单元,表示某款商品的具体配置(规格、颜色等)。

动销率 = 有销售的SKU数量 / 在售SKU数量

有销售的SKU数量:售出的SKU数量总和(商品期间销售数量)

在售的SKU数量 :剩余(总库存 - 售出)库存SKU的数量总和(商品期末库存数量)

二、售罄率

售罄率 = GMV / 备货值(备货值 = 吊牌价 * 库存数)

GMV :所有订单的金额总和(很多场景下,下单未付款的订单金额也计算在内)

吊牌价 :商品详情页显示的价格

问题:直接做连接做计算会导致有重复购买记录的商品的inventory多次计算,计算结果会变小。 要将两个表的计算分开进行。

select style_id,
    round(100*sum(num)/ (sum(inventory)-sum(num)),2) as 'pin_rate(%)',
    round(100*sum(item_GMV)/ sum(inventory*tag_price),2) as 'sell-through_rate(%)'
from product_tb a
join
(
    select item_id,
    sum(sales_num) as num,
    sum(sales_price) as item_GMV
 from sales_tb
 group by item_id) as b
on a.item_id=b.item_id
group by style_id
order by style_id

SQL184 某宝店铺连续2天及以上购物的用户及其对应的天数【较难】

MySQL的date_sub用法:

语法:date_sub(date,interval expr type),函数从日期减去指定的时间间隔

date_sub('2019-07-27', interval 30 day)表示往前推30天

可以直接 用日期-数字,但最好别这么勇

select
    user_id,
    count(diff) days_count
from
    (
        select
            user_id,
            date_sub(sales_date , interval row_number() over (partition by user_id order by sales_date) day )as diff
        from
            (   -- 同一天可能多个用户下单,进行去重
                select
                    user_id,
                    sales_date
                from
                    sales_tb
                group by
                    user_id,
                    sales_date
            ) t1 
    ) t2 -- 判断一串日期是否连续:若连续,用这个日期减去它的排名,会得到一个相同的结果
group by
    user_id,
    diff
having
    count(diff) >= 2 -- 连续下单大于等于两天
order by
    user_id

牛客直播课分析(在线教育行业)

SQL185 牛客直播转换率【简单】

select
    btb.course_id,
    course_name,
    round(sum(if_sign) / sum(if_vw) * 100, 2) as sign_rate
from
    course_tb ctb
    join behavior_tb btb on ctb.course_id = btb.course_id
group by -- 去重
    btb.course_id,
    course_name
order by
    btb.course_id

SQL186 牛客直播开始时各直播间在线人数【中等】

select
    atb.course_id,
    course_name,
    count(*) as online_num
from
    course_tb ctb
    join attend_tb atb on ctb.course_id = atb.course_id
where  '19:00' between date_format(in_datetime,'%H:%i') and date_format(out_datetime,'%H:%i')
group by
    atb.course_id,
    course_name
order by
    atb.course_id

开播人数:用户的进入时间在19点前(包含19点),离开时间在19点后(包含19点)

即,开播时间在进入后离开前,即 开播时间 between ‘进入时间’ and ‘离开时间’

时间的格式,h- i -s

SELECT DATE_FORMAT(NOW(),'%Y-%m-%d %H:%i:%s');   -- 结果:2020-12-07 22:18:58
SELECT DATE_FORMAT(NOW(),'%Y-%m-%d %H:%i');      -- 结果:2020-12-07 22:18
SELECT DATE_FORMAT(NOW(),'%Y-%m-%d %H');         -- 结果:2020-12-07 22
SELECT DATE_FORMAT(NOW(),'%Y-%m-%d');            -- 结果:2020-12-07
SELECT DATE_FORMAT(NOW(),'%H:%i:%s');            -- 结果:22:18:58
SELECT DATE_FORMAT(NOW(),'%H');                  -- 结果:22

SQL187 牛客直播各科目平均观看时长【中等】

select
    course_name,
    round(
        avg(timestampdiff (minute, in_datetime, out_datetime)),
        2
    ) as avg_Len
from
    course_tb ctb
    join attend_tb atb on ctb.course_id = atb.course_id
group by
    course_name
order by
    avg_Len desc

timestampdiff 求时间差 函数

语法: timestampdiff(unit(单位),begin,end)

​ begin和end可以为DATE或DATETIME类型,并且可允许参数为混合类型。

DATEDIFF函数返回date1 - date2的计算结果,date1和date2两个参数需是有效的日期或日期时间值;如果参数传递的是日期时间值,DATEDIFF函数仅将日期部分用于计算,并忽略时间部分(只有值的日期部分参与计算)

SELECT DATEDIFF('2022-04-30','2022-04-29');-- 1
SELECT DATEDIFF('2022-04-30','2022-04-30');-- 0
SELECT DATEDIFF('2022-04-29','2022-04-30');-- -1
SELECT DATEDIFF('2022-04-30','2022-04-30 14:00:00');-- 0
SELECT DATEDIFF('2022-04-30 13:00:00','2022-04-29 14:00:00');-- 1
SELECT DATEDIFF('2017-06-25 09:34:21', '2017-06-15');-- 10

TIMESTAMPDIFF函数用于计算两个日期的时间差

-- 相差:25秒
SELECT TIMESTAMPDIFF(SECOND,'2022-04-23 14:57:00','2022-04-23 14:57:25');
-- 相差:5分钟
SELECT TIMESTAMPDIFF(MINUTE,'2022-04-23 15:00:00','2022-04-23 15:05:00');
-- 相差:5小时
SELECT TIMESTAMPDIFF(HOUR,'2022-04-23 11:20:00', '2022-04-23 16:20:00');
-- 相差:2天
SELECT TIMESTAMPDIFF(DAY,'2022-04-23 11:20:00', '2022-04-25 11:20:00'); 
-- 相差:1星期
SELECT TIMESTAMPDIFF(WEEK,'2022-04-23 11:20:00', '2022-04-30 11:20:00');
-- 相差:1月
SELECT TIMESTAMPDIFF(MONTH,'2022-04-23 11:20:00', '2022-05-30 11:20:00');
-- 相差:1季度
SELECT TIMESTAMPDIFF(QUARTER,'2022-04-23 11:20:00', '2022-07-23 11:20:00');
-- 相差:1年
SELECT TIMESTAMPDIFF(YEAR,'2022-04-23 11:20:00', '2023-04-23 11:20:00');

SQL188 牛客直播各科目出勤率【较难】

一开始想着这么写,后来发现attend_tb 中会有重复

SELECT
    t1.course_id,
    course_name,
    ROUND(100 * t2.a / t1.b, 2) attend_rate
from
    (   -- 每个科目报名的人数
        SELECT
            course_id,
            count(user_id) b
        from
            behavior_tb
        where
            if_sign = 1
        group by
            course_id
    ) t1
    join (
        -- 每个科目出勤(在线时长10分钟及以上)人数
        SELECT
            course_id,
            count(distinct user_id) a
        from
            attend_tb
        where
            TIMESTAMPDIFF (MINUTE, in_datetime, out_datetime) >= 10
        group by
            course_id
    ) t2 on t2.course_id = t1.course_id
    join course_tb on t2.course_id = course_tb.course_id
ORDER BY
    course_id

SQL189 牛客直播各科目同时在线人数【较难】

select
    course_id,
    course_name,
    MAX(uv_cnt) max_num  -- 最大同时在线人数
from
    (
        select
            course_id,
            course_name,
            SUM(uv) OVER (
                PARTITION BY
                    course_id
                ORDER BY
                    dt,
                    uv DESC
            ) uv_cnt
        from
            (   -- 登入时间为1,登出时间为-1
                select
                    course_id,
                    user_id,
                    in_datetime dt,
                    1 as uv
                from
                    attend_tb
                union all
                select
                    course_id,
                    user_id,
                    out_datetime dt,
                    -1 as uv
                from
                    attend_tb
            ) uv_tb
            JOIN course_tb USING (course_id)
    ) t1
GROUP BY
    course_id,
    course_name
ORDER BY
    course_id;

某乎问答(内容行业)

SQL190 某乎问答11月份日人均回答量【简单】

select
    answer_date,
    round(count(issue_id) / count(DISTINCT author_id), 2) per_num
from
    answer_tb
where
    month (answer_date) = 11
group by
    answer_date

注意用户id去重

SQL191 某乎问答高质量的回答中用户属于各级别的数量【中等】

select
    case
        when b.author_level in (1, 2) then '1-2级'
        when b.author_level in (3, 4) then '3-4级'
        when b.author_level in (5, 6) then '5-6级'
        else ''
    end as level_cut, -- 作为新增的字段
    count(issue_id) num
from
    answer_tb a
    left join author_tb b on a.author_id = b.author_id
where
    char_len >= 100 -- 高质量回答
group by
    level_cut
order by
    num desc

case when 打标签

SQL192 某乎问答单日回答问题数大于等于3个的所有用户【中等】

select
  answer_date,author_id,count(issue_id) as answer_cnt
from answer_tb
where
    month (answer_date) = 11
group by
    answer_date,author_id 
having count(issue_id)>=3 -- 分组过滤   
order by answer_date

SQL193 某乎问答回答过教育类问题的用户里有多少用户回答过职场类问题【中等】

select
    count(distinct author_id) num
from
    issue_tb t1
    join answer_tb t2 on t1.issue_id = t2.issue_id
where
    issue_type = 'Education'
    and author_id in (
        -- 统计回答过教育类问题的用户
        select
            author_id
        from
            issue_tb a
            join answer_tb b on a.issue_id = b.issue_id
        where
            issue_type = 'Career'
    )

SQL194 某乎问答最大连续回答问题天数大于等于3天的用户及其对应等级【较难】【同时在线】

select
    t3.author_id,
    author_level,
    days_cnt
from
    (
        select
            author_id,
            count(diff) days_cnt
        from
            (
                select
                    author_id,
                    answer_date - row_number() over (
                        partition by
                            author_id
                        order by
                            answer_date
                    ) diff
                from
                    (

                        -- 按 用户 回答时间 过滤 
                        select
                            author_id,
                            answer_date
                        from
                            answer_tb
                            -- group by 去重
                        group by
                            author_id,
                            answer_date
                    ) t1
            ) t2 -- 判断一串日期是否连续:若连续,用这个日期减去它的排名,会得到一个相同的结果
        group by
            author_id,
            diff
        having
            count(diff) >= 3 -- 连续下单大于等于两天
        order by
            author_id
    ) t3
    join author_tb on t3.author_id = author_tb.author_id
posted @ 2023-07-16 13:45  王陸  阅读(612)  评论(1编辑  收藏  举报