欢迎访问yhm138的博客园博客, 你可以通过 [RSS] 的方式持续关注博客更新

MyAvatar

yhm138

HelloWorld!

【LeetCode1651. Hopper 公司查询 III 】over(ORDER BY str_to_date(`month`, '%Y-%m') rows between CURRENT ROW AND 2 following)实现【当月和接下来2个月】滑动窗口

题目地址

https://leetcode.cn/problems/hopper-company-queries-iii/description/

MySQL代码

-- CTE生成2020年每月的最后一天
WITH RECURSIVE months AS (
 SELECT LAST_DAY('2019-12-01') AS month_end -- 初始日期的前一个月的最后一天
 UNION ALL
 SELECT LAST_DAY(month_end + INTERVAL 1 DAY) -- 下一个月的最后一天
 FROM months
 WHERE month_end < '2020-12-01' -- 递归结束条件
)
,
months2020 as (
   SELECT *, DATE_FORMAT(month_end, '%Y-%m') AS `month` -- 格式化日期,只保留年月
   FROM months
   where DATE_FORMAT(month_end, '%Y')="2020"
)
,
t1 as(
   SELECT months2020.`month`
   , ifnull(sum( ar.ride_distance) , 0 ) as ride_distance
   , ifnull(sum( ar.ride_duration)  ,0) as ride_duration
   from   months2020
   left join Rides r 
   on months2020.`month`=DATE_FORMAT(r.requested_at,"%Y-%m")
   left join AcceptedRides ar
   on ar.ride_id=r.ride_id
   group by months2020.`month`
)


SELECT * from (
   SELECT
   MONTH(str_to_date(`month`, '%Y-%m')) AS `month`,
   round(AVG(ride_distance) OVER (
       ORDER BY str_to_date(`month`, '%Y-%m')
       ROWS BETWEEN  CURRENT ROW  AND  2 following 
   ),2) AS average_ride_distance,
   round(AVG(ride_duration) OVER (
       ORDER BY str_to_date(`month`, '%Y-%m')
       ROWS BETWEEN CURRENT ROW   AND  2 following 
   ),2) AS average_ride_duration
   FROM
   t1
) result_to_be_truncated
where `month`<=10

等效pandas代码

import pandas as pd
import numpy as np

def hopper_company_queries(drivers: pd.DataFrame, rides: pd.DataFrame, accepted_rides: pd.DataFrame) -> pd.DataFrame:
    # Generate the last day of each month in 2020
    month_ends = pd.date_range(start='2020-01-31', end='2020-12-31', freq='M')
    months2020 = pd.DataFrame({'month_end': month_ends})
    months2020['month'] = months2020['month_end'].dt.to_period('M').astype(str)

    # Ensure the 'requested_at' in rides is a datetime
    rides['requested_at'] = pd.to_datetime(rides['requested_at'])
    rides['month'] = rides['requested_at'].dt.to_period('M').astype(str)

    # Merge the months2020 DataFrame with rides and accepted_rides DataFrames
    merged = pd.merge(months2020, rides, how='left', on='month')
    merged = pd.merge(merged, accepted_rides, how='left', on='ride_id')

    # Group by month and calculate the sum of ride_distance and ride_duration
    grouped = merged.groupby('month').agg({
        'ride_distance': 'sum',
        'ride_duration': 'sum'
    }).fillna(0).reset_index()

    # Calculate the 3-month moving average for ride_distance and ride_duration
    # Shift the result by 2 to align with the SQL output (current row + 2 following rows)
    grouped['average_ride_distance'] = grouped['ride_distance'].rolling(window=3, min_periods=1).mean().round(2).shift(-2)
    grouped['average_ride_duration'] = grouped['ride_duration'].rolling(window=3, min_periods=1).mean().round(2).shift(-2)

    # Filter the DataFrame to only include months up to October
    # Extract the month number from the 'month' column before filtering to get the correct month numbers
    grouped['month_num'] = grouped['month'].apply(lambda x: int(x.split('-')[1]))
    grouped = grouped[grouped['month_num'] <= 10]

    # Select the required columns and rename them as in the expected output
    result_to_be_truncated = grouped[['month_num', 'average_ride_distance', 'average_ride_duration']]
    result_to_be_truncated.columns = ['month', 'average_ride_distance', 'average_ride_duration']

    # Drop the rows with NaN values that are a result of the shift operation
    result_to_be_truncated = result_to_be_truncated.dropna()

    return result_to_be_truncated
posted @ 2024-01-12 16:11  yhm138  阅读(11)  评论(0编辑  收藏  举报